diff --git a/include/xgboost/learner.h b/include/xgboost/learner.h
index ccb171370..1d4e35a94 100644
--- a/include/xgboost/learner.h
+++ b/include/xgboost/learner.h
@@ -8,29 +8,33 @@
 #ifndef XGBOOST_LEARNER_H_
 #define XGBOOST_LEARNER_H_
 
-#include <dmlc/io.h>                // Serializable
-#include <xgboost/base.h>
-#include <xgboost/context.h>        // Context
-#include <xgboost/feature_map.h>
-#include <xgboost/host_device_vector.h>
-#include <xgboost/linalg.h>         // Tensor
-#include <xgboost/metric.h>
-#include <xgboost/model.h>
+#include <dmlc/io.h>          // for Serializable
+#include <xgboost/base.h>     // for bst_feature_t, bst_target_t, bst_float, Args, GradientPair
+#include <xgboost/context.h>  // for Context
+#include <xgboost/linalg.h>   // for Tensor, TensorView
+#include <xgboost/metric.h>   // for Metric
+#include <xgboost/model.h>    // for Configurable, Model
+#include <xgboost/span.h>     // for Span
+#include <xgboost/task.h>     // for ObjInfo
 
-#include <map>
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
+#include <algorithm>  // for max
+#include <cstdint>    // for int32_t, uint32_t, uint8_t
+#include <map>        // for map
+#include <memory>     // for shared_ptr, unique_ptr
+#include <string>     // for string
+#include <utility>    // for move
+#include <vector>     // for vector
 
 namespace xgboost {
-
+class FeatureMap;
 class Metric;
 class GradientBooster;
 class ObjFunction;
 class DMatrix;
 class Json;
 struct XGBAPIThreadLocalEntry;
+template <typename T>
+class HostDeviceVector;
 
 enum class PredictionType : std::uint8_t {  // NOLINT
   kValue = 0,
@@ -143,7 +147,10 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
    * \brief Get number of boosted rounds from gradient booster.
    */
   virtual int32_t BoostedRounds() const = 0;
-  virtual uint32_t Groups() const = 0;
+  /**
+   * \brief Get the number of output groups from the model.
+   */
+  virtual std::uint32_t Groups() const = 0;
 
   void LoadModel(Json const& in) override = 0;
   void SaveModel(Json* out) const override = 0;
@@ -275,8 +282,16 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
 
 struct LearnerModelParamLegacy;
 
-/*
- * \brief Basic Model Parameters, used to describe the booster.
+/**
+ * \brief Strategy for building multi-target models.
+ */
+enum class MultiStrategy : std::int32_t {
+  kComposite = 0,
+  kMonolithic = 1,
+};
+
+/**
+ * \brief Basic model parameters, used to describe the booster.
  */
 struct LearnerModelParam {
  private:
@@ -287,30 +302,51 @@ struct LearnerModelParam {
   linalg::Tensor<float, 1> base_score_;
 
  public:
-  /* \brief number of features */
-  uint32_t num_feature { 0 };
-  /* \brief number of classes, if it is multi-class classification */
-  uint32_t num_output_group { 0 };
-  /* \brief Current task, determined by objective. */
+  /**
+   * \brief The number of features.
+   */
+  bst_feature_t num_feature{0};
+  /**
+   * \brief The number of classes or targets.
+   */
+  std::uint32_t num_output_group{0};
+  /**
+   * \brief Current task, determined by objective.
+   */
   ObjInfo task{ObjInfo::kRegression};
+  /**
+   * \brief Strategy for building multi-target models.
+   */
+  MultiStrategy multi_strategy{MultiStrategy::kComposite};
 
   LearnerModelParam() = default;
   // As the old `LearnerModelParamLegacy` is still used by binary IO, we keep
   // this one as an immutable copy.
   LearnerModelParam(Context const* ctx, LearnerModelParamLegacy const& user_param,
-                    linalg::Tensor<float, 1> base_margin, ObjInfo t);
-  LearnerModelParam(LearnerModelParamLegacy const& user_param, ObjInfo t);
-  LearnerModelParam(bst_feature_t n_features, linalg::Tensor<float, 1> base_margin,
-                    uint32_t n_groups)
-      : base_score_{std::move(base_margin)}, num_feature{n_features}, num_output_group{n_groups} {}
+                    linalg::Tensor<float, 1> base_margin, ObjInfo t, MultiStrategy multi_strategy);
+  LearnerModelParam(LearnerModelParamLegacy const& user_param, ObjInfo t,
+                    MultiStrategy multi_strategy);
+  LearnerModelParam(bst_feature_t n_features, linalg::Tensor<float, 1> base_score,
+                    std::uint32_t n_groups, bst_target_t n_targets, MultiStrategy multi_strategy)
+      : base_score_{std::move(base_score)},
+        num_feature{n_features},
+        num_output_group{std::max(n_groups, n_targets)},
+        multi_strategy{multi_strategy} {}
 
   linalg::TensorView<float const, 1> BaseScore(Context const* ctx) const;
-  linalg::TensorView<float const, 1> BaseScore(int32_t device) const;
+  [[nodiscard]] linalg::TensorView<float const, 1> BaseScore(std::int32_t device) const;
 
   void Copy(LearnerModelParam const& that);
+  [[nodiscard]] bool IsVectorLeaf() const noexcept {
+    return multi_strategy == MultiStrategy::kMonolithic;
+  }
+  [[nodiscard]] bst_target_t OutputLength() const noexcept { return this->num_output_group; }
+  [[nodiscard]] bst_target_t LeafLength() const noexcept {
+    return this->IsVectorLeaf() ? this->OutputLength() : 1;
+  }
 
   /* \brief Whether this parameter is initialized with LearnerModelParamLegacy. */
-  bool Initialized() const { return num_feature != 0 && num_output_group != 0; }
+  [[nodiscard]] bool Initialized() const { return num_feature != 0 && num_output_group != 0; }
 };
 
 }  // namespace xgboost
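For illustration (not part of the patch): a minimal, hypothetical sketch of how the new `LearnerModelParam` accessors are expected to behave. The feature/target counts here are made up, and the base-score tensor is built the same way the patch itself does it.

```cpp
#include <xgboost/context.h>  // Context
#include <xgboost/learner.h>  // LearnerModelParam, MultiStrategy
#include <xgboost/linalg.h>   // Tensor

#include <cassert>

int main() {
  using namespace xgboost;
  // One tree for all targets (kMonolithic): leaves are vectors over targets.
  LearnerModelParam mono{/*n_features=*/4,
                         linalg::Tensor<float, 1>{{0.5f}, {1}, Context::kCpuId},
                         /*n_groups=*/1, /*n_targets=*/3, MultiStrategy::kMonolithic};
  assert(mono.IsVectorLeaf());
  assert(mono.OutputLength() == 3);  // num_output_group = max(n_groups, n_targets)
  assert(mono.LeafLength() == 3);    // a vector leaf carries all 3 targets

  // One tree per target (kComposite, the default): scalar leaves.
  LearnerModelParam comp{4, linalg::Tensor<float, 1>{{0.5f}, {1}, Context::kCpuId},
                         1, 3, MultiStrategy::kComposite};
  assert(!comp.IsVectorLeaf());
  assert(comp.OutputLength() == 3);  // still 3 outputs in total
  assert(comp.LeafLength() == 1);    // but each tree emits a scalar
  return 0;
}
```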
diff --git a/src/c_api/c_api_utils.h b/src/c_api/c_api_utils.h
index 78c477f42..8908364f2 100644
--- a/src/c_api/c_api_utils.h
+++ b/src/c_api/c_api_utils.h
@@ -12,10 +12,11 @@
 #include <vector>
 
 #include "xgboost/c_api.h"
-#include "xgboost/data.h"    // DMatrix
+#include "xgboost/data.h"         // DMatrix
+#include "xgboost/feature_map.h"  // for FeatureMap
 #include "xgboost/json.h"
 #include "xgboost/learner.h"
-#include "xgboost/linalg.h"  // ArrayInterfaceHandler
+#include "xgboost/linalg.h"       // ArrayInterfaceHandler
 #include "xgboost/logging.h"
 #include "xgboost/string_view.h"  // StringView
 
diff --git a/src/learner.cc b/src/learner.cc
index 0e47c694c..454855355 100644
--- a/src/learner.cc
+++ b/src/learner.cc
@@ -6,54 +6,67 @@
  */
 #include "xgboost/learner.h"
 
-#include <dmlc/any.h>
-#include <dmlc/io.h>
-#include <dmlc/parameter.h>
-#include <dmlc/thread_local.h>
+#include <dmlc/io.h>            // for Stream
+#include <dmlc/parameter.h>     // for FieldEntry, DMLC_DECLARE_FIELD, Parameter, DMLC...
+#include <dmlc/thread_local.h>  // for ThreadLocalStore
 
-#include <algorithm>
-#include <array>
-#include <iomanip>
-#include <iterator>
-#include <limits>  // std::numeric_limits
-#include <memory>
-#include <set>
-#include <sstream>
-#include <string>
-#include <unordered_map>
-#include <utility>  // for as_const
-#include <vector>
+#include <algorithm>      // for equal, max, transform, sort, find_if, all_of
+#include <array>          // for array
+#include <atomic>         // for atomic
+#include <cctype>         // for isalpha, isspace
+#include <cmath>          // for isnan, isinf
+#include <cstdint>        // for int32_t, uint32_t, int64_t, uint64_t
+#include <cstdlib>        // for atoi
+#include <cstring>        // for memcpy, size_t, memset
+#include <functional>     // for less
+#include <iomanip>        // for operator<<, setiosflags
+#include <iterator>       // for back_insert_iterator, distance, back_inserter
+#include <limits>         // for numeric_limits
+#include <memory>         // for allocator, unique_ptr, shared_ptr, operator==
+#include <mutex>          // for mutex, lock_guard
+#include <set>            // for set
+#include <sstream>        // for operator<<, basic_ostream, basic_ostream::opera...
+#include <stack>          // for stack
+#include <string>         // for basic_string, char_traits, operator<, string
+#include <system_error>   // for errc
+#include <tuple>          // for get
+#include <unordered_map>  // for operator!=, unordered_map
+#include <utility>        // for pair, as_const, move, swap
+#include <vector>         // for vector
 
-#include "collective/communicator-inl.h"
-#include "common/api_entry.h"  // XGBAPIThreadLocalEntry
-#include "common/charconv.h"
-#include "common/common.h"
-#include "common/io.h"
-#include "common/observer.h"
-#include "common/random.h"
-#include "common/threading_utils.h"
-#include "common/timer.h"
-#include "common/version.h"
-#include "xgboost/base.h"
-#include "xgboost/c_api.h"
-#include "xgboost/context.h"  // Context
-#include "xgboost/data.h"
-#include "xgboost/feature_map.h"
-#include "xgboost/gbm.h"
-#include "xgboost/host_device_vector.h"
-#include "xgboost/json.h"
-#include "xgboost/logging.h"
-#include "xgboost/metric.h"
-#include "xgboost/model.h"
-#include "xgboost/objective.h"
-#include "xgboost/parameter.h"
-#include "xgboost/predictor.h"
+#include "collective/communicator-inl.h"  // for Allreduce, Broadcast, GetRank, IsDistributed
+#include "collective/communicator.h"      // for Operation
+#include "common/api_entry.h"             // for XGBAPIThreadLocalEntry
+#include "common/charconv.h"              // for to_chars, to_chars_result, NumericLimits, from_...
+#include "common/common.h"                // for ToString, Split
+#include "common/io.h"                    // for PeekableInStream, ReadAll, FixedSizeStream, Mem...
+#include "common/observer.h"              // for TrainingObserver
+#include "common/random.h"                // for GlobalRandom
+#include "common/timer.h"                 // for Monitor
+#include "common/version.h"               // for Version
+#include "dmlc/endian.h"                  // for ByteSwap, DMLC_IO_NO_ENDIAN_SWAP
+#include "xgboost/base.h"                 // for Args, bst_float, GradientPair, bst_feature_t
+#include "xgboost/context.h"              // for Context
+#include "xgboost/data.h"                 // for DMatrix, MetaInfo
+#include "xgboost/gbm.h"                  // for GradientBooster
+#include "xgboost/global_config.h"        // for GlobalConfiguration, GlobalConfigThreadLocalStore
+#include "xgboost/host_device_vector.h"   // for HostDeviceVector
+#include "xgboost/json.h"                 // for Json, get, Object, String, IsA, Array, ToJson
+#include "xgboost/linalg.h"               // for Tensor, TensorView
+#include "xgboost/logging.h"              // for CHECK, LOG, CHECK_EQ
+#include "xgboost/metric.h"               // for Metric
+#include "xgboost/objective.h"            // for ObjFunction
+#include "xgboost/parameter.h"            // for DECLARE_FIELD_ENUM_CLASS, XGBoostParameter
+#include "xgboost/predictor.h"            // for PredictionContainer, PredictionCacheEntry
+#include "xgboost/string_view.h"          // for operator<<, StringView
+#include "xgboost/task.h"                 // for ObjInfo
 
 namespace {
-
 const char* kMaxDeltaStepDefaultValue = "0.7";
 }  // anonymous namespace
 
+DECLARE_FIELD_ENUM_CLASS(xgboost::MultiStrategy);
+
 namespace xgboost {
 Learner::~Learner() = default;
 namespace {
@@ -86,8 +99,10 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy> {
   /*! \brief the version of XGBoost. */
   std::uint32_t major_version;
   std::uint32_t minor_version;
-
-  uint32_t num_target{1};
+  /**
+   * \brief Number of target variables.
+   */
+  bst_target_t num_target;
   /**
    * \brief Whether we should calculate the base score from training data.
    *
@@ -113,7 +128,7 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy> {
     }
     // Skip other legacy fields.
   }
-  Json ToJson() const {
+  [[nodiscard]] Json ToJson() const {
     Json obj{Object{}};
     char floats[NumericLimits<float>::kToCharsSize];
     auto ret = to_chars(floats, floats + NumericLimits<float>::kToCharsSize, base_score);
@@ -163,7 +178,7 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy> {
     from_chars(str.c_str(), str.c_str() + str.size(), base_score);
   }
 
-  LearnerModelParamLegacy ByteSwap() const {
+  [[nodiscard]] LearnerModelParamLegacy ByteSwap() const {
     LearnerModelParamLegacy x = *this;
     dmlc::ByteSwap(&x.base_score, sizeof(x.base_score), 1);
     dmlc::ByteSwap(&x.num_feature, sizeof(x.num_feature), 1);
@@ -226,35 +241,38 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy> {
     DMLC_DECLARE_FIELD(num_feature)
         .set_default(0)
         .describe(
-            "Number of features in training data,"
-            " this parameter will be automatically detected by learner.");
+            "Number of features in training data, this parameter will be automatically detected by "
+            "learner.");
     DMLC_DECLARE_FIELD(num_class).set_default(0).set_lower_bound(0).describe(
         "Number of class option for multi-class classifier. "
         " By default equals 0 and corresponds to binary classifier.");
     DMLC_DECLARE_FIELD(num_target)
         .set_default(1)
         .set_lower_bound(1)
-        .describe("Number of target for multi-target regression.");
+        .describe("Number of output targets. Can be set automatically if not specified.");
     DMLC_DECLARE_FIELD(boost_from_average)
         .set_default(true)
        .describe("Whether we should calculate the base score from training data.");
   }
 };
 
-LearnerModelParam::LearnerModelParam(LearnerModelParamLegacy const& user_param, ObjInfo t)
-    : num_feature{user_param.num_feature}, task{t} {
-  auto n_classes = std::max(static_cast<uint32_t>(user_param.num_class), 1u);
-  auto n_targets = user_param.num_target;
-  num_output_group = std::max(n_classes, n_targets);
-  // For version < 1.6, n_targets == 0
-  CHECK(n_classes <= 1 || n_targets <= 1)
-      << "Multi-class multi-output is not yet supported. n_classes:" << n_classes
-      << ", n_targets:" << n_targets;
+LearnerModelParam::LearnerModelParam(LearnerModelParamLegacy const& user_param, ObjInfo t,
+                                     MultiStrategy multi_strategy)
+    : num_feature{user_param.num_feature},
+      num_output_group{
+          std::max(static_cast<std::uint32_t>(user_param.num_class), user_param.num_target)},
+      task{t},
+      multi_strategy{multi_strategy} {
+  if (user_param.num_class > 1 && user_param.num_target > 1) {
+    LOG(FATAL) << "multi-target-multi-class is not yet supported. Output classes:"
+               << user_param.num_class << ", output targets:" << user_param.num_target;
+  }
 }
 
 LearnerModelParam::LearnerModelParam(Context const* ctx, LearnerModelParamLegacy const& user_param,
-                                     linalg::Tensor<float, 1> base_margin, ObjInfo t)
-    : LearnerModelParam{user_param, t} {
+                                     linalg::Tensor<float, 1> base_margin, ObjInfo t,
+                                     MultiStrategy multi_strategy)
+    : LearnerModelParam{user_param, t, multi_strategy} {
   std::swap(base_score_, base_margin);
   // Make sure read access everywhere for thread-safe prediction.
   std::as_const(base_score_).HostView();
@@ -297,6 +315,7 @@ void LearnerModelParam::Copy(LearnerModelParam const& that) {
   num_feature = that.num_feature;
   num_output_group = that.num_output_group;
   task = that.task;
+  multi_strategy = that.multi_strategy;
 }
 
 struct LearnerTrainParam : public XGBoostParameter<LearnerTrainParam> {
@@ -306,18 +325,26 @@ struct LearnerTrainParam : public XGBoostParameter<LearnerTrainParam> {
   // specified by users. Move them to model parameter once we can get rid of binary IO.
   std::string booster;
   std::string objective;
+  // This is a training parameter and is not saved (nor loaded) in the model.
+  MultiStrategy multi_strategy{MultiStrategy::kComposite};
 
   // declare parameters
   DMLC_DECLARE_PARAMETER(LearnerTrainParam) {
     DMLC_DECLARE_FIELD(disable_default_eval_metric)
         .set_default(false)
         .describe("Flag to disable default metric. Set to >0 to disable");
-    DMLC_DECLARE_FIELD(booster)
-        .set_default("gbtree")
-        .describe("Gradient booster used for training.");
+    DMLC_DECLARE_FIELD(booster).set_default("gbtree").describe(
+        "Gradient booster used for training.");
     DMLC_DECLARE_FIELD(objective)
         .set_default("reg:squarederror")
         .describe("Objective function used for obtaining gradient.");
+    DMLC_DECLARE_FIELD(multi_strategy)
+        .add_enum("composite", MultiStrategy::kComposite)
+        .add_enum("monolithic", MultiStrategy::kMonolithic)
+        .set_default(MultiStrategy::kComposite)
+        .describe(
+            "Strategy used for training multi-target models. `monolithic` means building one "
+            "single tree for all targets.");
   }
 };
 
@@ -379,8 +406,10 @@ class LearnerConfiguration : public Learner {
       // transform to margin
       h_base_score(0) = obj_->ProbToMargin(mparam_.base_score);
 
+    CHECK(tparam_.GetInitialised());
     // move it to model param, which is shared with all other components.
-    learner_model_param_ = LearnerModelParam(Ctx(), mparam_, std::move(base_score), task);
+    learner_model_param_ =
+        LearnerModelParam(Ctx(), mparam_, std::move(base_score), task, tparam_.multi_strategy);
     CHECK(learner_model_param_.Initialized());
     CHECK_NE(learner_model_param_.BaseScore(Ctx()).Size(), 0);
   }
@@ -748,7 +777,6 @@ class LearnerConfiguration : public Learner {
         << "0 feature is supplied. Are you using raw Booster interface?";
     // Remove these once binary IO is gone.
     cfg_["num_feature"] = common::ToString(mparam_.num_feature);
-    cfg_["num_class"] = common::ToString(mparam_.num_class);
   }
 
   void ConfigureGBM(LearnerTrainParam const& old, Args const& args) {
@@ -779,9 +807,17 @@ class LearnerConfiguration : public Learner {
     if (obj_ == nullptr || tparam_.objective != old.objective) {
       obj_.reset(ObjFunction::Create(tparam_.objective, &ctx_));
     }
+
+    bool has_nc {cfg_.find("num_class") != cfg_.cend()};
+    // Inject num_class into configuration.
+    // FIXME(jiamingy): Remove the duplicated parameter in softmax
+    cfg_["num_class"] = common::ToString(mparam_.num_class);
     auto& args = *p_args;
     args = {cfg_.cbegin(), cfg_.cend()};  // renew
     obj_->Configure(args);
+    if (!has_nc) {
+      cfg_.erase("num_class");
+    }
   }
 
   void ConfigureMetrics(Args const& args) {
@@ -805,7 +841,7 @@ class LearnerConfiguration : public Learner {
   void ConfigureTargets() {
     CHECK(this->obj_);
     auto const& cache = prediction_container_.Container();
-    size_t n_targets = 1;
+    bst_target_t n_targets = 1;
     for (auto const& d : cache) {
       if (n_targets == 1) {
         n_targets = this->obj_->Targets(d.first.ptr->Info());
@@ -814,7 +850,8 @@ class LearnerConfiguration : public Learner {
         CHECK(n_targets == t || 1 == t) << "Inconsistent labels.";
       }
     }
-    if (mparam_.num_target != 1) {
+
+    if (mparam_.num_target > 1) {
      CHECK(n_targets == 1 || n_targets == mparam_.num_target)
          << "Inconsistent configuration of num_target. Configuration result from input data:"
          << n_targets << ", configuration from parameter:" << mparam_.num_target;
@@ -974,9 +1011,6 @@ class LearnerIO : public LearnerConfiguration {
     if (!DMLC_IO_NO_ENDIAN_SWAP) {
       mparam_ = mparam_.ByteSwap();
     }
-    if (mparam_.num_target == 0) {
-      mparam_.num_target = 1;
-    }
 
     CHECK(fi->Read(&tparam_.objective)) << "BoostLearner: wrong model format";
     CHECK(fi->Read(&tparam_.booster)) << "BoostLearner: wrong model format";
@@ -1030,7 +1064,7 @@ class LearnerIO : public LearnerConfiguration {
                              : obj_->ProbToMargin(mparam_.base_score)},
                             {1},
                             Context::kCpuId},
-        obj_->Task());
+        obj_->Task(), tparam_.multi_strategy);
 
     if (attributes_.find("objective") != attributes_.cend()) {
       auto obj_str = attributes_.at("objective");
@@ -1058,7 +1092,6 @@ class LearnerIO : public LearnerConfiguration {
     mparam_.major_version = std::get<0>(Version::Self());
     mparam_.minor_version = std::get<1>(Version::Self());
 
-    cfg_["num_class"] = common::ToString(mparam_.num_class);
     cfg_["num_feature"] = common::ToString(mparam_.num_feature);
 
     auto n = tparam_.__DICT__();
@@ -1071,6 +1104,8 @@ class LearnerIO : public LearnerConfiguration {
   // JSON serialization format.
   void SaveModel(dmlc::Stream* fo) const override {
     this->CheckModelInitialized();
+    CHECK(!this->learner_model_param_.IsVectorLeaf())
+        << "Please use JSON/UBJ format for model serialization with multi-output models.";
 
     LearnerModelParamLegacy mparam = mparam_;  // make a copy to potentially modify
     std::vector<std::pair<std::string, std::string>> extra_attr;
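For illustration (not part of the patch): a rough usage sketch of the new training parameter. It assumes a `DMatrix` whose labels have been reshaped to two columns; `multi_strategy` flows through the regular `SetParam`/`Configure` path registered in `LearnerTrainParam` above.

```cpp
#include <xgboost/data.h>     // DMatrix
#include <xgboost/learner.h>  // Learner

#include <memory>  // shared_ptr, unique_ptr

// Hypothetical helper: `p_fmat` is assumed to carry (n_samples, 2) labels.
void ConfigureMultiTarget(std::shared_ptr<xgboost::DMatrix> p_fmat) {
  std::unique_ptr<xgboost::Learner> learner{xgboost::Learner::Create({p_fmat})};
  // "composite" (the default) grows one tree per target; "monolithic" grows a
  // single tree with vector leaves.
  learner->SetParam("multi_strategy", "monolithic");
  learner->SetParam("num_target", "2");
  learner->Configure();  // runs ConfigureTargets() and validates num_target
}
```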
diff --git a/src/predictor/predictor.cc b/src/predictor/predictor.cc
index c6ef7fe51..2559447f3 100644
--- a/src/predictor/predictor.cc
+++ b/src/predictor/predictor.cc
@@ -3,18 +3,19 @@
  */
 #include "xgboost/predictor.h"
 
-#include <dmlc/registry.h>
+#include <dmlc/registry.h>  // for DMLC_REGISTRY_LINK_TAG
 
-#include <string>  // std::string
+#include <cstdint>  // for int32_t
+#include <string>   // for string, to_string
 
-#include "../gbm/gbtree.h"               // GBTreeModel
-#include "xgboost/base.h"                // bst_row_t,bst_group_t
-#include "xgboost/context.h"             // Context
-#include "xgboost/data.h"                // MetaInfo
-#include "xgboost/host_device_vector.h"  // HostDeviceVector
-#include "xgboost/learner.h"             // LearnerModelParam
-#include "xgboost/linalg.h"              // Tensor
-#include "xgboost/logging.h"
+#include "../gbm/gbtree_model.h"         // for GBTreeModel
+#include "xgboost/base.h"                // for bst_float, Args, bst_group_t, bst_row_t
+#include "xgboost/context.h"             // for Context
+#include "xgboost/data.h"                // for MetaInfo
+#include "xgboost/host_device_vector.h"  // for HostDeviceVector
+#include "xgboost/learner.h"             // for LearnerModelParam
+#include "xgboost/linalg.h"              // for Tensor, TensorView
+#include "xgboost/logging.h"             // for CHECK_EQ, CHECK_NE, LOG
 
 namespace dmlc {
 DMLC_REGISTRY_ENABLE(::xgboost::PredictorReg);
@@ -45,15 +46,16 @@ void ValidateBaseMarginShape(linalg::Tensor<float, 2> const& margin, bst_row_t n
 void Predictor::InitOutPredictions(const MetaInfo& info, HostDeviceVector<bst_float>* out_preds,
                                    const gbm::GBTreeModel& model) const {
   CHECK_NE(model.learner_model_param->num_output_group, 0);
-  size_t n_classes = model.learner_model_param->num_output_group;
-  size_t n = n_classes * info.num_row_;
+  std::size_t n{model.learner_model_param->OutputLength() * info.num_row_};
+
   const HostDeviceVector<bst_float>* base_margin = info.base_margin_.Data();
   if (ctx_->gpu_id >= 0) {
     out_preds->SetDevice(ctx_->gpu_id);
  }
   if (!base_margin->Empty()) {
     out_preds->Resize(n);
-    ValidateBaseMarginShape(info.base_margin_, info.num_row_, n_classes);
+    ValidateBaseMarginShape(info.base_margin_, info.num_row_,
+                            model.learner_model_param->OutputLength());
     out_preds->Copy(*base_margin);
   } else {
     // cannot rely on the Resize to fill as it might skip if the size is already correct.
@@ -64,12 +66,10 @@ void Predictor::InitOutPredictions(const MetaInfo& info, HostDeviceVector<bst_float>* out_preds,
   LearnerModelParam mparam(n_features, linalg::Tensor<float, 1>{{base_score}, shape, device},
-                           n_groups);
+                           n_groups, 1, MultiStrategy::kComposite);
   return mparam;
 }
diff --git a/tests/cpp/test_multi_target.cc b/tests/cpp/test_multi_target.cc
index 79d822601..d2e34235c 100644
--- a/tests/cpp/test_multi_target.cc
+++ b/tests/cpp/test_multi_target.cc
@@ -2,24 +2,26 @@
  * Copyright 2023 by XGBoost Contributors
  */
 #include <gtest/gtest.h>
-#include <xgboost/base.h>     // bst_target_t
-#include <xgboost/data.h>     // DMatrix
-#include <xgboost/json.h>     // Json,Object,Number,get
-#include <xgboost/learner.h>  // Learner
+#include <xgboost/base.h>     // for Args, bst_target_t
+#include <xgboost/data.h>     // for DMatrix, MetaInfo
+#include <xgboost/json.h>     // for Json, get, Object, String
+#include <xgboost/learner.h>  // for Learner
 
-#include <cstddef>  // size_t
-#include <memory>   // shared_ptr,unique_ptr
-#include <numeric>
-#include <string>   // stod
-#include <vector>
+#include <algorithm>  // for copy
+#include <cstddef>    // for size_t
+#include <memory>     // for shared_ptr, allocator, __shared_ptr_access
+#include <numeric>    // for accumulate
+#include <string>     // for stod, string
+#include <vector>     // for vector
 
-#include "../../src/common/linalg_op.h"  // cbegin,cend
-#include "../../src/common/stats.h"      // Median
-#include "helpers.h"                     // RandomDataGenerator
-#include "xgboost/linalg.h"
+#include "../../src/common/linalg_op.h"           // for begin, cbegin, cend
+#include "../../src/common/stats.h"               // for Median
+#include "../../src/common/transform_iterator.h"  // for IndexTransformIter
+#include "helpers.h"                              // for RandomDataGenerator
+#include "xgboost/host_device_vector.h"           // for HostDeviceVector
+#include "xgboost/linalg.h"                       // for Tensor, All, TensorView, Vector
 
 namespace xgboost {
-
 class TestL1MultiTarget : public ::testing::Test {
   std::shared_ptr<DMatrix> Xy_;
   std::shared_ptr<DMatrix> Xyw_;
@@ -117,4 +119,16 @@ TEST_F(TestL1MultiTarget, Approx) { this->RunTest("approx"); }
 #if defined(XGBOOST_USE_CUDA)
 TEST_F(TestL1MultiTarget, GpuHist) { this->RunTest("gpu_hist"); }
 #endif  // defined(XGBOOST_USE_CUDA)
+
+TEST(MultiStrategy, Configure) {
+  auto p_fmat = RandomDataGenerator{12ul, 3ul, 0.0}.GenerateDMatrix();
+  p_fmat->Info().labels.Reshape(p_fmat->Info().num_row_, 2);
+  std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};
+  learner->SetParams(Args{{"multi_strategy", "monolithic"}, {"num_target", "2"}});
+  learner->Configure();
+  ASSERT_EQ(learner->Groups(), 2);
+
+  learner->SetParams(Args{{"multi_strategy", "monolithic"}, {"num_target", "0"}});
+  ASSERT_THROW({ learner->Configure(); }, dmlc::Error);
+}
 }  // namespace xgboost
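For illustration (not part of the patch): a possible companion test for the new binary-format guard in `SaveModel`. This is only a sketch; it assumes the helpers used above, that `common::MemoryBufferStream` from `src/common/io.h` is available in this test file, and that a monolithic model can already be trained with the `hist` tree method at this point in the series.

```cpp
TEST(MultiStrategy, BinarySaveRejected) {
  auto p_fmat = RandomDataGenerator{12ul, 3ul, 0.0}.GenerateDMatrix();
  p_fmat->Info().labels.Reshape(p_fmat->Info().num_row_, 2);
  std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};
  learner->SetParams(Args{{"multi_strategy", "monolithic"},
                          {"num_target", "2"},
                          {"tree_method", "hist"}});
  learner->Configure();
  learner->UpdateOneIter(0, p_fmat);  // IsVectorLeaf() is now true

  std::string buffer;
  common::MemoryBufferStream fo(&buffer);
  // The deprecated binary format cannot represent vector-leaf models.
  EXPECT_THROW(learner->SaveModel(&fo), dmlc::Error);

  Json out{Object{}};
  learner->SaveModel(&out);  // the JSON path still works
}
```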