diff --git a/include/xgboost/gbm.h b/include/xgboost/gbm.h index f06a22044..7b291aa62 100644 --- a/include/xgboost/gbm.h +++ b/include/xgboost/gbm.h @@ -146,6 +146,10 @@ class GradientBooster { virtual std::vector DumpModel(const FeatureMap& fmap, bool with_stats, std::string format) const = 0; + /*! + * \brief Whether the current booster uses GPU. + */ + virtual bool UseGPU() const = 0; /*! * \brief create a gradient booster from given name * \param name name of gradient booster diff --git a/include/xgboost/generic_parameters.h b/include/xgboost/generic_parameters.h index a3fce865a..8f9836cf9 100644 --- a/include/xgboost/generic_parameters.h +++ b/include/xgboost/generic_parameters.h @@ -8,18 +8,15 @@ #include #include +#include + namespace xgboost { -enum class TreeMethod : int { - kAuto = 0, kApprox = 1, kExact = 2, kHist = 3, - kGPUExact = 4, kGPUHist = 5 -}; enum class DataSplitMode : int { kAuto = 0, kCol = 1, kRow = 2 }; } // namespace xgboost -DECLARE_FIELD_ENUM_CLASS(xgboost::TreeMethod); DECLARE_FIELD_ENUM_CLASS(xgboost::DataSplitMode); namespace xgboost { @@ -30,8 +27,6 @@ struct LearnerTrainParam : public dmlc::Parameter { bool seed_per_iteration; // data split mode, can be row, col, or none. DataSplitMode dsplit; - // tree construction method - TreeMethod tree_method; // number of threads to use if OpenMP is enabled // if equals 0, use system default int nthread; @@ -42,6 +37,8 @@ struct LearnerTrainParam : public dmlc::Parameter { // number of devices to use, -1 implies using all available devices. 
int n_gpus; + std::string booster; + // declare parameters DMLC_DECLARE_PARAMETER(LearnerTrainParam) { DMLC_DECLARE_FIELD(seed).set_default(0).describe( @@ -58,15 +55,6 @@ struct LearnerTrainParam : public dmlc::Parameter { .add_enum("col", DataSplitMode::kCol) .add_enum("row", DataSplitMode::kRow) .describe("Data split mode for distributed training."); - DMLC_DECLARE_FIELD(tree_method) - .set_default(TreeMethod::kAuto) - .add_enum("auto", TreeMethod::kAuto) - .add_enum("approx", TreeMethod::kApprox) - .add_enum("exact", TreeMethod::kExact) - .add_enum("hist", TreeMethod::kHist) - .add_enum("gpu_exact", TreeMethod::kGPUExact) - .add_enum("gpu_hist", TreeMethod::kGPUHist) - .describe("Choice of tree construction method."); DMLC_DECLARE_FIELD(nthread).set_default(0).describe( "Number of threads to use."); DMLC_DECLARE_FIELD(disable_default_eval_metric) @@ -79,6 +67,9 @@ struct LearnerTrainParam : public dmlc::Parameter { .set_default(0) .set_lower_bound(-1) .describe("Number of GPUs to use for multi-gpu algorithms."); + DMLC_DECLARE_FIELD(booster) + .set_default("gbtree") + .describe("Gradient booster used for training."); } }; } // namespace xgboost diff --git a/src/common/common.cc b/src/common/common.cc index b823f6a18..60ba3e16e 100644 --- a/src/common/common.cc +++ b/src/common/common.cc @@ -37,6 +37,7 @@ GPUSet GPUSet::All(GpuIdType gpu_id, GpuIdType n_gpus, int32_t n_rows) { CHECK_GE(n_gpus, -1) << "n_gpus must be >= -1."; GpuIdType const n_devices_visible = AllVisible().Size(); + CHECK_LE(n_gpus, n_devices_visible); if (n_devices_visible == 0 || n_gpus == 0 || n_rows == 0) { LOG(DEBUG) << "Runing on CPU."; return Empty(); diff --git a/src/common/host_device_vector.cu b/src/common/host_device_vector.cu index 487ae1436..872fffc77 100644 --- a/src/common/host_device_vector.cu +++ b/src/common/host_device_vector.cu @@ -360,7 +360,9 @@ struct HostDeviceVectorImpl { void Shard(const GPUDistribution& distribution) { if (distribution_ == distribution) { return; } - 
CHECK(distribution_.IsEmpty()); + CHECK(distribution_.IsEmpty()) + << "This: " << distribution_.Devices().Size() << ", " + << "Others: " << distribution.Devices().Size(); distribution_ = distribution; InitShards(); } diff --git a/src/gbm/gblinear.cc b/src/gbm/gblinear.cc index 66c934ff9..f2c1af2d3 100644 --- a/src/gbm/gblinear.cc +++ b/src/gbm/gblinear.cc @@ -180,6 +180,14 @@ class GBLinear : public GradientBooster { return model_.DumpModel(fmap, with_stats, format); } + bool UseGPU() const override { + if (param_.updater == "gpu_coord_descent") { + return true; + } else { + return false; + } + } + protected: void PredictBatchInternal(DMatrix *p_fmat, std::vector *out_preds) { @@ -257,6 +265,7 @@ class GBLinear : public GradientBooster { } preds[gid] = psum; } + // biase margin score bst_float base_margin_; // model field diff --git a/src/gbm/gbtree.cc b/src/gbm/gbtree.cc index 497e5cacc..3ff0f475f 100644 --- a/src/gbm/gbtree.cc +++ b/src/gbm/gbtree.cc @@ -1,5 +1,5 @@ /*! - * Copyright 2014 by Contributors + * Copyright 2014-2019 by Contributors * \file gbtree.cc * \brief gradient boosted tree implementation. * \author Tianqi Chen @@ -11,309 +11,280 @@ #include #include #include + #include #include #include #include #include #include + #include "../common/common.h" #include "../common/host_device_vector.h" #include "../common/random.h" +#include "gbtree.h" #include "gbtree_model.h" #include "../common/timer.h" + namespace xgboost { namespace gbm { DMLC_REGISTRY_FILE_TAG(gbtree); -// boosting process types -enum TreeProcessType { - kDefault, - kUpdate -}; +void GBTree::Configure(const std::vector >& cfg) { + this->cfg_ = cfg; + tparam_.InitAllowUnknown(cfg); + std::string updater_seq = tparam_.updater_seq; -/*! \brief training parameters */ -struct GBTreeTrainParam : public dmlc::Parameter { - /*! - * \brief number of parallel trees constructed each iteration - * use this option to support boosted random forest - */ - int num_parallel_tree; - /*! 
\brief tree updater sequence */ - std::string updater_seq; - /*! \brief type of boosting process to run */ - int process_type; - std::string predictor; - // declare parameters - DMLC_DECLARE_PARAMETER(GBTreeTrainParam) { - DMLC_DECLARE_FIELD(num_parallel_tree) - .set_default(1) - .set_lower_bound(1) - .describe("Number of parallel trees constructed during each iteration."\ - " This option is used to support boosted random forest."); - DMLC_DECLARE_FIELD(updater_seq) - .set_default("grow_colmaker,prune") - .describe("Tree updater sequence."); - DMLC_DECLARE_FIELD(process_type) - .set_default(kDefault) - .add_enum("default", kDefault) - .add_enum("update", kUpdate) - .describe("Whether to run the normal boosting process that creates new trees,"\ - " or to update the trees in an existing model."); - // add alias - DMLC_DECLARE_ALIAS(updater_seq, updater); - DMLC_DECLARE_FIELD(predictor) - .set_default("cpu_predictor") - .describe("Predictor algorithm type"); - } -}; + ConfigureUpdaters({cfg.begin(), cfg.cend()}); -/*! \brief training parameters */ -struct DartTrainParam : public dmlc::Parameter { - /*! \brief type of sampling algorithm */ - int sample_type; - /*! \brief type of normalization algorithm */ - int normalize_type; - /*! \brief fraction of trees to drop during the dropout */ - float rate_drop; - /*! \brief whether at least one tree should always be dropped during the dropout */ - bool one_drop; - /*! \brief probability of skipping the dropout during an iteration */ - float skip_drop; - /*! 
\brief learning step size for a time */ - float learning_rate; - // declare parameters - DMLC_DECLARE_PARAMETER(DartTrainParam) { - DMLC_DECLARE_FIELD(sample_type) - .set_default(0) - .add_enum("uniform", 0) - .add_enum("weighted", 1) - .describe("Different types of sampling algorithm."); - DMLC_DECLARE_FIELD(normalize_type) - .set_default(0) - .add_enum("tree", 0) - .add_enum("forest", 1) - .describe("Different types of normalization algorithm."); - DMLC_DECLARE_FIELD(rate_drop) - .set_range(0.0f, 1.0f) - .set_default(0.0f) - .describe("Fraction of trees to drop during the dropout."); - DMLC_DECLARE_FIELD(one_drop) - .set_default(false) - .describe("Whether at least one tree should always be dropped during the dropout."); - DMLC_DECLARE_FIELD(skip_drop) - .set_range(0.0f, 1.0f) - .set_default(0.0f) - .describe("Probability of skipping the dropout during a boosting iteration."); - DMLC_DECLARE_FIELD(learning_rate) - .set_lower_bound(0.0f) - .set_default(0.3f) - .describe("Learning rate(step size) of update."); - DMLC_DECLARE_ALIAS(learning_rate, eta); - } -}; + model_.Configure(cfg); - -// cache entry -struct CacheEntry { - std::shared_ptr data; - std::vector predictions; -}; - -// gradient boosted trees -class GBTree : public GradientBooster { - public: - explicit GBTree(bst_float base_margin) : model_(base_margin) {} - - void InitCache(const std::vector > &cache) { - cache_ = cache; + // for the 'update' process_type, move trees into trees_to_update + if (tparam_.process_type == TreeProcessType::kUpdate) { + model_.InitTreesToUpdate(); } - void Configure(const std::vector >& cfg) override { - this->cfg_ = cfg; - model_.Configure(cfg); - // initialize the updaters only when needed. 
- std::string updater_seq = tparam_.updater_seq; - tparam_.InitAllowUnknown(cfg); - if (updater_seq != tparam_.updater_seq) updaters_.clear(); - for (const auto& up : updaters_) { - up->Init(cfg); + // configure predictor + predictor_ = std::unique_ptr( + Predictor::Create(tparam_.predictor, this->learner_param_)); + predictor_->Init(cfg, cache_); + monitor_.Init("GBTree"); +} + +void GBTree::PerformTreeMethodHeuristic(DMatrix* p_train, + std::map cfg) { + if (cfg.find("updater") != cfg.cend()) { + // This method is disabled when `updater` parameter is explicitly + // set, since only experts are expected to do so. + return; + } + + const TreeMethod current_tree_method = tparam_.tree_method; + + if (rabit::IsDistributed()) { + switch (current_tree_method) { + case TreeMethod::kAuto: + LOG(WARNING) << + "Tree method is automatically selected to be 'approx' " + "for distributed training."; + break; + case TreeMethod::kApprox: + case TreeMethod::kHist: + // things are okay, do nothing + break; + case TreeMethod::kExact: + LOG(WARNING) << "Tree method was set to be " + << "exact" + << "', but only 'approx' and 'hist' is available for distributed " + "training. The `tree_method` parameter is now being " + "changed to 'approx'"; + break; + case TreeMethod::kGPUExact: + // FIXME(trivialfis): Remove this line once GPU Exact is removed. 
+ LOG(FATAL) << "Distributed training is not available with GPU Exact algorithm."; + break; + case TreeMethod::kGPUHist: + break; + default: + LOG(FATAL) << "Unknown tree_method (" + << static_cast(current_tree_method) << ") detected"; } - // for the 'update' process_type, move trees into trees_to_update - if (tparam_.process_type == kUpdate) { - model_.InitTreesToUpdate(); - } - - // configure predictor - predictor_ = std::unique_ptr(Predictor::Create(tparam_.predictor, learner_param_)); - predictor_->Init(cfg, cache_); - monitor_.Init("GBTree"); - } - - void Load(dmlc::Stream* fi) override { - model_.Load(fi); - - this->cfg_.clear(); - this->cfg_.emplace_back(std::string("num_feature"), - common::ToString(model_.param.num_feature)); - } - - void Save(dmlc::Stream* fo) const override { - model_.Save(fo); - } - - bool AllowLazyCheckPoint() const override { - return model_.param.num_output_group == 1 || - tparam_.updater_seq.find("distcol") != std::string::npos; - } - - void DoBoost(DMatrix* p_fmat, - HostDeviceVector* in_gpair, - ObjFunction* obj) override { - std::vector > > new_trees; - const int ngroup = model_.param.num_output_group; - monitor_.Start("BoostNewTrees"); - if (ngroup == 1) { - std::vector > ret; - BoostNewTrees(in_gpair, p_fmat, 0, &ret); - new_trees.push_back(std::move(ret)); + if (current_tree_method != TreeMethod::kHist) { + LOG(WARNING) << "Tree method is automatically selected to be 'approx'" + " for distributed training."; + tparam_.tree_method = TreeMethod::kApprox; } else { - CHECK_EQ(in_gpair->Size() % ngroup, 0U) - << "must have exactly ngroup*nrow gpairs"; - // TODO(canonizer): perform this on GPU if HostDeviceVector has device set. 
- HostDeviceVector tmp + LOG(WARNING) << "Tree method is specified to be 'hist'" + " for distributed training."; + tparam_.tree_method = TreeMethod::kHist; + } + } else if (!p_train->SingleColBlock()) { + /* Some tree methods are not available for external-memory DMatrix */ + switch (current_tree_method) { + case TreeMethod::kAuto: + LOG(WARNING) << "Tree method is automatically set to 'approx' " + "since external-memory data matrix is used."; + break; + case TreeMethod::kApprox: + // things are okay, do nothing + break; + case TreeMethod::kExact: + LOG(WARNING) << "Tree method was set to be 'exact', " + "but currently we are only able to proceed with " + "approximate algorithm ('approx') because external-" + "memory data matrix is used."; + break; + case TreeMethod::kHist: + // things are okay, do nothing + break; + case TreeMethod::kGPUExact: + case TreeMethod::kGPUHist: + LOG(FATAL) + << "External-memory data matrix is not available with GPU algorithms"; + break; + default: + LOG(FATAL) << "Unknown tree_method (" + << static_cast(current_tree_method) << ") detected"; + } + tparam_.tree_method = TreeMethod::kApprox; + } else if (p_train->Info().num_row_ >= (4UL << 20UL) + && current_tree_method == TreeMethod::kAuto) { + /* Choose tree_method='approx' automatically for large data matrix */ + LOG(WARNING) << "Tree method is automatically selected to be " + "'approx' for faster speed. To use old behavior " + "(exact greedy algorithm on single machine), " + "set tree_method to 'exact'."; + tparam_.tree_method = TreeMethod::kApprox; + } + LOG(DEBUG) << "Using predictor: " << tparam_.predictor; +} + +void GBTree::ConfigureUpdaters(const std::map& cfg) { + // `updater` parameter was manually specified + if (cfg.find("updater") != cfg.cend()) { + LOG(WARNING) << "DANGER AHEAD: You have manually specified `updater` " + "parameter. The `tree_method` parameter will be ignored. " + "Incorrect sequence of updaters will produce undefined " + "behavior. 
For common uses, we recommend using " + "`tree_method` parameter instead."; + return; + } + + /* Choose updaters according to tree_method parameters */ + switch (tparam_.tree_method) { + case TreeMethod::kAuto: + // Use heuristic to choose between 'exact' and 'approx' + // This choice is deferred to PerformTreeMethodHeuristic(). + break; + case TreeMethod::kApprox: + tparam_.updater_seq = "grow_histmaker,prune"; + break; + case TreeMethod::kExact: + tparam_.updater_seq = "grow_colmaker,prune"; + break; + case TreeMethod::kHist: + LOG(INFO) << + "Tree method is selected to be 'hist', which uses a " + "single updater grow_quantile_histmaker."; + tparam_.updater_seq = "grow_quantile_histmaker"; + break; + case TreeMethod::kGPUExact: + this->AssertGPUSupport(); + tparam_.updater_seq = "grow_gpu,prune"; + if (cfg.find("predictor") == cfg.cend()) { + tparam_.predictor = "gpu_predictor"; + } + break; + case TreeMethod::kGPUHist: + this->AssertGPUSupport(); + tparam_.updater_seq = "grow_gpu_hist"; + if (cfg.find("predictor") == cfg.cend()) { + tparam_.predictor = "gpu_predictor"; + } + break; + default: + LOG(FATAL) << "Unknown tree_method (" + << static_cast(tparam_.tree_method) << ") detected"; + } +} + +void GBTree::DoBoost(DMatrix* p_fmat, + HostDeviceVector* in_gpair, + ObjFunction* obj) { + std::string updater_seq = tparam_.updater_seq; + this->PerformTreeMethodHeuristic(p_fmat, {this->cfg_.begin(), this->cfg_.end()}); + this->ConfigureUpdaters({this->cfg_.begin(), this->cfg_.end()}); + LOG(DEBUG) << "Using updaters: " << tparam_.updater_seq; + // initialize the updaters only when needed. 
+ if (updater_seq != tparam_.updater_seq) { + this->updaters_.clear(); + } + + std::vector > > new_trees; + const int ngroup = model_.param.num_output_group; + monitor_.Start("BoostNewTrees"); + if (ngroup == 1) { + std::vector > ret; + BoostNewTrees(in_gpair, p_fmat, 0, &ret); + new_trees.push_back(std::move(ret)); + } else { + CHECK_EQ(in_gpair->Size() % ngroup, 0U) + << "must have exactly ngroup*nrow gpairs"; + // TODO(canonizer): perform this on GPU if HostDeviceVector has device set. + HostDeviceVector tmp (in_gpair->Size() / ngroup, GradientPair(), GPUDistribution::Block(in_gpair->Distribution().Devices())); - const auto& gpair_h = in_gpair->ConstHostVector(); - auto nsize = static_cast(tmp.Size()); - for (int gid = 0; gid < ngroup; ++gid) { - std::vector& tmp_h = tmp.HostVector(); - #pragma omp parallel for schedule(static) - for (bst_omp_uint i = 0; i < nsize; ++i) { - tmp_h[i] = gpair_h[i * ngroup + gid]; - } - std::vector > ret; - BoostNewTrees(&tmp, p_fmat, gid, &ret); - new_trees.push_back(std::move(ret)); + const auto& gpair_h = in_gpair->ConstHostVector(); + auto nsize = static_cast(tmp.Size()); + for (int gid = 0; gid < ngroup; ++gid) { + std::vector& tmp_h = tmp.HostVector(); +#pragma omp parallel for schedule(static) + for (bst_omp_uint i = 0; i < nsize; ++i) { + tmp_h[i] = gpair_h[i * ngroup + gid]; } - } - monitor_.Stop("BoostNewTrees"); - monitor_.Start("CommitModel"); - this->CommitModel(std::move(new_trees)); - monitor_.Stop("CommitModel"); - } - - void PredictBatch(DMatrix* p_fmat, - HostDeviceVector* out_preds, - unsigned ntree_limit) override { - predictor_->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit); - } - - void PredictInstance(const SparsePage::Inst& inst, - std::vector* out_preds, - unsigned ntree_limit, - unsigned root_index) override { - predictor_->PredictInstance(inst, out_preds, model_, - ntree_limit, root_index); - } - - void PredictLeaf(DMatrix* p_fmat, - std::vector* out_preds, - unsigned ntree_limit) override { - 
predictor_->PredictLeaf(p_fmat, out_preds, model_, ntree_limit); - } - - void PredictContribution(DMatrix* p_fmat, - std::vector* out_contribs, - unsigned ntree_limit, bool approximate, int condition, - unsigned condition_feature) override { - predictor_->PredictContribution(p_fmat, out_contribs, model_, ntree_limit, approximate); - } - - void PredictInteractionContributions(DMatrix* p_fmat, - std::vector* out_contribs, - unsigned ntree_limit, bool approximate) override { - predictor_->PredictInteractionContributions(p_fmat, out_contribs, model_, - ntree_limit, approximate); - } - - std::vector DumpModel(const FeatureMap& fmap, - bool with_stats, - std::string format) const override { - return model_.DumpModel(fmap, with_stats, format); - } - - protected: - // initialize updater before using them - inline void InitUpdater() { - if (updaters_.size() != 0) return; - std::string tval = tparam_.updater_seq; - std::vector ups = common::Split(tval, ','); - for (const std::string& pstr : ups) { - std::unique_ptr up(TreeUpdater::Create(pstr.c_str(), learner_param_)); - up->Init(this->cfg_); - updaters_.push_back(std::move(up)); + std::vector > ret; + BoostNewTrees(&tmp, p_fmat, gid, &ret); + new_trees.push_back(std::move(ret)); } } - - // do group specific group - inline void BoostNewTrees(HostDeviceVector* gpair, - DMatrix *p_fmat, - int bst_group, - std::vector >* ret) { - this->InitUpdater(); - std::vector new_trees; - ret->clear(); - // create the trees - for (int i = 0; i < tparam_.num_parallel_tree; ++i) { - if (tparam_.process_type == kDefault) { - // create new tree - std::unique_ptr ptr(new RegTree()); - ptr->param.InitAllowUnknown(this->cfg_); - new_trees.push_back(ptr.get()); - ret->push_back(std::move(ptr)); - } else if (tparam_.process_type == kUpdate) { - CHECK_LT(model_.trees.size(), model_.trees_to_update.size()); - // move an existing tree from trees_to_update - auto t = std::move(model_.trees_to_update[model_.trees.size() + - bst_group * 
tparam_.num_parallel_tree + i]); - new_trees.push_back(t.get()); - ret->push_back(std::move(t)); - } - } - // update the trees - for (auto& up : updaters_) { - up->Update(gpair, p_fmat, new_trees); + monitor_.Stop("BoostNewTrees"); + monitor_.Start("CommitModel"); + this->CommitModel(std::move(new_trees)); + monitor_.Stop("CommitModel"); } - } - // commit new trees all at once - virtual void - CommitModel(std::vector>>&& new_trees) { - int num_new_trees = 0; - for (int gid = 0; gid < model_.param.num_output_group; ++gid) { - num_new_trees += new_trees[gid].size(); - model_.CommitModel(std::move(new_trees[gid]), gid); +void GBTree::InitUpdater() { + if (updaters_.size() != 0) return; + std::string tval = tparam_.updater_seq; + std::vector ups = common::Split(tval, ','); + for (const std::string& pstr : ups) { + std::unique_ptr up(TreeUpdater::Create(pstr.c_str(), learner_param_)); + up->Init(this->cfg_); + updaters_.push_back(std::move(up)); + } +} + +void GBTree::BoostNewTrees(HostDeviceVector* gpair, + DMatrix *p_fmat, + int bst_group, + std::vector >* ret) { + this->InitUpdater(); + std::vector new_trees; + ret->clear(); + // create the trees + for (int i = 0; i < tparam_.num_parallel_tree; ++i) { + if (tparam_.process_type == TreeProcessType::kDefault) { + // create new tree + std::unique_ptr ptr(new RegTree()); + ptr->param.InitAllowUnknown(this->cfg_); + new_trees.push_back(ptr.get()); + ret->push_back(std::move(ptr)); + } else if (tparam_.process_type == TreeProcessType::kUpdate) { + CHECK_LT(model_.trees.size(), model_.trees_to_update.size()); + // move an existing tree from trees_to_update + auto t = std::move(model_.trees_to_update[model_.trees.size() + + bst_group * tparam_.num_parallel_tree + i]); + new_trees.push_back(t.get()); + ret->push_back(std::move(t)); } - predictor_->UpdatePredictionCache(model_, &updaters_, num_new_trees); } + // update the trees + for (auto& up : updaters_) { + up->Update(gpair, p_fmat, new_trees); + } +} + +void 
GBTree::CommitModel(std::vector>>&& new_trees) { + int num_new_trees = 0; + for (int gid = 0; gid < model_.param.num_output_group; ++gid) { + num_new_trees += new_trees[gid].size(); + model_.CommitModel(std::move(new_trees[gid]), gid); + } + predictor_->UpdatePredictionCache(model_, &updaters_, num_new_trees); +} - // --- data structure --- - GBTreeModel model_; - // training parameter - GBTreeTrainParam tparam_; - // ----training fields---- - // configurations for tree - std::vector > cfg_; - // the updaters that can be applied to each of tree - std::vector> updaters_; - // Cached matrices - std::vector> cache_; - std::unique_ptr predictor_; - common::Monitor monitor_; -}; // dart class Dart : public GBTree { @@ -372,6 +343,10 @@ class Dart : public GBTree { } } + bool UseGPU() const override { + return false; + } + protected: friend class GBTree; // internal prediction loop diff --git a/src/gbm/gbtree.h b/src/gbm/gbtree.h new file mode 100644 index 000000000..5f5456eab --- /dev/null +++ b/src/gbm/gbtree.h @@ -0,0 +1,269 @@ +/*! + * Copyright 2014-2019 by Contributors + * \file gbtree.h + * \brief gradient boosted tree implementation. + * \author Tianqi Chen + */ +#ifndef XGBOOST_GBM_GBTREE_H_ +#define XGBOOST_GBM_GBTREE_H_ + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "gbtree_model.h" +#include "../common/common.h" +#include "../common/host_device_vector.h" +#include "../common/timer.h" + +namespace xgboost { +enum class TreeMethod : int { + kAuto = 0, kApprox = 1, kExact = 2, kHist = 3, + kGPUExact = 4, kGPUHist = 5 +}; + +// boosting process types +enum class TreeProcessType : int { + kDefault = 0, + kUpdate = 1 +}; +} // namespace xgboost + +DECLARE_FIELD_ENUM_CLASS(xgboost::TreeMethod); +DECLARE_FIELD_ENUM_CLASS(xgboost::TreeProcessType); + +namespace xgboost { +namespace gbm { + +/*! 
\brief training parameters */ +struct GBTreeTrainParam : public dmlc::Parameter { + /*! + * \brief number of parallel trees constructed each iteration + * use this option to support boosted random forest + */ + int num_parallel_tree; + /*! \brief tree updater sequence */ + std::string updater_seq; + /*! \brief type of boosting process to run */ + TreeProcessType process_type; + // predictor name + std::string predictor; + // tree construction method + TreeMethod tree_method; + // declare parameters + DMLC_DECLARE_PARAMETER(GBTreeTrainParam) { + DMLC_DECLARE_FIELD(num_parallel_tree) + .set_default(1) + .set_lower_bound(1) + .describe("Number of parallel trees constructed during each iteration."\ + " This option is used to support boosted random forest."); + DMLC_DECLARE_FIELD(updater_seq) + .set_default("grow_colmaker,prune") + .describe("Tree updater sequence."); + DMLC_DECLARE_FIELD(process_type) + .set_default(TreeProcessType::kDefault) + .add_enum("default", TreeProcessType::kDefault) + .add_enum("update", TreeProcessType::kUpdate) + .describe("Whether to run the normal boosting process that creates new trees,"\ + " or to update the trees in an existing model."); + // add alias + DMLC_DECLARE_ALIAS(updater_seq, updater); + DMLC_DECLARE_FIELD(predictor) + .set_default("cpu_predictor") + .describe("Predictor algorithm type"); + DMLC_DECLARE_FIELD(tree_method) + .set_default(TreeMethod::kAuto) + .add_enum("auto", TreeMethod::kAuto) + .add_enum("approx", TreeMethod::kApprox) + .add_enum("exact", TreeMethod::kExact) + .add_enum("hist", TreeMethod::kHist) + .add_enum("gpu_exact", TreeMethod::kGPUExact) + .add_enum("gpu_hist", TreeMethod::kGPUHist) + .describe("Choice of tree construction method."); + } +}; + +/*! \brief training parameters */ +struct DartTrainParam : public dmlc::Parameter { + /*! \brief type of sampling algorithm */ + int sample_type; + /*! \brief type of normalization algorithm */ + int normalize_type; + /*! 
\brief fraction of trees to drop during the dropout */ + float rate_drop; + /*! \brief whether at least one tree should always be dropped during the dropout */ + bool one_drop; + /*! \brief probability of skipping the dropout during an iteration */ + float skip_drop; + /*! \brief learning step size for a time */ + float learning_rate; + // declare parameters + DMLC_DECLARE_PARAMETER(DartTrainParam) { + DMLC_DECLARE_FIELD(sample_type) + .set_default(0) + .add_enum("uniform", 0) + .add_enum("weighted", 1) + .describe("Different types of sampling algorithm."); + DMLC_DECLARE_FIELD(normalize_type) + .set_default(0) + .add_enum("tree", 0) + .add_enum("forest", 1) + .describe("Different types of normalization algorithm."); + DMLC_DECLARE_FIELD(rate_drop) + .set_range(0.0f, 1.0f) + .set_default(0.0f) + .describe("Fraction of trees to drop during the dropout."); + DMLC_DECLARE_FIELD(one_drop) + .set_default(false) + .describe("Whether at least one tree should always be dropped during the dropout."); + DMLC_DECLARE_FIELD(skip_drop) + .set_range(0.0f, 1.0f) + .set_default(0.0f) + .describe("Probability of skipping the dropout during a boosting iteration."); + DMLC_DECLARE_FIELD(learning_rate) + .set_lower_bound(0.0f) + .set_default(0.3f) + .describe("Learning rate(step size) of update."); + DMLC_DECLARE_ALIAS(learning_rate, eta); + } +}; + +// gradient boosted trees +class GBTree : public GradientBooster { + public: + explicit GBTree(bst_float base_margin) : model_(base_margin) {} + + void InitCache(const std::vector > &cache) { + cache_ = cache; + } + + static void AssertGPUSupport() { +#ifndef XGBOOST_USE_CUDA + LOG(FATAL) << "XGBoost version not compiled with GPU support."; +#endif // XGBOOST_USE_CUDA + } + + void Configure(const std::vector >& cfg) override; + // Revise `tree_method` and `updater` parameters after seeing the training + // data matrix + void PerformTreeMethodHeuristic(DMatrix* p_train, + std::map cfg); + /*! 
\brief Map `tree_method` parameter to `updater` parameter */ + void ConfigureUpdaters(const std::map& cfg); + /*! \brief Carry out one iteration of boosting */ + void DoBoost(DMatrix* p_fmat, + HostDeviceVector* in_gpair, + ObjFunction* obj) override; + + bool UseGPU() const override { + return + tparam_.predictor == "gpu_predictor" || + tparam_.tree_method == TreeMethod::kGPUHist || + tparam_.tree_method == TreeMethod::kGPUExact; + } + + void Load(dmlc::Stream* fi) override { + model_.Load(fi); + + this->cfg_.clear(); + this->cfg_.emplace_back(std::string("num_feature"), + common::ToString(model_.param.num_feature)); + } + + GBTreeTrainParam const& GetTrainParam() const { + return tparam_; + } + + void Save(dmlc::Stream* fo) const override { + model_.Save(fo); + } + + bool AllowLazyCheckPoint() const override { + return model_.param.num_output_group == 1 || + tparam_.updater_seq.find("distcol") != std::string::npos; + } + + void PredictBatch(DMatrix* p_fmat, + HostDeviceVector* out_preds, + unsigned ntree_limit) override { + predictor_->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit); + } + + void PredictInstance(const SparsePage::Inst& inst, + std::vector* out_preds, + unsigned ntree_limit, + unsigned root_index) override { + predictor_->PredictInstance(inst, out_preds, model_, + ntree_limit, root_index); + } + + void PredictLeaf(DMatrix* p_fmat, + std::vector* out_preds, + unsigned ntree_limit) override { + predictor_->PredictLeaf(p_fmat, out_preds, model_, ntree_limit); + } + + void PredictContribution(DMatrix* p_fmat, + std::vector* out_contribs, + unsigned ntree_limit, bool approximate, int condition, + unsigned condition_feature) override { + predictor_->PredictContribution(p_fmat, out_contribs, model_, ntree_limit, approximate); + } + + void PredictInteractionContributions(DMatrix* p_fmat, + std::vector* out_contribs, + unsigned ntree_limit, bool approximate) override { + predictor_->PredictInteractionContributions(p_fmat, out_contribs, model_, + 
ntree_limit, approximate); + } + + std::vector DumpModel(const FeatureMap& fmap, + bool with_stats, + std::string format) const override { + return model_.DumpModel(fmap, with_stats, format); + } + + protected: + // initialize updater before using them + void InitUpdater(); + + // do group specific group + void BoostNewTrees(HostDeviceVector* gpair, + DMatrix *p_fmat, + int bst_group, + std::vector >* ret); + + // commit new trees all at once + virtual void CommitModel( + std::vector>>&& new_trees); + + // --- data structure --- + GBTreeModel model_; + // training parameter + GBTreeTrainParam tparam_; + // ----training fields---- + // configurations for tree + std::vector > cfg_; + // the updaters that can be applied to each of tree + std::vector> updaters_; + // Cached matrices + std::vector> cache_; + std::unique_ptr predictor_; + common::Monitor monitor_; +}; + +} // namespace gbm +} // namespace xgboost + +#endif // XGBOOST_GBM_GBTREE_H_ diff --git a/src/learner.cc b/src/learner.cc index bf845611b..f928fbfdd 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -113,68 +113,6 @@ class LearnerImpl : public Learner { name_gbm_ = "gbtree"; } - static void AssertGPUSupport() { -#ifndef XGBOOST_USE_CUDA - LOG(FATAL) << "XGBoost version not compiled with GPU support."; -#endif // XGBOOST_USE_CUDA - } - - - /*! \brief Map `tree_method` parameter to `updater` parameter */ - void ConfigureUpdaters() { - // This method is not applicable to non-tree learners - if (cfg_.find("booster") != cfg_.cend() && - (cfg_.at("booster") != "gbtree" && cfg_.at("booster") != "dart")) { - return; - } - // `updater` parameter was manually specified - if (cfg_.count("updater") > 0) { - LOG(WARNING) << "DANGER AHEAD: You have manually specified `updater` " - "parameter. The `tree_method` parameter will be ignored. " - "Incorrect sequence of updaters will produce undefined " - "behavior. 
For common uses, we recommend using " - "`tree_method` parameter instead."; - return; - } - - /* Choose updaters according to tree_method parameters */ - switch (tparam_.tree_method) { - case TreeMethod::kAuto: - // Use heuristic to choose between 'exact' and 'approx' - // This choice is deferred to PerformTreeMethodHeuristic(). - break; - case TreeMethod::kApprox: - cfg_["updater"] = "grow_histmaker,prune"; - break; - case TreeMethod::kExact: - cfg_["updater"] = "grow_colmaker,prune"; - break; - case TreeMethod::kHist: - LOG(INFO) << - "Tree method is selected to be 'hist', which uses a " - "single updater grow_quantile_histmaker."; - cfg_["updater"] = "grow_quantile_histmaker"; - break; - case TreeMethod::kGPUExact: - this->AssertGPUSupport(); - cfg_["updater"] = "grow_gpu,prune"; - if (cfg_.count("predictor") == 0) { - cfg_["predictor"] = "gpu_predictor"; - } - break; - case TreeMethod::kGPUHist: - this->AssertGPUSupport(); - cfg_["updater"] = "grow_gpu_hist"; - if (cfg_.count("predictor") == 0) { - cfg_["predictor"] = "gpu_predictor"; - } - break; - default: - LOG(FATAL) << "Unknown tree_method (" - << static_cast(tparam_.tree_method) << ") detected"; - } - } - void ConfigureObjective() { if (cfg_.count("num_class") != 0) { cfg_["num_output_group"] = cfg_["num_class"]; @@ -192,9 +130,6 @@ class LearnerImpl : public Learner { if (cfg_.count("objective") == 0) { cfg_["objective"] = "reg:squarederror"; } - if (cfg_.count("booster") == 0) { - cfg_["booster"] = "gbtree"; - } } // Configuration before data is known. @@ -231,13 +166,12 @@ class LearnerImpl : public Learner { } ConfigureObjective(); - ConfigureUpdaters(); + name_gbm_ = tparam_.booster; // FIXME(trivialfis): So which one should go first? Init or Configure? 
if (!this->ModelInitialized()) { mparam_.InitAllowUnknown(args); name_obj_ = cfg_["objective"]; - name_gbm_ = cfg_["booster"]; // set seed only before the model is initialized common::GlobalRandom().seed(tparam_.seed); } @@ -263,18 +197,11 @@ class LearnerImpl : public Learner { // Configuration can only be done after data is known void ConfigurationWithKnownData(DMatrix* dmat) { CHECK(ModelInitialized()) - << "Always call InitModel or Load before any evaluation."; + << " Internal Error: Always call InitModel or Load before any evaluation."; this->ValidateDMatrix(dmat); - // Configure GPU parameters - // FIXME(trivialfis): How do we know dependent parameters are all set? - if (tparam_.tree_method == TreeMethod::kGPUHist || - tparam_.tree_method == TreeMethod::kGPUExact || - (cfg_.find("updater") != cfg_.cend() && cfg_.at("updater") == "gpu_coord_descent") || - (cfg_.find("predictor") != cfg_.cend() && - cfg_.at("predictor") == "gpu_predictor")) { - if (cfg_.find("n_gpus") == cfg_.cend()) { - tparam_.n_gpus = 1; - } + CHECK(this->gbm_) << " Internal: GBM is not set"; + if (this->gbm_->UseGPU() && cfg_.find("n_gpus") == cfg_.cend()) { + tparam_.n_gpus = 1; } } @@ -443,13 +370,26 @@ class LearnerImpl : public Learner { } } + void CheckDataSplitMode() { + if (rabit::IsDistributed()) { + CHECK(tparam_.dsplit != DataSplitMode::kAuto) + << "Precondition violated; dsplit cannot be 'auto' in distributed mode"; + if (tparam_.dsplit == DataSplitMode::kCol) { + // 'distcol' updater hidden until it becomes functional again + // See discussion at https://github.com/dmlc/xgboost/issues/1832 + LOG(FATAL) << "Column-wise data split is currently not supported."; + } + } + } + void UpdateOneIter(int iter, DMatrix* train) override { monitor_.Start("UpdateOneIter"); if (tparam_.seed_per_iteration || rabit::IsDistributed()) { common::GlobalRandom().seed(tparam_.seed * kRandSeedMagic + iter); } - this->PerformTreeMethodHeuristic(train); + // this->PerformTreeMethodHeuristic(train); + 
this->CheckDataSplitMode(); this->ConfigurationWithKnownData(train); monitor_.Start("PredictRaw"); @@ -468,7 +408,8 @@ class LearnerImpl : public Learner { if (tparam_.seed_per_iteration || rabit::IsDistributed()) { common::GlobalRandom().seed(tparam_.seed * kRandSeedMagic + iter); } - this->PerformTreeMethodHeuristic(train); + this->CheckDataSplitMode(); + // this->PerformTreeMethodHeuristic(train); this->ConfigurationWithKnownData(train); gbm_->DoBoost(train, in_gpair); @@ -573,108 +514,6 @@ class LearnerImpl : public Learner { } protected: - // Revise `tree_method` and `updater` parameters after seeing the training - // data matrix - inline void PerformTreeMethodHeuristic(DMatrix* p_train) { - if (name_gbm_ != "gbtree" || cfg_.count("updater") > 0) { - // 1. This method is not applicable for non-tree learners - // 2. This method is disabled when `updater` parameter is explicitly - // set, since only experts are expected to do so. - return; - } - - const TreeMethod current_tree_method = tparam_.tree_method; - - if (rabit::IsDistributed()) { - CHECK(tparam_.dsplit != DataSplitMode::kAuto) - << "Precondition violated; dsplit cannot be 'auto' in distributed mode"; - if (tparam_.dsplit == DataSplitMode::kCol) { - // 'distcol' updater hidden until it becomes functional again - // See discussion at https://github.com/dmlc/xgboost/issues/1832 - LOG(FATAL) << "Column-wise data split is currently not supported."; - } - switch (current_tree_method) { - case TreeMethod::kAuto: - LOG(WARNING) << - "Tree method is automatically selected to be 'approx' " - "for distributed training."; - break; - case TreeMethod::kApprox: - case TreeMethod::kHist: - // things are okay, do nothing - break; - case TreeMethod::kExact: - LOG(WARNING) << "Tree method was set to be " - << "exact" - << "', but only 'approx' and 'hist' is available for distributed " - "training. 
The `tree_method` parameter is now being " - "changed to 'approx'"; - break; - case TreeMethod::kGPUExact: - case TreeMethod::kGPUHist: - LOG(FATAL) << "Distributed training is not available with GPU algoritms"; - break; - default: - LOG(FATAL) << "Unknown tree_method (" - << static_cast(current_tree_method) << ") detected"; - } - if (current_tree_method != TreeMethod::kHist) { - LOG(WARNING) << "Tree method is automatically selected to be 'approx'" - " for distributed training."; - tparam_.tree_method = TreeMethod::kApprox; - } else { - LOG(WARNING) << "Tree method is specified to be 'hist'" - " for distributed training."; - tparam_.tree_method = TreeMethod::kHist; - } - } else if (!p_train->SingleColBlock()) { - /* Some tree methods are not available for external-memory DMatrix */ - switch (current_tree_method) { - case TreeMethod::kAuto: - LOG(WARNING) << "Tree method is automatically set to 'approx' " - "since external-memory data matrix is used."; - break; - case TreeMethod::kApprox: - // things are okay, do nothing - break; - case TreeMethod::kExact: - LOG(WARNING) << "Tree method was set to be 'exact', " - "but currently we are only able to proceed with " - "approximate algorithm ('approx') because external-" - "memory data matrix is used."; - break; - case TreeMethod::kHist: - // things are okay, do nothing - break; - case TreeMethod::kGPUExact: - case TreeMethod::kGPUHist: - LOG(FATAL) - << "External-memory data matrix is not available with GPU algorithms"; - break; - default: - LOG(FATAL) << "Unknown tree_method (" - << static_cast(current_tree_method) << ") detected"; - } - tparam_.tree_method = TreeMethod::kApprox; - } else if (p_train->Info().num_row_ >= (4UL << 20UL) - && current_tree_method == TreeMethod::kAuto) { - /* Choose tree_method='approx' automatically for large data matrix */ - LOG(WARNING) << "Tree method is automatically selected to be " - "'approx' for faster speed. 
To use old behavior " - "(exact greedy algorithm on single machine), " - "set tree_method to 'exact'."; - tparam_.tree_method = TreeMethod::kApprox; - } - - /* If tree_method was changed, re-configure updaters and gradient boosters */ - if (tparam_.tree_method != current_tree_method) { - ConfigureUpdaters(); - if (gbm_ != nullptr) { - gbm_->Configure(cfg_.begin(), cfg_.end()); - } - } - } - // return whether model is already initialized. inline bool ModelInitialized() const { return gbm_ != nullptr; } // lazily initialize the model based on configuration if it haven't yet been initialized. diff --git a/src/tree/updater_colmaker.cc b/src/tree/updater_colmaker.cc index abf4d2034..3bc5b58c3 100644 --- a/src/tree/updater_colmaker.cc +++ b/src/tree/updater_colmaker.cc @@ -6,12 +6,13 @@ */ #include #include +#include #include #include #include #include -#include "./param.h" +#include "param.h" #include "../common/random.h" #include "../common/bitmap.h" #include "split_evaluator.h" @@ -603,7 +604,7 @@ class ColMaker: public TreeUpdater { poption = static_cast(num_features) * 2 < this->nthread_ ? 
1 : 0; } if (poption == 0) { - #pragma omp parallel for schedule(dynamic, batch_size) +#pragma omp parallel for schedule(dynamic, batch_size) for (bst_omp_uint i = 0; i < num_features; ++i) { int fid = feat_set[i]; const int tid = omp_get_thread_num(); diff --git a/tests/cpp/gbm/test_gbtree.cc b/tests/cpp/gbm/test_gbtree.cc new file mode 100644 index 000000000..9aeb2e700 --- /dev/null +++ b/tests/cpp/gbm/test_gbtree.cc @@ -0,0 +1,49 @@ +#include +#include +#include "../helpers.h" +#include "../../../src/gbm/gbtree.h" + +namespace xgboost { +TEST(GBTree, SelectTreeMethod) { + using Arg = std::pair; + size_t constexpr kRows = 10; + size_t constexpr kCols = 10; + auto mat_ptr = CreateDMatrix(kRows, kCols, 0); + std::vector> mat = {*mat_ptr}; + + LearnerTrainParam learner_param; + learner_param.InitAllowUnknown(std::vector{Arg("n_gpus", "0")}); + std::unique_ptr p_gbm{ + GradientBooster::Create("gbtree", &learner_param, {}, 0)}; + auto& gbtree = dynamic_cast (*p_gbm); + + // Test if `tree_method` can be set + std::string n_feat = std::to_string(kCols); + gbtree.Configure({Arg{"tree_method", "approx"}, Arg{"num_feature", n_feat}}); + auto const& tparam = gbtree.GetTrainParam(); + ASSERT_EQ(tparam.updater_seq, "grow_histmaker,prune"); + gbtree.Configure({Arg("tree_method", "exact"), Arg("num_feature", n_feat)}); + ASSERT_EQ(tparam.updater_seq, "grow_colmaker,prune"); + gbtree.Configure({Arg("tree_method", "hist"), Arg("num_feature", n_feat)}); + ASSERT_EQ(tparam.updater_seq, "grow_quantile_histmaker"); + ASSERT_EQ(tparam.predictor, "cpu_predictor"); + gbtree.Configure({Arg{"booster", "dart"}, Arg{"tree_method", "hist"}, + Arg{"num_feature", n_feat}}); + ASSERT_EQ(tparam.updater_seq, "grow_quantile_histmaker"); +#ifdef XGBOOST_USE_CUDA + learner_param.InitAllowUnknown(std::vector{Arg{"n_gpus", "1"}}); + gbtree.Configure({Arg("tree_method", "gpu_exact"), + Arg("num_feature", n_feat)}); + ASSERT_EQ(tparam.updater_seq, "grow_gpu,prune"); + ASSERT_EQ(tparam.predictor, 
"gpu_predictor"); + gbtree.Configure({Arg("tree_method", "gpu_hist"), Arg("num_feature", n_feat)}); + ASSERT_EQ(tparam.updater_seq, "grow_gpu_hist"); + ASSERT_EQ(tparam.predictor, "gpu_predictor"); + gbtree.Configure({Arg{"booster", "dart"}, Arg{"tree_method", "gpu_hist"}, + Arg{"num_feature", n_feat}}); + ASSERT_EQ(tparam.updater_seq, "grow_gpu_hist"); +#endif + + delete mat_ptr; +} +} // namespace xgboost diff --git a/tests/cpp/predictor/test_gpu_predictor.cu b/tests/cpp/predictor/test_gpu_predictor.cu index d5d9eb775..bc4f759ab 100644 --- a/tests/cpp/predictor/test_gpu_predictor.cu +++ b/tests/cpp/predictor/test_gpu_predictor.cu @@ -145,25 +145,30 @@ TEST(gpu_predictor, MGPU_PicklingTest) { } // Load data matrix - CheckCAPICall(XGDMatrixCreateFromFile(tmp_file.c_str(), 0, &dmat[0])); - CheckCAPICall(XGDMatrixSetFloatInfo(dmat[0], "label", label.data(), 200)); + ASSERT_EQ(XGDMatrixCreateFromFile( + tmp_file.c_str(), 0, &dmat[0]), 0) << XGBGetLastError(); + ASSERT_EQ(XGDMatrixSetFloatInfo( + dmat[0], "label", label.data(), 200), 0) << XGBGetLastError(); // Create booster - CheckCAPICall(XGBoosterCreate(dmat, 1, &bst)); + ASSERT_EQ(XGBoosterCreate(dmat, 1, &bst), 0) << XGBGetLastError(); // Set parameters - CheckCAPICall(XGBoosterSetParam(bst, "seed", "0")); - CheckCAPICall(XGBoosterSetParam(bst, "base_score", "0.5")); - CheckCAPICall(XGBoosterSetParam(bst, "booster", "gbtree")); - CheckCAPICall(XGBoosterSetParam(bst, "learning_rate", "0.01")); - CheckCAPICall(XGBoosterSetParam(bst, "max_depth", "8")); - CheckCAPICall(XGBoosterSetParam(bst, "objective", "binary:logistic")); - CheckCAPICall(XGBoosterSetParam(bst, "seed", "123")); - CheckCAPICall(XGBoosterSetParam(bst, "tree_method", "gpu_hist")); - CheckCAPICall(XGBoosterSetParam(bst, "n_gpus", std::to_string(ngpu).c_str())); - CheckCAPICall(XGBoosterSetParam(bst, "predictor", "gpu_predictor")); + ASSERT_EQ(XGBoosterSetParam(bst, "seed", "0"), 0) << XGBGetLastError(); + ASSERT_EQ(XGBoosterSetParam(bst, "base_score", 
"0.5"), 0) << XGBGetLastError(); + ASSERT_EQ(XGBoosterSetParam(bst, "booster", "gbtree"), 0) << XGBGetLastError(); + ASSERT_EQ(XGBoosterSetParam(bst, "learning_rate", "0.01"), 0) << XGBGetLastError(); + ASSERT_EQ(XGBoosterSetParam(bst, "max_depth", "8"), 0) << XGBGetLastError(); + ASSERT_EQ(XGBoosterSetParam( + bst, "objective", "binary:logistic"), 0) << XGBGetLastError(); + ASSERT_EQ(XGBoosterSetParam(bst, "seed", "123"), 0) << XGBGetLastError(); + ASSERT_EQ(XGBoosterSetParam( + bst, "tree_method", "gpu_hist"), 0) << XGBGetLastError(); + ASSERT_EQ(XGBoosterSetParam( + bst, "n_gpus", std::to_string(ngpu).c_str()), 0) << XGBGetLastError(); + ASSERT_EQ(XGBoosterSetParam(bst, "predictor", "gpu_predictor"), 0) << XGBGetLastError(); // Run boosting iterations for (int i = 0; i < 10; ++i) { - CheckCAPICall(XGBoosterUpdateOneIter(bst, i, dmat[0])); + ASSERT_EQ(XGBoosterUpdateOneIter(bst, i, dmat[0]), 0) << XGBGetLastError(); } // Delete matrix diff --git a/tests/cpp/test_learner.cc b/tests/cpp/test_learner.cc index 4bb29784b..5ff1e4b8a 100644 --- a/tests/cpp/test_learner.cc +++ b/tests/cpp/test_learner.cc @@ -19,40 +19,6 @@ TEST(Learner, Basic) { delete mat_ptr; } -TEST(Learner, SelectTreeMethod) { - using Arg = std::pair; - auto mat_ptr = CreateDMatrix(10, 10, 0); - std::vector> mat = {*mat_ptr}; - auto learner = std::unique_ptr(Learner::Create(mat)); - - // Test if `tree_method` can be set - learner->Configure({Arg("tree_method", "approx")}); - ASSERT_EQ(learner->GetConfigurationArguments().at("updater"), - "grow_histmaker,prune"); - learner->Configure({Arg("tree_method", "exact")}); - ASSERT_EQ(learner->GetConfigurationArguments().at("updater"), - "grow_colmaker,prune"); - learner->Configure({Arg("tree_method", "hist")}); - ASSERT_EQ(learner->GetConfigurationArguments().at("updater"), - "grow_quantile_histmaker"); - learner->Configure({Arg{"booster", "dart"}, Arg{"tree_method", "hist"}}); - ASSERT_EQ(learner->GetConfigurationArguments().at("updater"), - 
"grow_quantile_histmaker"); -#ifdef XGBOOST_USE_CUDA - learner->Configure({Arg("tree_method", "gpu_exact")}); - ASSERT_EQ(learner->GetConfigurationArguments().at("updater"), - "grow_gpu,prune"); - learner->Configure({Arg("tree_method", "gpu_hist")}); - ASSERT_EQ(learner->GetConfigurationArguments().at("updater"), - "grow_gpu_hist"); - learner->Configure({Arg{"booster", "dart"}, Arg{"tree_method", "gpu_hist"}}); - ASSERT_EQ(learner->GetConfigurationArguments().at("updater"), - "grow_gpu_hist"); -#endif - - delete mat_ptr; -} - TEST(Learner, CheckGroup) { using Arg = std::pair; size_t constexpr kNumGroups = 4;