Offload some configurations into GBM. (#4553)

This is part 1 of the configuration refactoring.

* Move tree heuristic configurations.
* Split up declarations and definitions for GBTree.
* Implement UseGPU in gbm.
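
As a quick illustration of the last point: the learner no longer needs to sniff booster-specific keys to decide whether GPUs are in play; it can ask the booster directly. A minimal sketch of the consuming side (this mirrors the learner.cc hunk further down; `gbm_`, `cfg_`, and `tparam_` are the learner's own members, not new API):

// The booster reports whether any GPU-backed component is configured;
// the learner then defaults n_gpus only when the user left it unset.
if (gbm_->UseGPU() && cfg_.find("n_gpus") == cfg_.cend()) {
  tparam_.n_gpus = 1;
}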
Jiaming Yuan 2019-06-14 09:18:51 +08:00, committed by GitHub
parent a2042b685a
commit c5719cc457
12 changed files with 630 additions and 519 deletions


@ -146,6 +146,10 @@ class GradientBooster {
virtual std::vector<std::string> DumpModel(const FeatureMap& fmap,
bool with_stats,
std::string format) const = 0;
/*!
* \brief Whether the current booster uses GPU.
*/
virtual bool UseGPU() const = 0;
/*!
* \brief create a gradient booster from given name
* \param name name of gradient booster


@ -8,18 +8,15 @@
#include <dmlc/parameter.h>
#include <xgboost/enum_class_param.h>
#include <string>
namespace xgboost {
enum class TreeMethod : int {
kAuto = 0, kApprox = 1, kExact = 2, kHist = 3,
kGPUExact = 4, kGPUHist = 5
};
enum class DataSplitMode : int {
kAuto = 0, kCol = 1, kRow = 2
};
} // namespace xgboost
DECLARE_FIELD_ENUM_CLASS(xgboost::TreeMethod);
DECLARE_FIELD_ENUM_CLASS(xgboost::DataSplitMode);
namespace xgboost {
@ -30,8 +27,6 @@ struct LearnerTrainParam : public dmlc::Parameter<LearnerTrainParam> {
bool seed_per_iteration;
// data split mode, can be row, col, or none.
DataSplitMode dsplit;
// tree construction method
TreeMethod tree_method;
// number of threads to use if OpenMP is enabled
// if equals 0, use system default
int nthread;
@ -42,6 +37,8 @@ struct LearnerTrainParam : public dmlc::Parameter<LearnerTrainParam> {
// number of devices to use, -1 implies using all available devices.
int n_gpus;
std::string booster;
// declare parameters
DMLC_DECLARE_PARAMETER(LearnerTrainParam) {
DMLC_DECLARE_FIELD(seed).set_default(0).describe(
@ -58,15 +55,6 @@ struct LearnerTrainParam : public dmlc::Parameter<LearnerTrainParam> {
.add_enum("col", DataSplitMode::kCol)
.add_enum("row", DataSplitMode::kRow)
.describe("Data split mode for distributed training.");
DMLC_DECLARE_FIELD(tree_method)
.set_default(TreeMethod::kAuto)
.add_enum("auto", TreeMethod::kAuto)
.add_enum("approx", TreeMethod::kApprox)
.add_enum("exact", TreeMethod::kExact)
.add_enum("hist", TreeMethod::kHist)
.add_enum("gpu_exact", TreeMethod::kGPUExact)
.add_enum("gpu_hist", TreeMethod::kGPUHist)
.describe("Choice of tree construction method.");
DMLC_DECLARE_FIELD(nthread).set_default(0).describe(
"Number of threads to use.");
DMLC_DECLARE_FIELD(disable_default_eval_metric)
@ -79,6 +67,9 @@ struct LearnerTrainParam : public dmlc::Parameter<LearnerTrainParam> {
.set_default(0)
.set_lower_bound(-1)
.describe("Number of GPUs to use for multi-gpu algorithms.");
DMLC_DECLARE_FIELD(booster)
.set_default("gbtree")
.describe("Gradient booster used for training.");
}
};
} // namespace xgboost
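
With `booster` moved into `LearnerTrainParam`, it is parsed like any other dmlc parameter. A minimal sketch (assuming this header is included; unknown keys are collected rather than rejected):

LearnerTrainParam tparam;
tparam.InitAllowUnknown(
    std::vector<std::pair<std::string, std::string>>{{"booster", "dart"}});
CHECK_EQ(tparam.booster, "dart");  // defaults to "gbtree" when unspecified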


@ -37,6 +37,7 @@ GPUSet GPUSet::All(GpuIdType gpu_id, GpuIdType n_gpus, int32_t n_rows) {
CHECK_GE(n_gpus, -1) << "n_gpus must be >= -1.";
GpuIdType const n_devices_visible = AllVisible().Size();
CHECK_LE(n_gpus, n_devices_visible);
if (n_devices_visible == 0 || n_gpus == 0 || n_rows == 0) {
LOG(DEBUG) << "Runing on CPU.";
return Empty();
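
The new guard only adds an early CPU fallback; call sites are unchanged. A small sketch (argument values illustrative):

// With zero rows requested, All() now short-circuits to the empty set.
GPUSet devices = GPUSet::All(/*gpu_id=*/0, /*n_gpus=*/-1, /*n_rows=*/0);
CHECK_EQ(devices.Size(), 0);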


@ -360,7 +360,9 @@ struct HostDeviceVectorImpl {
void Shard(const GPUDistribution& distribution) {
if (distribution_ == distribution) { return; }
CHECK(distribution_.IsEmpty());
CHECK(distribution_.IsEmpty())
<< "This: " << distribution_.Devices().Size() << ", "
<< "Others: " << distribution.Devices().Size();
distribution_ = distribution;
InitShards();
}


@ -180,6 +180,14 @@ class GBLinear : public GradientBooster {
return model_.DumpModel(fmap, with_stats, format);
}
bool UseGPU() const override {
return param_.updater == "gpu_coord_descent";
}
protected:
void PredictBatchInternal(DMatrix *p_fmat,
std::vector<bst_float> *out_preds) {
@ -257,6 +265,7 @@ class GBLinear : public GradientBooster {
}
preds[gid] = psum;
}
// base margin score
bst_float base_margin_;
// model field


@ -1,5 +1,5 @@
/*!
* Copyright 2014 by Contributors
* Copyright 2014-2019 by Contributors
* \file gbtree.cc
* \brief gradient boosted tree implementation.
* \author Tianqi Chen
@ -11,309 +11,280 @@
#include <xgboost/gbm.h>
#include <xgboost/predictor.h>
#include <xgboost/tree_updater.h>
#include <vector>
#include <memory>
#include <utility>
#include <string>
#include <limits>
#include <algorithm>
#include "../common/common.h"
#include "../common/host_device_vector.h"
#include "../common/random.h"
#include "gbtree.h"
#include "gbtree_model.h"
#include "../common/timer.h"
namespace xgboost {
namespace gbm {
DMLC_REGISTRY_FILE_TAG(gbtree);
void GBTree::Configure(const std::vector<std::pair<std::string, std::string> >& cfg) {
  this->cfg_ = cfg;
  std::string updater_seq = tparam_.updater_seq;
  tparam_.InitAllowUnknown(cfg);

  ConfigureUpdaters({cfg.begin(), cfg.cend()});

  model_.Configure(cfg);

  // for the 'update' process_type, move trees into trees_to_update
  if (tparam_.process_type == TreeProcessType::kUpdate) {
    model_.InitTreesToUpdate();
  }

  // initialize the updaters only when needed.
  if (updater_seq != tparam_.updater_seq) {
    updaters_.clear();
  }
  for (const auto& up : updaters_) {
    up->Init(cfg);
  }

  // configure predictor
  predictor_ = std::unique_ptr<Predictor>(
      Predictor::Create(tparam_.predictor, this->learner_param_));
  predictor_->Init(cfg, cache_);
  monitor_.Init("GBTree");
}

void GBTree::PerformTreeMethodHeuristic(DMatrix* p_train,
                                        std::map<std::string, std::string> cfg) {
  if (cfg.find("updater") != cfg.cend()) {
    // This method is disabled when the `updater` parameter is explicitly
    // set, since only experts are expected to do so.
    return;
  }

  const TreeMethod current_tree_method = tparam_.tree_method;
  if (rabit::IsDistributed()) {
    switch (current_tree_method) {
      case TreeMethod::kAuto:
        LOG(WARNING) <<
            "Tree method is automatically selected to be 'approx' "
            "for distributed training.";
        break;
      case TreeMethod::kApprox:
      case TreeMethod::kHist:
        // things are okay, do nothing
        break;
      case TreeMethod::kExact:
        LOG(WARNING) << "Tree method was set to 'exact', "
                        "but only 'approx' and 'hist' are available for "
                        "distributed training. The `tree_method` parameter "
                        "is now being changed to 'approx'.";
        break;
      case TreeMethod::kGPUExact:
        // FIXME(trivialfis): Remove this line once GPU Exact is removed.
        LOG(FATAL) << "Distributed training is not available with GPU Exact algorithm.";
        break;
      case TreeMethod::kGPUHist:
        break;
      default:
        LOG(FATAL) << "Unknown tree_method ("
                   << static_cast<int>(current_tree_method) << ") detected";
    }
    if (current_tree_method != TreeMethod::kHist) {
      LOG(WARNING) << "Tree method is automatically selected to be 'approx'"
                      " for distributed training.";
      tparam_.tree_method = TreeMethod::kApprox;
    } else {
      LOG(WARNING) << "Tree method is specified to be 'hist'"
                      " for distributed training.";
      tparam_.tree_method = TreeMethod::kHist;
    }
  } else if (!p_train->SingleColBlock()) {
    /* Some tree methods are not available for external-memory DMatrix */
    switch (current_tree_method) {
      case TreeMethod::kAuto:
        LOG(WARNING) << "Tree method is automatically set to 'approx' "
                        "since external-memory data matrix is used.";
        break;
      case TreeMethod::kApprox:
        // things are okay, do nothing
        break;
      case TreeMethod::kExact:
        LOG(WARNING) << "Tree method was set to 'exact', "
                        "but currently we are only able to proceed with "
                        "the approximate algorithm ('approx') because external-"
                        "memory data matrix is used.";
        break;
      case TreeMethod::kHist:
        // things are okay, do nothing
        break;
      case TreeMethod::kGPUExact:
      case TreeMethod::kGPUHist:
        LOG(FATAL)
            << "External-memory data matrix is not available with GPU algorithms";
        break;
      default:
        LOG(FATAL) << "Unknown tree_method ("
                   << static_cast<int>(current_tree_method) << ") detected";
    }
    tparam_.tree_method = TreeMethod::kApprox;
  } else if (p_train->Info().num_row_ >= (4UL << 20UL)
             && current_tree_method == TreeMethod::kAuto) {
    /* Choose tree_method='approx' automatically for large data matrix */
    LOG(WARNING) << "Tree method is automatically selected to be "
                    "'approx' for faster speed. To use old behavior "
                    "(exact greedy algorithm on single machine), "
                    "set tree_method to 'exact'.";
    tparam_.tree_method = TreeMethod::kApprox;
  }
  LOG(DEBUG) << "Using predictor: " << tparam_.predictor;
}

void GBTree::ConfigureUpdaters(const std::map<std::string, std::string>& cfg) {
  // `updater` parameter was manually specified
  if (cfg.find("updater") != cfg.cend()) {
    LOG(WARNING) << "DANGER AHEAD: You have manually specified the `updater` "
                    "parameter. The `tree_method` parameter will be ignored. "
                    "Incorrect sequence of updaters will produce undefined "
                    "behavior. For common uses, we recommend using the "
                    "`tree_method` parameter instead.";
    return;
  }

  /* Choose updaters according to the tree_method parameter */
  switch (tparam_.tree_method) {
    case TreeMethod::kAuto:
      // Use heuristic to choose between 'exact' and 'approx'.
      // This choice is deferred to PerformTreeMethodHeuristic().
      break;
    case TreeMethod::kApprox:
      tparam_.updater_seq = "grow_histmaker,prune";
      break;
    case TreeMethod::kExact:
      tparam_.updater_seq = "grow_colmaker,prune";
      break;
    case TreeMethod::kHist:
      LOG(INFO) <<
          "Tree method is selected to be 'hist', which uses a "
          "single updater grow_quantile_histmaker.";
      tparam_.updater_seq = "grow_quantile_histmaker";
      break;
    case TreeMethod::kGPUExact:
      this->AssertGPUSupport();
      tparam_.updater_seq = "grow_gpu,prune";
      if (cfg.find("predictor") == cfg.cend()) {
        tparam_.predictor = "gpu_predictor";
      }
      break;
    case TreeMethod::kGPUHist:
      this->AssertGPUSupport();
      tparam_.updater_seq = "grow_gpu_hist";
      if (cfg.find("predictor") == cfg.cend()) {
        tparam_.predictor = "gpu_predictor";
      }
      break;
    default:
      LOG(FATAL) << "Unknown tree_method ("
                 << static_cast<int>(tparam_.tree_method) << ") detected";
  }
}

void GBTree::DoBoost(DMatrix* p_fmat,
                     HostDeviceVector<GradientPair>* in_gpair,
                     ObjFunction* obj) {
  std::string updater_seq = tparam_.updater_seq;
  this->PerformTreeMethodHeuristic(p_fmat, {this->cfg_.begin(), this->cfg_.end()});
  this->ConfigureUpdaters({this->cfg_.begin(), this->cfg_.end()});
  LOG(DEBUG) << "Using updaters: " << tparam_.updater_seq;
  // initialize the updaters only when needed.
  if (updater_seq != tparam_.updater_seq) {
    this->updaters_.clear();
  }

  std::vector<std::vector<std::unique_ptr<RegTree> > > new_trees;
  const int ngroup = model_.param.num_output_group;
  monitor_.Start("BoostNewTrees");
  if (ngroup == 1) {
    std::vector<std::unique_ptr<RegTree> > ret;
    BoostNewTrees(in_gpair, p_fmat, 0, &ret);
    new_trees.push_back(std::move(ret));
  } else {
    CHECK_EQ(in_gpair->Size() % ngroup, 0U)
        << "must have exactly ngroup*nrow gpairs";
    // TODO(canonizer): perform this on GPU if HostDeviceVector has device set.
    HostDeviceVector<GradientPair> tmp
        (in_gpair->Size() / ngroup, GradientPair(),
         GPUDistribution::Block(in_gpair->Distribution().Devices()));
    const auto& gpair_h = in_gpair->ConstHostVector();
    auto nsize = static_cast<bst_omp_uint>(tmp.Size());
    for (int gid = 0; gid < ngroup; ++gid) {
      std::vector<GradientPair>& tmp_h = tmp.HostVector();
#pragma omp parallel for schedule(static)
      for (bst_omp_uint i = 0; i < nsize; ++i) {
        tmp_h[i] = gpair_h[i * ngroup + gid];
      }
      std::vector<std::unique_ptr<RegTree> > ret;
      BoostNewTrees(&tmp, p_fmat, gid, &ret);
      new_trees.push_back(std::move(ret));
    }
  }
  monitor_.Stop("BoostNewTrees");
  monitor_.Start("CommitModel");
  this->CommitModel(std::move(new_trees));
  monitor_.Stop("CommitModel");
}

void GBTree::InitUpdater() {
  if (updaters_.size() != 0) return;
  std::string tval = tparam_.updater_seq;
  std::vector<std::string> ups = common::Split(tval, ',');
  for (const std::string& pstr : ups) {
    std::unique_ptr<TreeUpdater> up(TreeUpdater::Create(pstr.c_str(), learner_param_));
    up->Init(this->cfg_);
    updaters_.push_back(std::move(up));
  }
}

void GBTree::BoostNewTrees(HostDeviceVector<GradientPair>* gpair,
                           DMatrix *p_fmat,
                           int bst_group,
                           std::vector<std::unique_ptr<RegTree> >* ret) {
  this->InitUpdater();
  std::vector<RegTree*> new_trees;
  ret->clear();
  // create the trees
  for (int i = 0; i < tparam_.num_parallel_tree; ++i) {
    if (tparam_.process_type == TreeProcessType::kDefault) {
      // create a new tree
      std::unique_ptr<RegTree> ptr(new RegTree());
      ptr->param.InitAllowUnknown(this->cfg_);
      new_trees.push_back(ptr.get());
      ret->push_back(std::move(ptr));
    } else if (tparam_.process_type == TreeProcessType::kUpdate) {
      CHECK_LT(model_.trees.size(), model_.trees_to_update.size());
      // move an existing tree from trees_to_update
      auto t = std::move(model_.trees_to_update[model_.trees.size() +
                                                bst_group * tparam_.num_parallel_tree + i]);
      new_trees.push_back(t.get());
      ret->push_back(std::move(t));
    }
  }
  // update the trees
  for (auto& up : updaters_) {
    up->Update(gpair, p_fmat, new_trees);
  }
}

void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees) {
  int num_new_trees = 0;
  for (int gid = 0; gid < model_.param.num_output_group; ++gid) {
    num_new_trees += new_trees[gid].size();
    model_.CommitModel(std::move(new_trees[gid]), gid);
  }
  predictor_->UpdatePredictionCache(model_, &updaters_, num_new_trees);
}
// dart
class Dart : public GBTree {
@ -372,6 +343,10 @@ class Dart : public GBTree {
}
}
bool UseGPU() const override {
return false;
}
protected:
friend class GBTree;
// internal prediction loop

src/gbm/gbtree.h (new file, 269 lines)

@ -0,0 +1,269 @@
/*!
* Copyright 2014-2019 by Contributors
* \file gbtree.h
* \brief gradient boosted tree implementation.
* \author Tianqi Chen
*/
#ifndef XGBOOST_GBM_GBTREE_H_
#define XGBOOST_GBM_GBTREE_H_
#include <dmlc/omp.h>
#include <dmlc/parameter.h>
#include <xgboost/logging.h>
#include <xgboost/gbm.h>
#include <xgboost/predictor.h>
#include <xgboost/tree_updater.h>
#include <xgboost/enum_class_param.h>
#include <vector>
#include <map>
#include <memory>
#include <utility>
#include <string>
#include "gbtree_model.h"
#include "../common/common.h"
#include "../common/host_device_vector.h"
#include "../common/timer.h"
namespace xgboost {
enum class TreeMethod : int {
kAuto = 0, kApprox = 1, kExact = 2, kHist = 3,
kGPUExact = 4, kGPUHist = 5
};
// boosting process types
enum class TreeProcessType : int {
kDefault = 0,
kUpdate = 1
};
} // namespace xgboost
DECLARE_FIELD_ENUM_CLASS(xgboost::TreeMethod);
DECLARE_FIELD_ENUM_CLASS(xgboost::TreeProcessType);
namespace xgboost {
namespace gbm {
/*! \brief training parameters */
struct GBTreeTrainParam : public dmlc::Parameter<GBTreeTrainParam> {
/*!
* \brief number of parallel trees constructed each iteration
* use this option to support boosted random forest
*/
int num_parallel_tree;
/*! \brief tree updater sequence */
std::string updater_seq;
/*! \brief type of boosting process to run */
TreeProcessType process_type;
// predictor name
std::string predictor;
// tree construction method
TreeMethod tree_method;
// declare parameters
DMLC_DECLARE_PARAMETER(GBTreeTrainParam) {
DMLC_DECLARE_FIELD(num_parallel_tree)
.set_default(1)
.set_lower_bound(1)
.describe("Number of parallel trees constructed during each iteration."\
" This option is used to support boosted random forest.");
DMLC_DECLARE_FIELD(updater_seq)
.set_default("grow_colmaker,prune")
.describe("Tree updater sequence.");
DMLC_DECLARE_FIELD(process_type)
.set_default(TreeProcessType::kDefault)
.add_enum("default", TreeProcessType::kDefault)
.add_enum("update", TreeProcessType::kUpdate)
.describe("Whether to run the normal boosting process that creates new trees,"\
" or to update the trees in an existing model.");
// add alias
DMLC_DECLARE_ALIAS(updater_seq, updater);
DMLC_DECLARE_FIELD(predictor)
.set_default("cpu_predictor")
.describe("Predictor algorithm type");
DMLC_DECLARE_FIELD(tree_method)
.set_default(TreeMethod::kAuto)
.add_enum("auto", TreeMethod::kAuto)
.add_enum("approx", TreeMethod::kApprox)
.add_enum("exact", TreeMethod::kExact)
.add_enum("hist", TreeMethod::kHist)
.add_enum("gpu_exact", TreeMethod::kGPUExact)
.add_enum("gpu_hist", TreeMethod::kGPUHist)
.describe("Choice of tree construction method.");
}
};
/*! \brief training parameters */
struct DartTrainParam : public dmlc::Parameter<DartTrainParam> {
/*! \brief type of sampling algorithm */
int sample_type;
/*! \brief type of normalization algorithm */
int normalize_type;
/*! \brief fraction of trees to drop during the dropout */
float rate_drop;
/*! \brief whether at least one tree should always be dropped during the dropout */
bool one_drop;
/*! \brief probability of skipping the dropout during an iteration */
float skip_drop;
/*! \brief learning step size for a time */
float learning_rate;
// declare parameters
DMLC_DECLARE_PARAMETER(DartTrainParam) {
DMLC_DECLARE_FIELD(sample_type)
.set_default(0)
.add_enum("uniform", 0)
.add_enum("weighted", 1)
.describe("Different types of sampling algorithm.");
DMLC_DECLARE_FIELD(normalize_type)
.set_default(0)
.add_enum("tree", 0)
.add_enum("forest", 1)
.describe("Different types of normalization algorithm.");
DMLC_DECLARE_FIELD(rate_drop)
.set_range(0.0f, 1.0f)
.set_default(0.0f)
.describe("Fraction of trees to drop during the dropout.");
DMLC_DECLARE_FIELD(one_drop)
.set_default(false)
.describe("Whether at least one tree should always be dropped during the dropout.");
DMLC_DECLARE_FIELD(skip_drop)
.set_range(0.0f, 1.0f)
.set_default(0.0f)
.describe("Probability of skipping the dropout during a boosting iteration.");
DMLC_DECLARE_FIELD(learning_rate)
.set_lower_bound(0.0f)
.set_default(0.3f)
.describe("Learning rate(step size) of update.");
DMLC_DECLARE_ALIAS(learning_rate, eta);
}
};
// gradient boosted trees
class GBTree : public GradientBooster {
public:
explicit GBTree(bst_float base_margin) : model_(base_margin) {}
void InitCache(const std::vector<std::shared_ptr<DMatrix> > &cache) {
cache_ = cache;
}
static void AssertGPUSupport() {
#ifndef XGBOOST_USE_CUDA
LOG(FATAL) << "XGBoost version not compiled with GPU support.";
#endif // XGBOOST_USE_CUDA
}
void Configure(const std::vector<std::pair<std::string, std::string> >& cfg) override;
// Revise `tree_method` and `updater` parameters after seeing the training
// data matrix
void PerformTreeMethodHeuristic(DMatrix* p_train,
std::map<std::string, std::string> cfg);
/*! \brief Map `tree_method` parameter to `updater` parameter */
void ConfigureUpdaters(const std::map<std::string, std::string>& cfg);
/*! \brief Carry out one iteration of boosting */
void DoBoost(DMatrix* p_fmat,
HostDeviceVector<GradientPair>* in_gpair,
ObjFunction* obj) override;
bool UseGPU() const override {
return tparam_.predictor == "gpu_predictor" ||
tparam_.tree_method == TreeMethod::kGPUHist ||
tparam_.tree_method == TreeMethod::kGPUExact;
}
void Load(dmlc::Stream* fi) override {
model_.Load(fi);
this->cfg_.clear();
this->cfg_.emplace_back(std::string("num_feature"),
common::ToString(model_.param.num_feature));
}
GBTreeTrainParam const& GetTrainParam() const {
return tparam_;
}
void Save(dmlc::Stream* fo) const override {
model_.Save(fo);
}
bool AllowLazyCheckPoint() const override {
return model_.param.num_output_group == 1 ||
tparam_.updater_seq.find("distcol") != std::string::npos;
}
void PredictBatch(DMatrix* p_fmat,
HostDeviceVector<bst_float>* out_preds,
unsigned ntree_limit) override {
predictor_->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit);
}
void PredictInstance(const SparsePage::Inst& inst,
std::vector<bst_float>* out_preds,
unsigned ntree_limit,
unsigned root_index) override {
predictor_->PredictInstance(inst, out_preds, model_,
ntree_limit, root_index);
}
void PredictLeaf(DMatrix* p_fmat,
std::vector<bst_float>* out_preds,
unsigned ntree_limit) override {
predictor_->PredictLeaf(p_fmat, out_preds, model_, ntree_limit);
}
void PredictContribution(DMatrix* p_fmat,
std::vector<bst_float>* out_contribs,
unsigned ntree_limit, bool approximate, int condition,
unsigned condition_feature) override {
predictor_->PredictContribution(p_fmat, out_contribs, model_, ntree_limit, approximate);
}
void PredictInteractionContributions(DMatrix* p_fmat,
std::vector<bst_float>* out_contribs,
unsigned ntree_limit, bool approximate) override {
predictor_->PredictInteractionContributions(p_fmat, out_contribs, model_,
ntree_limit, approximate);
}
std::vector<std::string> DumpModel(const FeatureMap& fmap,
bool with_stats,
std::string format) const override {
return model_.DumpModel(fmap, with_stats, format);
}
protected:
// initialize updaters before using them
void InitUpdater();
// boost trees for one output group
void BoostNewTrees(HostDeviceVector<GradientPair>* gpair,
DMatrix *p_fmat,
int bst_group,
std::vector<std::unique_ptr<RegTree> >* ret);
// commit new trees all at once
virtual void CommitModel(
std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees);
// --- data structure ---
GBTreeModel model_;
// training parameter
GBTreeTrainParam tparam_;
// ----training fields----
// configurations for tree
std::vector<std::pair<std::string, std::string> > cfg_;
// the updaters that can be applied to each of tree
std::vector<std::unique_ptr<TreeUpdater>> updaters_;
// Cached matrices
std::vector<std::shared_ptr<DMatrix>> cache_;
std::unique_ptr<Predictor> predictor_;
common::Monitor monitor_;
};
} // namespace gbm
} // namespace xgboost
#endif // XGBOOST_GBM_GBTREE_H_
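
Since `tree_method` is registered via `DECLARE_FIELD_ENUM_CLASS`, string values map directly onto the enum once the struct is initialized. A standalone sketch (hypothetical usage, not part of the commit):

xgboost::gbm::GBTreeTrainParam tp;
tp.InitAllowUnknown(
    std::vector<std::pair<std::string, std::string>>{{"tree_method", "gpu_hist"}});
CHECK(tp.tree_method == xgboost::TreeMethod::kGPUHist);
CHECK_EQ(tp.updater_seq, "grow_colmaker,prune");  // default until ConfigureUpdaters() remaps it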


@ -113,68 +113,6 @@ class LearnerImpl : public Learner {
name_gbm_ = "gbtree";
}
static void AssertGPUSupport() {
#ifndef XGBOOST_USE_CUDA
LOG(FATAL) << "XGBoost version not compiled with GPU support.";
#endif // XGBOOST_USE_CUDA
}
/*! \brief Map `tree_method` parameter to `updater` parameter */
void ConfigureUpdaters() {
// This method is not applicable to non-tree learners
if (cfg_.find("booster") != cfg_.cend() &&
(cfg_.at("booster") != "gbtree" && cfg_.at("booster") != "dart")) {
return;
}
// `updater` parameter was manually specified
if (cfg_.count("updater") > 0) {
LOG(WARNING) << "DANGER AHEAD: You have manually specified `updater` "
"parameter. The `tree_method` parameter will be ignored. "
"Incorrect sequence of updaters will produce undefined "
"behavior. For common uses, we recommend using "
"`tree_method` parameter instead.";
return;
}
/* Choose updaters according to tree_method parameters */
switch (tparam_.tree_method) {
case TreeMethod::kAuto:
// Use heuristic to choose between 'exact' and 'approx'
// This choice is deferred to PerformTreeMethodHeuristic().
break;
case TreeMethod::kApprox:
cfg_["updater"] = "grow_histmaker,prune";
break;
case TreeMethod::kExact:
cfg_["updater"] = "grow_colmaker,prune";
break;
case TreeMethod::kHist:
LOG(INFO) <<
"Tree method is selected to be 'hist', which uses a "
"single updater grow_quantile_histmaker.";
cfg_["updater"] = "grow_quantile_histmaker";
break;
case TreeMethod::kGPUExact:
this->AssertGPUSupport();
cfg_["updater"] = "grow_gpu,prune";
if (cfg_.count("predictor") == 0) {
cfg_["predictor"] = "gpu_predictor";
}
break;
case TreeMethod::kGPUHist:
this->AssertGPUSupport();
cfg_["updater"] = "grow_gpu_hist";
if (cfg_.count("predictor") == 0) {
cfg_["predictor"] = "gpu_predictor";
}
break;
default:
LOG(FATAL) << "Unknown tree_method ("
<< static_cast<int>(tparam_.tree_method) << ") detected";
}
}
void ConfigureObjective() {
if (cfg_.count("num_class") != 0) {
cfg_["num_output_group"] = cfg_["num_class"];
@ -192,9 +130,6 @@ class LearnerImpl : public Learner {
if (cfg_.count("objective") == 0) {
cfg_["objective"] = "reg:squarederror";
}
if (cfg_.count("booster") == 0) {
cfg_["booster"] = "gbtree";
}
}
// Configuration before data is known.
@ -231,13 +166,12 @@ class LearnerImpl : public Learner {
}
ConfigureObjective();
ConfigureUpdaters();
name_gbm_ = tparam_.booster;
// FIXME(trivialfis): So which one should go first? Init or Configure?
if (!this->ModelInitialized()) {
mparam_.InitAllowUnknown(args);
name_obj_ = cfg_["objective"];
name_gbm_ = cfg_["booster"];
// set seed only before the model is initialized
common::GlobalRandom().seed(tparam_.seed);
}
@ -263,18 +197,11 @@ class LearnerImpl : public Learner {
// Configuration can only be done after data is known
void ConfigurationWithKnownData(DMatrix* dmat) {
CHECK(ModelInitialized())
<< "Always call InitModel or Load before any evaluation.";
<< " Internal Error: Always call InitModel or Load before any evaluation.";
this->ValidateDMatrix(dmat);
// Configure GPU parameters
// FIXME(trivialfis): How do we know dependent parameters are all set?
if (tparam_.tree_method == TreeMethod::kGPUHist ||
tparam_.tree_method == TreeMethod::kGPUExact ||
(cfg_.find("updater") != cfg_.cend() && cfg_.at("updater") == "gpu_coord_descent") ||
(cfg_.find("predictor") != cfg_.cend() &&
cfg_.at("predictor") == "gpu_predictor")) {
if (cfg_.find("n_gpus") == cfg_.cend()) {
tparam_.n_gpus = 1;
}
CHECK(this->gbm_) << " Internal: GBM is not set";
if (this->gbm_->UseGPU() && cfg_.find("n_gpus") == cfg_.cend()) {
tparam_.n_gpus = 1;
}
}
@ -443,13 +370,26 @@ class LearnerImpl : public Learner {
}
}
void CheckDataSplitMode() {
if (rabit::IsDistributed()) {
CHECK(tparam_.dsplit != DataSplitMode::kAuto)
<< "Precondition violated; dsplit cannot be 'auto' in distributed mode";
if (tparam_.dsplit == DataSplitMode::kCol) {
// 'distcol' updater hidden until it becomes functional again
// See discussion at https://github.com/dmlc/xgboost/issues/1832
LOG(FATAL) << "Column-wise data split is currently not supported.";
}
}
}
void UpdateOneIter(int iter, DMatrix* train) override {
monitor_.Start("UpdateOneIter");
if (tparam_.seed_per_iteration || rabit::IsDistributed()) {
common::GlobalRandom().seed(tparam_.seed * kRandSeedMagic + iter);
}
this->PerformTreeMethodHeuristic(train);
// this->PerformTreeMethodHeuristic(train);
this->CheckDataSplitMode();
this->ConfigurationWithKnownData(train);
monitor_.Start("PredictRaw");
@ -468,7 +408,8 @@ class LearnerImpl : public Learner {
if (tparam_.seed_per_iteration || rabit::IsDistributed()) {
common::GlobalRandom().seed(tparam_.seed * kRandSeedMagic + iter);
}
this->PerformTreeMethodHeuristic(train);
this->CheckDataSplitMode();
// this->PerformTreeMethodHeuristic(train);
this->ConfigurationWithKnownData(train);
gbm_->DoBoost(train, in_gpair);
@ -573,108 +514,6 @@ class LearnerImpl : public Learner {
}
protected:
// Revise `tree_method` and `updater` parameters after seeing the training
// data matrix
inline void PerformTreeMethodHeuristic(DMatrix* p_train) {
if (name_gbm_ != "gbtree" || cfg_.count("updater") > 0) {
// 1. This method is not applicable for non-tree learners
// 2. This method is disabled when `updater` parameter is explicitly
// set, since only experts are expected to do so.
return;
}
const TreeMethod current_tree_method = tparam_.tree_method;
if (rabit::IsDistributed()) {
CHECK(tparam_.dsplit != DataSplitMode::kAuto)
<< "Precondition violated; dsplit cannot be 'auto' in distributed mode";
if (tparam_.dsplit == DataSplitMode::kCol) {
// 'distcol' updater hidden until it becomes functional again
// See discussion at https://github.com/dmlc/xgboost/issues/1832
LOG(FATAL) << "Column-wise data split is currently not supported.";
}
switch (current_tree_method) {
case TreeMethod::kAuto:
LOG(WARNING) <<
"Tree method is automatically selected to be 'approx' "
"for distributed training.";
break;
case TreeMethod::kApprox:
case TreeMethod::kHist:
// things are okay, do nothing
break;
case TreeMethod::kExact:
LOG(WARNING) << "Tree method was set to be "
<< "exact"
<< "', but only 'approx' and 'hist' is available for distributed "
"training. The `tree_method` parameter is now being "
"changed to 'approx'";
break;
case TreeMethod::kGPUExact:
case TreeMethod::kGPUHist:
LOG(FATAL) << "Distributed training is not available with GPU algoritms";
break;
default:
LOG(FATAL) << "Unknown tree_method ("
<< static_cast<int>(current_tree_method) << ") detected";
}
if (current_tree_method != TreeMethod::kHist) {
LOG(WARNING) << "Tree method is automatically selected to be 'approx'"
" for distributed training.";
tparam_.tree_method = TreeMethod::kApprox;
} else {
LOG(WARNING) << "Tree method is specified to be 'hist'"
" for distributed training.";
tparam_.tree_method = TreeMethod::kHist;
}
} else if (!p_train->SingleColBlock()) {
/* Some tree methods are not available for external-memory DMatrix */
switch (current_tree_method) {
case TreeMethod::kAuto:
LOG(WARNING) << "Tree method is automatically set to 'approx' "
"since external-memory data matrix is used.";
break;
case TreeMethod::kApprox:
// things are okay, do nothing
break;
case TreeMethod::kExact:
LOG(WARNING) << "Tree method was set to be 'exact', "
"but currently we are only able to proceed with "
"approximate algorithm ('approx') because external-"
"memory data matrix is used.";
break;
case TreeMethod::kHist:
// things are okay, do nothing
break;
case TreeMethod::kGPUExact:
case TreeMethod::kGPUHist:
LOG(FATAL)
<< "External-memory data matrix is not available with GPU algorithms";
break;
default:
LOG(FATAL) << "Unknown tree_method ("
<< static_cast<int>(current_tree_method) << ") detected";
}
tparam_.tree_method = TreeMethod::kApprox;
} else if (p_train->Info().num_row_ >= (4UL << 20UL)
&& current_tree_method == TreeMethod::kAuto) {
/* Choose tree_method='approx' automatically for large data matrix */
LOG(WARNING) << "Tree method is automatically selected to be "
"'approx' for faster speed. To use old behavior "
"(exact greedy algorithm on single machine), "
"set tree_method to 'exact'.";
tparam_.tree_method = TreeMethod::kApprox;
}
/* If tree_method was changed, re-configure updaters and gradient boosters */
if (tparam_.tree_method != current_tree_method) {
ConfigureUpdaters();
if (gbm_ != nullptr) {
gbm_->Configure(cfg_.begin(), cfg_.end());
}
}
}
// return whether model is already initialized.
inline bool ModelInitialized() const { return gbm_ != nullptr; }
// lazily initialize the model based on configuration if it haven't yet been initialized.


@ -6,12 +6,13 @@
*/
#include <rabit/rabit.h>
#include <xgboost/tree_updater.h>
#include <xgboost/logging.h>
#include <memory>
#include <vector>
#include <cmath>
#include <algorithm>
#include "./param.h"
#include "param.h"
#include "../common/random.h"
#include "../common/bitmap.h"
#include "split_evaluator.h"
@ -603,7 +604,7 @@ class ColMaker: public TreeUpdater {
poption = static_cast<int>(num_features) * 2 < this->nthread_ ? 1 : 0;
}
if (poption == 0) {
#pragma omp parallel for schedule(dynamic, batch_size)
for (bst_omp_uint i = 0; i < num_features; ++i) {
int fid = feat_set[i];
const int tid = omp_get_thread_num();


@ -0,0 +1,49 @@
#include <gtest/gtest.h>
#include <xgboost/generic_parameters.h>
#include "../helpers.h"
#include "../../../src/gbm/gbtree.h"
namespace xgboost {
TEST(GBTree, SelectTreeMethod) {
using Arg = std::pair<std::string, std::string>;
size_t constexpr kRows = 10;
size_t constexpr kCols = 10;
auto mat_ptr = CreateDMatrix(kRows, kCols, 0);
std::vector<std::shared_ptr<xgboost::DMatrix>> mat = {*mat_ptr};
LearnerTrainParam learner_param;
learner_param.InitAllowUnknown(std::vector<Arg>{Arg("n_gpus", "0")});
std::unique_ptr<GradientBooster> p_gbm{
GradientBooster::Create("gbtree", &learner_param, {}, 0)};
auto& gbtree = dynamic_cast<gbm::GBTree&> (*p_gbm);
// Test if `tree_method` can be set
std::string n_feat = std::to_string(kCols);
gbtree.Configure({Arg{"tree_method", "approx"}, Arg{"num_feature", n_feat}});
auto const& tparam = gbtree.GetTrainParam();
ASSERT_EQ(tparam.updater_seq, "grow_histmaker,prune");
gbtree.Configure({Arg("tree_method", "exact"), Arg("num_feature", n_feat)});
ASSERT_EQ(tparam.updater_seq, "grow_colmaker,prune");
gbtree.Configure({Arg("tree_method", "hist"), Arg("num_feature", n_feat)});
ASSERT_EQ(tparam.updater_seq, "grow_quantile_histmaker");
ASSERT_EQ(tparam.predictor, "cpu_predictor");
gbtree.Configure({Arg{"booster", "dart"}, Arg{"tree_method", "hist"},
Arg{"num_feature", n_feat}});
ASSERT_EQ(tparam.updater_seq, "grow_quantile_histmaker");
#ifdef XGBOOST_USE_CUDA
learner_param.InitAllowUnknown(std::vector<Arg>{Arg{"n_gpus", "1"}});
gbtree.Configure({Arg("tree_method", "gpu_exact"),
Arg("num_feature", n_feat)});
ASSERT_EQ(tparam.updater_seq, "grow_gpu,prune");
ASSERT_EQ(tparam.predictor, "gpu_predictor");
gbtree.Configure({Arg("tree_method", "gpu_hist"), Arg("num_feature", n_feat)});
ASSERT_EQ(tparam.updater_seq, "grow_gpu_hist");
ASSERT_EQ(tparam.predictor, "gpu_predictor");
gbtree.Configure({Arg{"booster", "dart"}, Arg{"tree_method", "gpu_hist"},
Arg{"num_feature", n_feat}});
ASSERT_EQ(tparam.updater_seq, "grow_gpu_hist");
#endif
delete mat_ptr;
}
} // namespace xgboost


@ -145,25 +145,30 @@ TEST(gpu_predictor, MGPU_PicklingTest) {
}
// Load data matrix
CheckCAPICall(XGDMatrixCreateFromFile(tmp_file.c_str(), 0, &dmat[0]));
CheckCAPICall(XGDMatrixSetFloatInfo(dmat[0], "label", label.data(), 200));
ASSERT_EQ(XGDMatrixCreateFromFile(
tmp_file.c_str(), 0, &dmat[0]), 0) << XGBGetLastError();
ASSERT_EQ(XGDMatrixSetFloatInfo(
dmat[0], "label", label.data(), 200), 0) << XGBGetLastError();
// Create booster
CheckCAPICall(XGBoosterCreate(dmat, 1, &bst));
ASSERT_EQ(XGBoosterCreate(dmat, 1, &bst), 0) << XGBGetLastError();
// Set parameters
CheckCAPICall(XGBoosterSetParam(bst, "seed", "0"));
CheckCAPICall(XGBoosterSetParam(bst, "base_score", "0.5"));
CheckCAPICall(XGBoosterSetParam(bst, "booster", "gbtree"));
CheckCAPICall(XGBoosterSetParam(bst, "learning_rate", "0.01"));
CheckCAPICall(XGBoosterSetParam(bst, "max_depth", "8"));
CheckCAPICall(XGBoosterSetParam(bst, "objective", "binary:logistic"));
CheckCAPICall(XGBoosterSetParam(bst, "seed", "123"));
CheckCAPICall(XGBoosterSetParam(bst, "tree_method", "gpu_hist"));
CheckCAPICall(XGBoosterSetParam(bst, "n_gpus", std::to_string(ngpu).c_str()));
CheckCAPICall(XGBoosterSetParam(bst, "predictor", "gpu_predictor"));
ASSERT_EQ(XGBoosterSetParam(bst, "seed", "0"), 0) << XGBGetLastError();
ASSERT_EQ(XGBoosterSetParam(bst, "base_score", "0.5"), 0) << XGBGetLastError();
ASSERT_EQ(XGBoosterSetParam(bst, "booster", "gbtree"), 0) << XGBGetLastError();
ASSERT_EQ(XGBoosterSetParam(bst, "learning_rate", "0.01"), 0) << XGBGetLastError();
ASSERT_EQ(XGBoosterSetParam(bst, "max_depth", "8"), 0) << XGBGetLastError();
ASSERT_EQ(XGBoosterSetParam(
bst, "objective", "binary:logistic"), 0) << XGBGetLastError();
ASSERT_EQ(XGBoosterSetParam(bst, "seed", "123"), 0) << XGBGetLastError();
ASSERT_EQ(XGBoosterSetParam(
bst, "tree_method", "gpu_hist"), 0) << XGBGetLastError();
ASSERT_EQ(XGBoosterSetParam(
bst, "n_gpus", std::to_string(ngpu).c_str()), 0) << XGBGetLastError();
ASSERT_EQ(XGBoosterSetParam(bst, "predictor", "gpu_predictor"), 0) << XGBGetLastError();
// Run boosting iterations
for (int i = 0; i < 10; ++i) {
CheckCAPICall(XGBoosterUpdateOneIter(bst, i, dmat[0]));
ASSERT_EQ(XGBoosterUpdateOneIter(bst, i, dmat[0]), 0) << XGBGetLastError();
}
// Delete matrix


@ -19,40 +19,6 @@ TEST(Learner, Basic) {
delete mat_ptr;
}
TEST(Learner, SelectTreeMethod) {
using Arg = std::pair<std::string, std::string>;
auto mat_ptr = CreateDMatrix(10, 10, 0);
std::vector<std::shared_ptr<xgboost::DMatrix>> mat = {*mat_ptr};
auto learner = std::unique_ptr<Learner>(Learner::Create(mat));
// Test if `tree_method` can be set
learner->Configure({Arg("tree_method", "approx")});
ASSERT_EQ(learner->GetConfigurationArguments().at("updater"),
"grow_histmaker,prune");
learner->Configure({Arg("tree_method", "exact")});
ASSERT_EQ(learner->GetConfigurationArguments().at("updater"),
"grow_colmaker,prune");
learner->Configure({Arg("tree_method", "hist")});
ASSERT_EQ(learner->GetConfigurationArguments().at("updater"),
"grow_quantile_histmaker");
learner->Configure({Arg{"booster", "dart"}, Arg{"tree_method", "hist"}});
ASSERT_EQ(learner->GetConfigurationArguments().at("updater"),
"grow_quantile_histmaker");
#ifdef XGBOOST_USE_CUDA
learner->Configure({Arg("tree_method", "gpu_exact")});
ASSERT_EQ(learner->GetConfigurationArguments().at("updater"),
"grow_gpu,prune");
learner->Configure({Arg("tree_method", "gpu_hist")});
ASSERT_EQ(learner->GetConfigurationArguments().at("updater"),
"grow_gpu_hist");
learner->Configure({Arg{"booster", "dart"}, Arg{"tree_method", "gpu_hist"}});
ASSERT_EQ(learner->GetConfigurationArguments().at("updater"),
"grow_gpu_hist");
#endif
delete mat_ptr;
}
TEST(Learner, CheckGroup) {
using Arg = std::pair<std::string, std::string>;
size_t constexpr kNumGroups = 4;