Offload some configurations into GBM. (#4553)
This is part 1 of refactoring configuration.

* Move tree heuristic configurations.
* Split up declarations and definitions for GBTree.
* Implement UseGPU in gbm.
parent a2042b685a
commit c5719cc457
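Note: the heart of the change is the new `UseGPU()` query on `GradientBooster`. The `Learner` now asks the booster whether a GPU code path is active instead of re-deriving that from raw configuration keys. A minimal sketch of the resulting flow, condensed from the hunks below (not a verbatim copy of any single file; paths follow the usual XGBoost layout):

    // include/xgboost/gbm.h: every booster must answer the question.
    virtual bool UseGPU() const = 0;

    // src/gbm/gbtree.h: GBTree answers from its own training parameters.
    bool UseGPU() const override {
      return tparam_.predictor == "gpu_predictor" ||
             tparam_.tree_method == TreeMethod::kGPUHist ||
             tparam_.tree_method == TreeMethod::kGPUExact;
    }

    // src/learner.cc: the learner then defaults n_gpus to 1 when the booster
    // reports GPU use and the user did not set n_gpus explicitly.
    if (this->gbm_->UseGPU() && cfg_.find("n_gpus") == cfg_.cend()) {
      tparam_.n_gpus = 1;
    }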
@@ -146,6 +146,10 @@ class GradientBooster {
   virtual std::vector<std::string> DumpModel(const FeatureMap& fmap,
                                              bool with_stats,
                                              std::string format) const = 0;
+  /*!
+   * \brief Whether the current booster use GPU.
+   */
+  virtual bool UseGPU() const = 0;
   /*!
    * \brief create a gradient booster from given name
    * \param name name of gradient booster
@@ -8,18 +8,15 @@
 #include <dmlc/parameter.h>
 #include <xgboost/enum_class_param.h>
 
+#include <string>
+
 namespace xgboost {
-enum class TreeMethod : int {
-  kAuto = 0, kApprox = 1, kExact = 2, kHist = 3,
-  kGPUExact = 4, kGPUHist = 5
-};
 
 enum class DataSplitMode : int {
   kAuto = 0, kCol = 1, kRow = 2
 };
 }  // namespace xgboost
 
-DECLARE_FIELD_ENUM_CLASS(xgboost::TreeMethod);
 DECLARE_FIELD_ENUM_CLASS(xgboost::DataSplitMode);
 
 namespace xgboost {
@@ -30,8 +27,6 @@ struct LearnerTrainParam : public dmlc::Parameter<LearnerTrainParam> {
   bool seed_per_iteration;
   // data split mode, can be row, col, or none.
   DataSplitMode dsplit;
-  // tree construction method
-  TreeMethod tree_method;
   // number of threads to use if OpenMP is enabled
   // if equals 0, use system default
   int nthread;
@@ -42,6 +37,8 @@ struct LearnerTrainParam : public dmlc::Parameter<LearnerTrainParam> {
   // number of devices to use, -1 implies using all available devices.
   int n_gpus;
 
+  std::string booster;
+
   // declare parameters
   DMLC_DECLARE_PARAMETER(LearnerTrainParam) {
     DMLC_DECLARE_FIELD(seed).set_default(0).describe(
@@ -58,15 +55,6 @@ struct LearnerTrainParam : public dmlc::Parameter<LearnerTrainParam> {
         .add_enum("col", DataSplitMode::kCol)
         .add_enum("row", DataSplitMode::kRow)
         .describe("Data split mode for distributed training.");
-    DMLC_DECLARE_FIELD(tree_method)
-        .set_default(TreeMethod::kAuto)
-        .add_enum("auto", TreeMethod::kAuto)
-        .add_enum("approx", TreeMethod::kApprox)
-        .add_enum("exact", TreeMethod::kExact)
-        .add_enum("hist", TreeMethod::kHist)
-        .add_enum("gpu_exact", TreeMethod::kGPUExact)
-        .add_enum("gpu_hist", TreeMethod::kGPUHist)
-        .describe("Choice of tree construction method.");
     DMLC_DECLARE_FIELD(nthread).set_default(0).describe(
         "Number of threads to use.");
     DMLC_DECLARE_FIELD(disable_default_eval_metric)
@@ -79,6 +67,9 @@ struct LearnerTrainParam : public dmlc::Parameter<LearnerTrainParam> {
         .set_default(0)
         .set_lower_bound(-1)
         .describe("Number of GPUs to use for multi-gpu algorithms.");
+    DMLC_DECLARE_FIELD(booster)
+        .set_default("gbtree")
+        .describe("Gradient booster used for training.");
   }
 };
 }  // namespace xgboost
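Note: with `booster` promoted to a `LearnerTrainParam` field, the chosen booster name is read from the parsed parameter object rather than from the raw key-value map (the learner.cc hunk below sets `name_gbm_ = tparam_.booster;`). A small standalone sketch of the field's behaviour, assuming a default-constructed parameter object:

    LearnerTrainParam tparam;
    tparam.InitAllowUnknown(
        std::vector<std::pair<std::string, std::string>>{{"booster", "dart"}});
    CHECK_EQ(tparam.booster, "dart");  // left unset it defaults to "gbtree"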
@@ -37,6 +37,7 @@ GPUSet GPUSet::All(GpuIdType gpu_id, GpuIdType n_gpus, int32_t n_rows) {
   CHECK_GE(n_gpus, -1) << "n_gpus must be >= -1.";
 
   GpuIdType const n_devices_visible = AllVisible().Size();
+  CHECK_LE(n_gpus, n_devices_visible);
   if (n_devices_visible == 0 || n_gpus == 0 || n_rows == 0) {
     LOG(DEBUG) << "Runing on CPU.";
     return Empty();
@@ -360,7 +360,9 @@ struct HostDeviceVectorImpl {
 
   void Shard(const GPUDistribution& distribution) {
     if (distribution_ == distribution) { return; }
-    CHECK(distribution_.IsEmpty());
+    CHECK(distribution_.IsEmpty())
+        << "This: " << distribution_.Devices().Size() << ", "
+        << "Others: " << distribution.Devices().Size();
     distribution_ = distribution;
     InitShards();
   }
@@ -180,6 +180,14 @@ class GBLinear : public GradientBooster {
     return model_.DumpModel(fmap, with_stats, format);
   }
 
+  bool UseGPU() const override {
+    if (param_.updater == "gpu_coord_descent") {
+      return true;
+    } else {
+      return false;
+    }
+  }
+
  protected:
   void PredictBatchInternal(DMatrix *p_fmat,
                             std::vector<bst_float> *out_preds) {
@@ -257,6 +265,7 @@ class GBLinear : public GradientBooster {
     }
     preds[gid] = psum;
   }
+
   // biase margin score
   bst_float base_margin_;
   // model field
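Note: the `GBLinear::UseGPU` body added above reduces to a single comparison; an equivalent one-liner would be:

    bool UseGPU() const override {
      return param_.updater == "gpu_coord_descent";
    }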
src/gbm/gbtree.cc
@@ -1,5 +1,5 @@
 /*!
- * Copyright 2014 by Contributors
+ * Copyright 2014-2019 by Contributors
  * \file gbtree.cc
  * \brief gradient boosted tree implementation.
  * \author Tianqi Chen
@@ -11,167 +11,197 @@
 #include <xgboost/gbm.h>
 #include <xgboost/predictor.h>
 #include <xgboost/tree_updater.h>
 
 #include <vector>
 #include <memory>
 #include <utility>
 #include <string>
 #include <limits>
 #include <algorithm>
 
 #include "../common/common.h"
 #include "../common/host_device_vector.h"
 #include "../common/random.h"
+#include "gbtree.h"
 #include "gbtree_model.h"
 #include "../common/timer.h"
 
 
 namespace xgboost {
 namespace gbm {
 
 DMLC_REGISTRY_FILE_TAG(gbtree);
 
-// boosting process types
-enum TreeProcessType {
-  kDefault,
-  kUpdate
-};
-
-/*! \brief training parameters */
-struct GBTreeTrainParam : public dmlc::Parameter<GBTreeTrainParam> {
-  /*!
-   * \brief number of parallel trees constructed each iteration
-   *  use this option to support boosted random forest
-   */
-  int num_parallel_tree;
-  /*! \brief tree updater sequence */
-  std::string updater_seq;
-  /*! \brief type of boosting process to run */
-  int process_type;
-  std::string predictor;
-  // declare parameters
-  DMLC_DECLARE_PARAMETER(GBTreeTrainParam) {
-    DMLC_DECLARE_FIELD(num_parallel_tree)
-        .set_default(1)
-        .set_lower_bound(1)
-        .describe("Number of parallel trees constructed during each iteration."\
-                  " This option is used to support boosted random forest.");
-    DMLC_DECLARE_FIELD(updater_seq)
-        .set_default("grow_colmaker,prune")
-        .describe("Tree updater sequence.");
-    DMLC_DECLARE_FIELD(process_type)
-        .set_default(kDefault)
-        .add_enum("default", kDefault)
-        .add_enum("update", kUpdate)
-        .describe("Whether to run the normal boosting process that creates new trees,"\
-                  " or to update the trees in an existing model.");
-    // add alias
-    DMLC_DECLARE_ALIAS(updater_seq, updater);
-    DMLC_DECLARE_FIELD(predictor)
-        .set_default("cpu_predictor")
-        .describe("Predictor algorithm type");
-  }
-};
-
-/*! \brief training parameters */
-struct DartTrainParam : public dmlc::Parameter<DartTrainParam> {
-  /*! \brief type of sampling algorithm */
-  int sample_type;
-  /*! \brief type of normalization algorithm */
-  int normalize_type;
-  /*! \brief fraction of trees to drop during the dropout */
-  float rate_drop;
-  /*! \brief whether at least one tree should always be dropped during the dropout */
-  bool one_drop;
-  /*! \brief probability of skipping the dropout during an iteration */
-  float skip_drop;
-  /*! \brief learning step size for a time */
-  float learning_rate;
-  // declare parameters
-  DMLC_DECLARE_PARAMETER(DartTrainParam) {
-    DMLC_DECLARE_FIELD(sample_type)
-        .set_default(0)
-        .add_enum("uniform", 0)
-        .add_enum("weighted", 1)
-        .describe("Different types of sampling algorithm.");
-    DMLC_DECLARE_FIELD(normalize_type)
-        .set_default(0)
-        .add_enum("tree", 0)
-        .add_enum("forest", 1)
-        .describe("Different types of normalization algorithm.");
-    DMLC_DECLARE_FIELD(rate_drop)
-        .set_range(0.0f, 1.0f)
-        .set_default(0.0f)
-        .describe("Fraction of trees to drop during the dropout.");
-    DMLC_DECLARE_FIELD(one_drop)
-        .set_default(false)
-        .describe("Whether at least one tree should always be dropped during the dropout.");
-    DMLC_DECLARE_FIELD(skip_drop)
-        .set_range(0.0f, 1.0f)
-        .set_default(0.0f)
-        .describe("Probability of skipping the dropout during a boosting iteration.");
-    DMLC_DECLARE_FIELD(learning_rate)
-        .set_lower_bound(0.0f)
-        .set_default(0.3f)
-        .describe("Learning rate(step size) of update.");
-    DMLC_DECLARE_ALIAS(learning_rate, eta);
-  }
-};
-
-
-// cache entry
-struct CacheEntry {
-  std::shared_ptr<DMatrix> data;
-  std::vector<bst_float> predictions;
-};
-
-// gradient boosted trees
-class GBTree : public GradientBooster {
- public:
-  explicit GBTree(bst_float base_margin) : model_(base_margin) {}
-
-  void InitCache(const std::vector<std::shared_ptr<DMatrix> > &cache) {
-    cache_ = cache;
-  }
-
-  void Configure(const std::vector<std::pair<std::string, std::string> >& cfg) override {
+void GBTree::Configure(const std::vector<std::pair<std::string, std::string> >& cfg) {
   this->cfg_ = cfg;
-    model_.Configure(cfg);
-    // initialize the updaters only when needed.
-    std::string updater_seq = tparam_.updater_seq;
   tparam_.InitAllowUnknown(cfg);
-    if (updater_seq != tparam_.updater_seq) updaters_.clear();
-    for (const auto& up : updaters_) {
-      up->Init(cfg);
-    }
+  std::string updater_seq = tparam_.updater_seq;
+  ConfigureUpdaters({cfg.begin(), cfg.cend()});
+  model_.Configure(cfg);
 
   // for the 'update' process_type, move trees into trees_to_update
-  if (tparam_.process_type == kUpdate) {
+  if (tparam_.process_type == TreeProcessType::kUpdate) {
     model_.InitTreesToUpdate();
   }
 
   // configure predictor
-  predictor_ = std::unique_ptr<Predictor>(Predictor::Create(tparam_.predictor, learner_param_));
+  predictor_ = std::unique_ptr<Predictor>(
+      Predictor::Create(tparam_.predictor, this->learner_param_));
   predictor_->Init(cfg, cache_);
   monitor_.Init("GBTree");
 }
 
-  void Load(dmlc::Stream* fi) override {
-    model_.Load(fi);
-    this->cfg_.clear();
-    this->cfg_.emplace_back(std::string("num_feature"),
-                            common::ToString(model_.param.num_feature));
+void GBTree::PerformTreeMethodHeuristic(DMatrix* p_train,
+                                        std::map<std::string, std::string> cfg) {
+  if (cfg.find("updater") != cfg.cend()) {
+    // This method is disabled when `updater` parameter is explicitly
+    // set, since only experts are expected to do so.
+    return;
   }
 
-  void Save(dmlc::Stream* fo) const override {
-    model_.Save(fo);
+  const TreeMethod current_tree_method = tparam_.tree_method;
+  if (rabit::IsDistributed()) {
+    switch (current_tree_method) {
+      case TreeMethod::kAuto:
+        LOG(WARNING) <<
+            "Tree method is automatically selected to be 'approx' "
+            "for distributed training.";
+        break;
+      case TreeMethod::kApprox:
+      case TreeMethod::kHist:
+        // things are okay, do nothing
+        break;
+      case TreeMethod::kExact:
+        LOG(WARNING) << "Tree method was set to be "
+                     << "exact"
+                     << "', but only 'approx' and 'hist' is available for distributed "
+                        "training. The `tree_method` parameter is now being "
+                        "changed to 'approx'";
+        break;
+      case TreeMethod::kGPUExact:
+        // FIXME(trivialfis): Remove this line once GPU Exact is removed.
+        LOG(FATAL) << "Distributed training is not available with GPU Exact algorithm.";
+        break;
+      case TreeMethod::kGPUHist:
+        break;
+      default:
+        LOG(FATAL) << "Unknown tree_method ("
+                   << static_cast<int>(current_tree_method) << ") detected";
+    }
+    if (current_tree_method != TreeMethod::kHist) {
+      LOG(WARNING) << "Tree method is automatically selected to be 'approx'"
+                      " for distributed training.";
+      tparam_.tree_method = TreeMethod::kApprox;
+    } else {
+      LOG(WARNING) << "Tree method is specified to be 'hist'"
+                      " for distributed training.";
+      tparam_.tree_method = TreeMethod::kHist;
+    }
+  } else if (!p_train->SingleColBlock()) {
+    /* Some tree methods are not available for external-memory DMatrix */
+    switch (current_tree_method) {
+      case TreeMethod::kAuto:
+        LOG(WARNING) << "Tree method is automatically set to 'approx' "
+                        "since external-memory data matrix is used.";
+        break;
+      case TreeMethod::kApprox:
+        // things are okay, do nothing
+        break;
+      case TreeMethod::kExact:
+        LOG(WARNING) << "Tree method was set to be 'exact', "
+                        "but currently we are only able to proceed with "
+                        "approximate algorithm ('approx') because external-"
+                        "memory data matrix is used.";
+        break;
+      case TreeMethod::kHist:
+        // things are okay, do nothing
+        break;
+      case TreeMethod::kGPUExact:
+      case TreeMethod::kGPUHist:
+        LOG(FATAL)
+            << "External-memory data matrix is not available with GPU algorithms";
+        break;
+      default:
+        LOG(FATAL) << "Unknown tree_method ("
+                   << static_cast<int>(current_tree_method) << ") detected";
+    }
+    tparam_.tree_method = TreeMethod::kApprox;
+  } else if (p_train->Info().num_row_ >= (4UL << 20UL)
+             && current_tree_method == TreeMethod::kAuto) {
+    /* Choose tree_method='approx' automatically for large data matrix */
+    LOG(WARNING) << "Tree method is automatically selected to be "
+                    "'approx' for faster speed. To use old behavior "
+                    "(exact greedy algorithm on single machine), "
+                    "set tree_method to 'exact'.";
+    tparam_.tree_method = TreeMethod::kApprox;
   }
+  LOG(DEBUG) << "Using predictor: " << tparam_.predictor;
 }
 
-  bool AllowLazyCheckPoint() const override {
-    return model_.param.num_output_group == 1 ||
-        tparam_.updater_seq.find("distcol") != std::string::npos;
+void GBTree::ConfigureUpdaters(const std::map<std::string, std::string>& cfg) {
+  // `updater` parameter was manually specified
+  if (cfg.find("updater") != cfg.cend()) {
+    LOG(WARNING) << "DANGER AHEAD: You have manually specified `updater` "
+                    "parameter. The `tree_method` parameter will be ignored. "
+                    "Incorrect sequence of updaters will produce undefined "
+                    "behavior. For common uses, we recommend using "
+                    "`tree_method` parameter instead.";
+    return;
   }
 
-  void DoBoost(DMatrix* p_fmat,
+  /* Choose updaters according to tree_method parameters */
+  switch (tparam_.tree_method) {
+    case TreeMethod::kAuto:
+      // Use heuristic to choose between 'exact' and 'approx'
+      // This choice is deferred to PerformTreeMethodHeuristic().
+      break;
+    case TreeMethod::kApprox:
+      tparam_.updater_seq = "grow_histmaker,prune";
+      break;
+    case TreeMethod::kExact:
+      tparam_.updater_seq = "grow_colmaker,prune";
+      break;
+    case TreeMethod::kHist:
+      LOG(INFO) <<
+          "Tree method is selected to be 'hist', which uses a "
+          "single updater grow_quantile_histmaker.";
+      tparam_.updater_seq = "grow_quantile_histmaker";
+      break;
+    case TreeMethod::kGPUExact:
+      this->AssertGPUSupport();
+      tparam_.updater_seq = "grow_gpu,prune";
+      if (cfg.find("predictor") == cfg.cend()) {
+        tparam_.predictor = "gpu_predictor";
+      }
+      break;
+    case TreeMethod::kGPUHist:
+      this->AssertGPUSupport();
+      tparam_.updater_seq = "grow_gpu_hist";
+      if (cfg.find("predictor") == cfg.cend()) {
+        tparam_.predictor = "gpu_predictor";
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unknown tree_method ("
+                 << static_cast<int>(tparam_.tree_method) << ") detected";
+  }
+}
+
+void GBTree::DoBoost(DMatrix* p_fmat,
                      HostDeviceVector<GradientPair>* in_gpair,
-               ObjFunction* obj) override {
+                     ObjFunction* obj) {
+  std::string updater_seq = tparam_.updater_seq;
+  this->PerformTreeMethodHeuristic(p_fmat, {this->cfg_.begin(), this->cfg_.end()});
+  this->ConfigureUpdaters({this->cfg_.begin(), this->cfg_.end()});
+  LOG(DEBUG) << "Using updaters: " << tparam_.updater_seq;
+  // initialize the updaters only when needed.
+  if (updater_seq != tparam_.updater_seq) {
+    this->updaters_.clear();
+  }
+
   std::vector<std::vector<std::unique_ptr<RegTree> > > new_trees;
   const int ngroup = model_.param.num_output_group;
   monitor_.Start("BoostNewTrees");
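For quick reference, the `tree_method` to `updater_seq` mapping that `GBTree::ConfigureUpdaters` implements above (the two GPU methods additionally default `predictor` to "gpu_predictor" when none was given explicitly):

    // auto      -> left to PerformTreeMethodHeuristic()
    // approx    -> "grow_histmaker,prune"
    // exact     -> "grow_colmaker,prune"
    // hist      -> "grow_quantile_histmaker"
    // gpu_exact -> "grow_gpu,prune"   (+ predictor = "gpu_predictor")
    // gpu_hist  -> "grow_gpu_hist"    (+ predictor = "gpu_predictor")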
@@ -205,49 +235,7 @@ class GBTree : public GradientBooster {
   monitor_.Stop("CommitModel");
 }
 
-  void PredictBatch(DMatrix* p_fmat,
-                    HostDeviceVector<bst_float>* out_preds,
-                    unsigned ntree_limit) override {
-    predictor_->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit);
-  }
-
-  void PredictInstance(const SparsePage::Inst& inst,
-                       std::vector<bst_float>* out_preds,
-                       unsigned ntree_limit,
-                       unsigned root_index) override {
-    predictor_->PredictInstance(inst, out_preds, model_,
-                                ntree_limit, root_index);
-  }
-
-  void PredictLeaf(DMatrix* p_fmat,
-                   std::vector<bst_float>* out_preds,
-                   unsigned ntree_limit) override {
-    predictor_->PredictLeaf(p_fmat, out_preds, model_, ntree_limit);
-  }
-
-  void PredictContribution(DMatrix* p_fmat,
-                           std::vector<bst_float>* out_contribs,
-                           unsigned ntree_limit, bool approximate, int condition,
-                           unsigned condition_feature) override {
-    predictor_->PredictContribution(p_fmat, out_contribs, model_, ntree_limit, approximate);
-  }
-
-  void PredictInteractionContributions(DMatrix* p_fmat,
-                                       std::vector<bst_float>* out_contribs,
-                                       unsigned ntree_limit, bool approximate) override {
-    predictor_->PredictInteractionContributions(p_fmat, out_contribs, model_,
-                                                ntree_limit, approximate);
-  }
-
-  std::vector<std::string> DumpModel(const FeatureMap& fmap,
-                                     bool with_stats,
-                                     std::string format) const override {
-    return model_.DumpModel(fmap, with_stats, format);
-  }
-
- protected:
-  // initialize updater before using them
-  inline void InitUpdater() {
+void GBTree::InitUpdater() {
   if (updaters_.size() != 0) return;
   std::string tval = tparam_.updater_seq;
   std::vector<std::string> ups = common::Split(tval, ',');
@@ -258,8 +246,7 @@ class GBTree : public GradientBooster {
   }
 }
 
-  // do group specific group
-  inline void BoostNewTrees(HostDeviceVector<GradientPair>* gpair,
+void GBTree::BoostNewTrees(HostDeviceVector<GradientPair>* gpair,
                            DMatrix *p_fmat,
                            int bst_group,
                            std::vector<std::unique_ptr<RegTree> >* ret) {
@@ -268,13 +255,13 @@ class GBTree : public GradientBooster {
   ret->clear();
   // create the trees
   for (int i = 0; i < tparam_.num_parallel_tree; ++i) {
-    if (tparam_.process_type == kDefault) {
+    if (tparam_.process_type == TreeProcessType::kDefault) {
       // create new tree
       std::unique_ptr<RegTree> ptr(new RegTree());
       ptr->param.InitAllowUnknown(this->cfg_);
       new_trees.push_back(ptr.get());
       ret->push_back(std::move(ptr));
-    } else if (tparam_.process_type == kUpdate) {
+    } else if (tparam_.process_type == TreeProcessType::kUpdate) {
       CHECK_LT(model_.trees.size(), model_.trees_to_update.size());
       // move an existing tree from trees_to_update
       auto t = std::move(model_.trees_to_update[model_.trees.size() +
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// commit new trees all at once
|
void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees) {
|
||||||
virtual void
|
|
||||||
CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees) {
|
|
||||||
int num_new_trees = 0;
|
int num_new_trees = 0;
|
||||||
for (int gid = 0; gid < model_.param.num_output_group; ++gid) {
|
for (int gid = 0; gid < model_.param.num_output_group; ++gid) {
|
||||||
num_new_trees += new_trees[gid].size();
|
num_new_trees += new_trees[gid].size();
|
||||||
@@ -300,20 +285,6 @@ class GBTree : public GradientBooster {
   predictor_->UpdatePredictionCache(model_, &updaters_, num_new_trees);
 }
 
-  // --- data structure ---
-  GBTreeModel model_;
-  // training parameter
-  GBTreeTrainParam tparam_;
-  // ----training fields----
-  // configurations for tree
-  std::vector<std::pair<std::string, std::string> > cfg_;
-  // the updaters that can be applied to each of tree
-  std::vector<std::unique_ptr<TreeUpdater>> updaters_;
-  // Cached matrices
-  std::vector<std::shared_ptr<DMatrix>> cache_;
-  std::unique_ptr<Predictor> predictor_;
-  common::Monitor monitor_;
-};
-
 // dart
 class Dart : public GBTree {
@@ -372,6 +343,10 @@ class Dart : public GBTree {
     }
   }
 
+  bool UseGPU() const override {
+    return false;
+  }
+
  protected:
   friend class GBTree;
   // internal prediction loop
src/gbm/gbtree.h (new file, 269 lines)
@@ -0,0 +1,269 @@
+/*!
+ * Copyright 2014-2019 by Contributors
+ * \file gbtree.cc
+ * \brief gradient boosted tree implementation.
+ * \author Tianqi Chen
+ */
+#ifndef XGBOOST_GBM_GBTREE_H_
+#define XGBOOST_GBM_GBTREE_H_
+
+#include <dmlc/omp.h>
+#include <dmlc/parameter.h>
+
+#include <xgboost/logging.h>
+#include <xgboost/gbm.h>
+#include <xgboost/predictor.h>
+#include <xgboost/tree_updater.h>
+#include <xgboost/enum_class_param.h>
+
+#include <vector>
+#include <map>
+#include <memory>
+#include <utility>
+#include <string>
+
+#include "gbtree_model.h"
+#include "../common/common.h"
+#include "../common/host_device_vector.h"
+#include "../common/timer.h"
+
+namespace xgboost {
+enum class TreeMethod : int {
+  kAuto = 0, kApprox = 1, kExact = 2, kHist = 3,
+  kGPUExact = 4, kGPUHist = 5
+};
+
+// boosting process types
+enum class TreeProcessType : int {
+  kDefault = 0,
+  kUpdate = 1
+};
+}  // namespace xgboost
+
+DECLARE_FIELD_ENUM_CLASS(xgboost::TreeMethod);
+DECLARE_FIELD_ENUM_CLASS(xgboost::TreeProcessType);
+
+namespace xgboost {
+namespace gbm {
+
+/*! \brief training parameters */
+struct GBTreeTrainParam : public dmlc::Parameter<GBTreeTrainParam> {
+  /*!
+   * \brief number of parallel trees constructed each iteration
+   *  use this option to support boosted random forest
+   */
+  int num_parallel_tree;
+  /*! \brief tree updater sequence */
+  std::string updater_seq;
+  /*! \brief type of boosting process to run */
+  TreeProcessType process_type;
+  // predictor name
+  std::string predictor;
+  // tree construction method
+  TreeMethod tree_method;
+  // declare parameters
+  DMLC_DECLARE_PARAMETER(GBTreeTrainParam) {
+    DMLC_DECLARE_FIELD(num_parallel_tree)
+        .set_default(1)
+        .set_lower_bound(1)
+        .describe("Number of parallel trees constructed during each iteration."\
+                  " This option is used to support boosted random forest.");
+    DMLC_DECLARE_FIELD(updater_seq)
+        .set_default("grow_colmaker,prune")
+        .describe("Tree updater sequence.");
+    DMLC_DECLARE_FIELD(process_type)
+        .set_default(TreeProcessType::kDefault)
+        .add_enum("default", TreeProcessType::kDefault)
+        .add_enum("update", TreeProcessType::kUpdate)
+        .describe("Whether to run the normal boosting process that creates new trees,"\
+                  " or to update the trees in an existing model.");
+    // add alias
+    DMLC_DECLARE_ALIAS(updater_seq, updater);
+    DMLC_DECLARE_FIELD(predictor)
+        .set_default("cpu_predictor")
+        .describe("Predictor algorithm type");
+    DMLC_DECLARE_FIELD(tree_method)
+        .set_default(TreeMethod::kAuto)
+        .add_enum("auto", TreeMethod::kAuto)
+        .add_enum("approx", TreeMethod::kApprox)
+        .add_enum("exact", TreeMethod::kExact)
+        .add_enum("hist", TreeMethod::kHist)
+        .add_enum("gpu_exact", TreeMethod::kGPUExact)
+        .add_enum("gpu_hist", TreeMethod::kGPUHist)
+        .describe("Choice of tree construction method.");
+  }
+};
+
+/*! \brief training parameters */
+struct DartTrainParam : public dmlc::Parameter<DartTrainParam> {
+  /*! \brief type of sampling algorithm */
+  int sample_type;
+  /*! \brief type of normalization algorithm */
+  int normalize_type;
+  /*! \brief fraction of trees to drop during the dropout */
+  float rate_drop;
+  /*! \brief whether at least one tree should always be dropped during the dropout */
+  bool one_drop;
+  /*! \brief probability of skipping the dropout during an iteration */
+  float skip_drop;
+  /*! \brief learning step size for a time */
+  float learning_rate;
+  // declare parameters
+  DMLC_DECLARE_PARAMETER(DartTrainParam) {
+    DMLC_DECLARE_FIELD(sample_type)
+        .set_default(0)
+        .add_enum("uniform", 0)
+        .add_enum("weighted", 1)
+        .describe("Different types of sampling algorithm.");
+    DMLC_DECLARE_FIELD(normalize_type)
+        .set_default(0)
+        .add_enum("tree", 0)
+        .add_enum("forest", 1)
+        .describe("Different types of normalization algorithm.");
+    DMLC_DECLARE_FIELD(rate_drop)
+        .set_range(0.0f, 1.0f)
+        .set_default(0.0f)
+        .describe("Fraction of trees to drop during the dropout.");
+    DMLC_DECLARE_FIELD(one_drop)
+        .set_default(false)
+        .describe("Whether at least one tree should always be dropped during the dropout.");
+    DMLC_DECLARE_FIELD(skip_drop)
+        .set_range(0.0f, 1.0f)
+        .set_default(0.0f)
+        .describe("Probability of skipping the dropout during a boosting iteration.");
+    DMLC_DECLARE_FIELD(learning_rate)
+        .set_lower_bound(0.0f)
+        .set_default(0.3f)
+        .describe("Learning rate(step size) of update.");
+    DMLC_DECLARE_ALIAS(learning_rate, eta);
+  }
+};
+
+// gradient boosted trees
+class GBTree : public GradientBooster {
+ public:
+  explicit GBTree(bst_float base_margin) : model_(base_margin) {}
+
+  void InitCache(const std::vector<std::shared_ptr<DMatrix> > &cache) {
+    cache_ = cache;
+  }
+
+  static void AssertGPUSupport() {
+#ifndef XGBOOST_USE_CUDA
+    LOG(FATAL) << "XGBoost version not compiled with GPU support.";
+#endif  // XGBOOST_USE_CUDA
+  }
+
+  void Configure(const std::vector<std::pair<std::string, std::string> >& cfg) override;
+  // Revise `tree_method` and `updater` parameters after seeing the training
+  // data matrix
+  void PerformTreeMethodHeuristic(DMatrix* p_train,
+                                  std::map<std::string, std::string> cfg);
+  /*! \brief Map `tree_method` parameter to `updater` parameter */
+  void ConfigureUpdaters(const std::map<std::string, std::string>& cfg);
+  /*! \brief Carry out one iteration of boosting */
+  void DoBoost(DMatrix* p_fmat,
+               HostDeviceVector<GradientPair>* in_gpair,
+               ObjFunction* obj) override;
+
+  bool UseGPU() const override {
+    return
+        tparam_.predictor == "gpu_predictor" ||
+        tparam_.tree_method == TreeMethod::kGPUHist ||
+        tparam_.tree_method == TreeMethod::kGPUExact;
+  }
+
+  void Load(dmlc::Stream* fi) override {
+    model_.Load(fi);
+
+    this->cfg_.clear();
+    this->cfg_.emplace_back(std::string("num_feature"),
+                            common::ToString(model_.param.num_feature));
+  }
+
+  GBTreeTrainParam const& GetTrainParam() const {
+    return tparam_;
+  }
+
+  void Save(dmlc::Stream* fo) const override {
+    model_.Save(fo);
+  }
+
+  bool AllowLazyCheckPoint() const override {
+    return model_.param.num_output_group == 1 ||
+        tparam_.updater_seq.find("distcol") != std::string::npos;
+  }
+
+  void PredictBatch(DMatrix* p_fmat,
+                    HostDeviceVector<bst_float>* out_preds,
+                    unsigned ntree_limit) override {
+    predictor_->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit);
+  }
+
+  void PredictInstance(const SparsePage::Inst& inst,
+                       std::vector<bst_float>* out_preds,
+                       unsigned ntree_limit,
+                       unsigned root_index) override {
+    predictor_->PredictInstance(inst, out_preds, model_,
+                                ntree_limit, root_index);
+  }
+
+  void PredictLeaf(DMatrix* p_fmat,
+                   std::vector<bst_float>* out_preds,
+                   unsigned ntree_limit) override {
+    predictor_->PredictLeaf(p_fmat, out_preds, model_, ntree_limit);
+  }
+
+  void PredictContribution(DMatrix* p_fmat,
+                           std::vector<bst_float>* out_contribs,
+                           unsigned ntree_limit, bool approximate, int condition,
+                           unsigned condition_feature) override {
+    predictor_->PredictContribution(p_fmat, out_contribs, model_, ntree_limit, approximate);
+  }
+
+  void PredictInteractionContributions(DMatrix* p_fmat,
+                                       std::vector<bst_float>* out_contribs,
+                                       unsigned ntree_limit, bool approximate) override {
+    predictor_->PredictInteractionContributions(p_fmat, out_contribs, model_,
+                                                ntree_limit, approximate);
+  }
+
+  std::vector<std::string> DumpModel(const FeatureMap& fmap,
+                                     bool with_stats,
+                                     std::string format) const override {
+    return model_.DumpModel(fmap, with_stats, format);
+  }
+
+ protected:
+  // initialize updater before using them
+  void InitUpdater();
+
+  // do group specific group
+  void BoostNewTrees(HostDeviceVector<GradientPair>* gpair,
+                     DMatrix *p_fmat,
+                     int bst_group,
+                     std::vector<std::unique_ptr<RegTree> >* ret);
+
+  // commit new trees all at once
+  virtual void CommitModel(
+      std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees);
+
+  // --- data structure ---
+  GBTreeModel model_;
+  // training parameter
+  GBTreeTrainParam tparam_;
+  // ----training fields----
+  // configurations for tree
+  std::vector<std::pair<std::string, std::string> > cfg_;
+  // the updaters that can be applied to each of tree
+  std::vector<std::unique_ptr<TreeUpdater>> updaters_;
+  // Cached matrices
+  std::vector<std::shared_ptr<DMatrix>> cache_;
+  std::unique_ptr<Predictor> predictor_;
+  common::Monitor monitor_;
+};
+
+}  // namespace gbm
+}  // namespace xgboost
+
+#endif  // XGBOOST_GBM_GBTREE_H_
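Splitting the `GBTree` declaration into this header is what enables the new unit test: test code can include `src/gbm/gbtree.h`, downcast the abstract `GradientBooster`, and inspect the configured parameters through the new `GetTrainParam()` accessor. The pattern used by the test further below:

    auto& gbtree = dynamic_cast<gbm::GBTree&>(*p_gbm);
    auto const& tparam = gbtree.GetTrainParam();
    ASSERT_EQ(tparam.updater_seq, "grow_histmaker,prune");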
src/learner.cc (201 lines changed)
@@ -113,68 +113,6 @@ class LearnerImpl : public Learner {
     name_gbm_ = "gbtree";
   }
 
-  static void AssertGPUSupport() {
-#ifndef XGBOOST_USE_CUDA
-    LOG(FATAL) << "XGBoost version not compiled with GPU support.";
-#endif  // XGBOOST_USE_CUDA
-  }
-
-
-  /*! \brief Map `tree_method` parameter to `updater` parameter */
-  void ConfigureUpdaters() {
-    // This method is not applicable to non-tree learners
-    if (cfg_.find("booster") != cfg_.cend() &&
-        (cfg_.at("booster") != "gbtree" && cfg_.at("booster") != "dart")) {
-      return;
-    }
-    // `updater` parameter was manually specified
-    if (cfg_.count("updater") > 0) {
-      LOG(WARNING) << "DANGER AHEAD: You have manually specified `updater` "
-                      "parameter. The `tree_method` parameter will be ignored. "
-                      "Incorrect sequence of updaters will produce undefined "
-                      "behavior. For common uses, we recommend using "
-                      "`tree_method` parameter instead.";
-      return;
-    }
-
-    /* Choose updaters according to tree_method parameters */
-    switch (tparam_.tree_method) {
-      case TreeMethod::kAuto:
-        // Use heuristic to choose between 'exact' and 'approx'
-        // This choice is deferred to PerformTreeMethodHeuristic().
-        break;
-      case TreeMethod::kApprox:
-        cfg_["updater"] = "grow_histmaker,prune";
-        break;
-      case TreeMethod::kExact:
-        cfg_["updater"] = "grow_colmaker,prune";
-        break;
-      case TreeMethod::kHist:
-        LOG(INFO) <<
-            "Tree method is selected to be 'hist', which uses a "
-            "single updater grow_quantile_histmaker.";
-        cfg_["updater"] = "grow_quantile_histmaker";
-        break;
-      case TreeMethod::kGPUExact:
-        this->AssertGPUSupport();
-        cfg_["updater"] = "grow_gpu,prune";
-        if (cfg_.count("predictor") == 0) {
-          cfg_["predictor"] = "gpu_predictor";
-        }
-        break;
-      case TreeMethod::kGPUHist:
-        this->AssertGPUSupport();
-        cfg_["updater"] = "grow_gpu_hist";
-        if (cfg_.count("predictor") == 0) {
-          cfg_["predictor"] = "gpu_predictor";
-        }
-        break;
-      default:
-        LOG(FATAL) << "Unknown tree_method ("
-                   << static_cast<int>(tparam_.tree_method) << ") detected";
-    }
-  }
-
   void ConfigureObjective() {
     if (cfg_.count("num_class") != 0) {
       cfg_["num_output_group"] = cfg_["num_class"];
@@ -192,9 +130,6 @@ class LearnerImpl : public Learner {
     if (cfg_.count("objective") == 0) {
       cfg_["objective"] = "reg:squarederror";
     }
-    if (cfg_.count("booster") == 0) {
-      cfg_["booster"] = "gbtree";
-    }
   }
 
   // Configuration before data is known.
@@ -231,13 +166,12 @@ class LearnerImpl : public Learner {
     }
 
     ConfigureObjective();
-    ConfigureUpdaters();
+    name_gbm_ = tparam_.booster;
 
     // FIXME(trivialfis): So which one should go first? Init or Configure?
     if (!this->ModelInitialized()) {
       mparam_.InitAllowUnknown(args);
       name_obj_ = cfg_["objective"];
-      name_gbm_ = cfg_["booster"];
       // set seed only before the model is initialized
       common::GlobalRandom().seed(tparam_.seed);
     }
@@ -263,20 +197,13 @@ class LearnerImpl : public Learner {
   // Configuration can only be done after data is known
   void ConfigurationWithKnownData(DMatrix* dmat) {
     CHECK(ModelInitialized())
-        << "Always call InitModel or Load before any evaluation.";
+        << " Internal Error: Always call InitModel or Load before any evaluation.";
     this->ValidateDMatrix(dmat);
-    // Configure GPU parameters
-    // FIXME(trivialfis): How do we know dependent parameters are all set?
-    if (tparam_.tree_method == TreeMethod::kGPUHist ||
-        tparam_.tree_method == TreeMethod::kGPUExact ||
-        (cfg_.find("updater") != cfg_.cend() && cfg_.at("updater") == "gpu_coord_descent") ||
-        (cfg_.find("predictor") != cfg_.cend() &&
-         cfg_.at("predictor") == "gpu_predictor")) {
-      if (cfg_.find("n_gpus") == cfg_.cend()) {
+    CHECK(this->gbm_) << " Internal: GBM is not set";
+    if (this->gbm_->UseGPU() && cfg_.find("n_gpus") == cfg_.cend()) {
       tparam_.n_gpus = 1;
     }
-    }
   }
 
   void Load(dmlc::Stream* fi) override {
     tparam_ = LearnerTrainParam();
@@ -443,13 +370,26 @@ class LearnerImpl : public Learner {
       }
     }
 
+  void CheckDataSplitMode() {
+    if (rabit::IsDistributed()) {
+      CHECK(tparam_.dsplit != DataSplitMode::kAuto)
+          << "Precondition violated; dsplit cannot be 'auto' in distributed mode";
+      if (tparam_.dsplit == DataSplitMode::kCol) {
+        // 'distcol' updater hidden until it becomes functional again
+        // See discussion at https://github.com/dmlc/xgboost/issues/1832
+        LOG(FATAL) << "Column-wise data split is currently not supported.";
+      }
+    }
+  }
+
   void UpdateOneIter(int iter, DMatrix* train) override {
     monitor_.Start("UpdateOneIter");
 
     if (tparam_.seed_per_iteration || rabit::IsDistributed()) {
       common::GlobalRandom().seed(tparam_.seed * kRandSeedMagic + iter);
     }
-    this->PerformTreeMethodHeuristic(train);
+    // this->PerformTreeMethodHeuristic(train);
+    this->CheckDataSplitMode();
     this->ConfigurationWithKnownData(train);
 
     monitor_.Start("PredictRaw");
@@ -468,7 +408,8 @@ class LearnerImpl : public Learner {
     if (tparam_.seed_per_iteration || rabit::IsDistributed()) {
       common::GlobalRandom().seed(tparam_.seed * kRandSeedMagic + iter);
     }
-    this->PerformTreeMethodHeuristic(train);
+    this->CheckDataSplitMode();
+    // this->PerformTreeMethodHeuristic(train);
     this->ConfigurationWithKnownData(train);
 
     gbm_->DoBoost(train, in_gpair);
@@ -573,108 +514,6 @@ class LearnerImpl : public Learner {
   }
 
  protected:
-  // Revise `tree_method` and `updater` parameters after seeing the training
-  // data matrix
-  inline void PerformTreeMethodHeuristic(DMatrix* p_train) {
-    if (name_gbm_ != "gbtree" || cfg_.count("updater") > 0) {
-      // 1. This method is not applicable for non-tree learners
-      // 2. This method is disabled when `updater` parameter is explicitly
-      //    set, since only experts are expected to do so.
-      return;
-    }
-
-    const TreeMethod current_tree_method = tparam_.tree_method;
-
-    if (rabit::IsDistributed()) {
-      CHECK(tparam_.dsplit != DataSplitMode::kAuto)
-          << "Precondition violated; dsplit cannot be 'auto' in distributed mode";
-      if (tparam_.dsplit == DataSplitMode::kCol) {
-        // 'distcol' updater hidden until it becomes functional again
-        // See discussion at https://github.com/dmlc/xgboost/issues/1832
-        LOG(FATAL) << "Column-wise data split is currently not supported.";
-      }
-      switch (current_tree_method) {
-        case TreeMethod::kAuto:
-          LOG(WARNING) <<
-              "Tree method is automatically selected to be 'approx' "
-              "for distributed training.";
-          break;
-        case TreeMethod::kApprox:
-        case TreeMethod::kHist:
-          // things are okay, do nothing
-          break;
-        case TreeMethod::kExact:
-          LOG(WARNING) << "Tree method was set to be "
-                       << "exact"
-                       << "', but only 'approx' and 'hist' is available for distributed "
-                          "training. The `tree_method` parameter is now being "
-                          "changed to 'approx'";
-          break;
-        case TreeMethod::kGPUExact:
-        case TreeMethod::kGPUHist:
-          LOG(FATAL) << "Distributed training is not available with GPU algoritms";
-          break;
-        default:
-          LOG(FATAL) << "Unknown tree_method ("
-                     << static_cast<int>(current_tree_method) << ") detected";
-      }
-      if (current_tree_method != TreeMethod::kHist) {
-        LOG(WARNING) << "Tree method is automatically selected to be 'approx'"
-                        " for distributed training.";
-        tparam_.tree_method = TreeMethod::kApprox;
-      } else {
-        LOG(WARNING) << "Tree method is specified to be 'hist'"
-                        " for distributed training.";
-        tparam_.tree_method = TreeMethod::kHist;
-      }
-    } else if (!p_train->SingleColBlock()) {
-      /* Some tree methods are not available for external-memory DMatrix */
-      switch (current_tree_method) {
-        case TreeMethod::kAuto:
-          LOG(WARNING) << "Tree method is automatically set to 'approx' "
-                          "since external-memory data matrix is used.";
-          break;
-        case TreeMethod::kApprox:
-          // things are okay, do nothing
-          break;
-        case TreeMethod::kExact:
-          LOG(WARNING) << "Tree method was set to be 'exact', "
-                          "but currently we are only able to proceed with "
-                          "approximate algorithm ('approx') because external-"
-                          "memory data matrix is used.";
-          break;
-        case TreeMethod::kHist:
-          // things are okay, do nothing
-          break;
-        case TreeMethod::kGPUExact:
-        case TreeMethod::kGPUHist:
-          LOG(FATAL)
-              << "External-memory data matrix is not available with GPU algorithms";
-          break;
-        default:
-          LOG(FATAL) << "Unknown tree_method ("
-                     << static_cast<int>(current_tree_method) << ") detected";
-      }
-      tparam_.tree_method = TreeMethod::kApprox;
-    } else if (p_train->Info().num_row_ >= (4UL << 20UL)
-               && current_tree_method == TreeMethod::kAuto) {
-      /* Choose tree_method='approx' automatically for large data matrix */
-      LOG(WARNING) << "Tree method is automatically selected to be "
-                      "'approx' for faster speed. To use old behavior "
-                      "(exact greedy algorithm on single machine), "
-                      "set tree_method to 'exact'.";
-      tparam_.tree_method = TreeMethod::kApprox;
-    }
-
-    /* If tree_method was changed, re-configure updaters and gradient boosters */
-    if (tparam_.tree_method != current_tree_method) {
-      ConfigureUpdaters();
-      if (gbm_ != nullptr) {
-        gbm_->Configure(cfg_.begin(), cfg_.end());
-      }
-    }
-  }
-
   // return whether model is already initialized.
   inline bool ModelInitialized() const { return gbm_ != nullptr; }
   // lazily initialize the model based on configuration if it haven't yet been initialized.
@@ -6,12 +6,13 @@
  */
 #include <rabit/rabit.h>
 #include <xgboost/tree_updater.h>
+#include <xgboost/logging.h>
 #include <memory>
 #include <vector>
 #include <cmath>
 #include <algorithm>
 
-#include "./param.h"
+#include "param.h"
 #include "../common/random.h"
 #include "../common/bitmap.h"
 #include "split_evaluator.h"
tests/cpp/gbm/test_gbtree.cc (new file, 49 lines)
@@ -0,0 +1,49 @@
+#include <gtest/gtest.h>
+#include <xgboost/generic_parameters.h>
+#include "../helpers.h"
+#include "../../../src/gbm/gbtree.h"
+
+namespace xgboost {
+TEST(GBTree, SelectTreeMethod) {
+  using Arg = std::pair<std::string, std::string>;
+  size_t constexpr kRows = 10;
+  size_t constexpr kCols = 10;
+  auto mat_ptr = CreateDMatrix(kRows, kCols, 0);
+  std::vector<std::shared_ptr<xgboost::DMatrix>> mat = {*mat_ptr};
+
+  LearnerTrainParam learner_param;
+  learner_param.InitAllowUnknown(std::vector<Arg>{Arg("n_gpus", "0")});
+  std::unique_ptr<GradientBooster> p_gbm{
+    GradientBooster::Create("gbtree", &learner_param, {}, 0)};
+  auto& gbtree = dynamic_cast<gbm::GBTree&> (*p_gbm);
+
+  // Test if `tree_method` can be set
+  std::string n_feat = std::to_string(kCols);
+  gbtree.Configure({Arg{"tree_method", "approx"}, Arg{"num_feature", n_feat}});
+  auto const& tparam = gbtree.GetTrainParam();
+  ASSERT_EQ(tparam.updater_seq, "grow_histmaker,prune");
+  gbtree.Configure({Arg("tree_method", "exact"), Arg("num_feature", n_feat)});
+  ASSERT_EQ(tparam.updater_seq, "grow_colmaker,prune");
+  gbtree.Configure({Arg("tree_method", "hist"), Arg("num_feature", n_feat)});
+  ASSERT_EQ(tparam.updater_seq, "grow_quantile_histmaker");
+  ASSERT_EQ(tparam.predictor, "cpu_predictor");
+  gbtree.Configure({Arg{"booster", "dart"}, Arg{"tree_method", "hist"},
+                    Arg{"num_feature", n_feat}});
+  ASSERT_EQ(tparam.updater_seq, "grow_quantile_histmaker");
+#ifdef XGBOOST_USE_CUDA
+  learner_param.InitAllowUnknown(std::vector<Arg>{Arg{"n_gpus", "1"}});
+  gbtree.Configure({Arg("tree_method", "gpu_exact"),
+                    Arg("num_feature", n_feat)});
+  ASSERT_EQ(tparam.updater_seq, "grow_gpu,prune");
+  ASSERT_EQ(tparam.predictor, "gpu_predictor");
+  gbtree.Configure({Arg("tree_method", "gpu_hist"), Arg("num_feature", n_feat)});
+  ASSERT_EQ(tparam.updater_seq, "grow_gpu_hist");
+  ASSERT_EQ(tparam.predictor, "gpu_predictor");
+  gbtree.Configure({Arg{"booster", "dart"}, Arg{"tree_method", "gpu_hist"},
+                    Arg{"num_feature", n_feat}});
+  ASSERT_EQ(tparam.updater_seq, "grow_gpu_hist");
+#endif
+
+  delete mat_ptr;
+}
+}  // namespace xgboost
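Assuming the usual XGBoost C++ test build (the gtest binary name may differ per setup), the new test can be run in isolation with gtest's filter flag:

    ./testxgboost --gtest_filter=GBTree.SelectTreeMethod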
@@ -145,25 +145,30 @@ TEST(gpu_predictor, MGPU_PicklingTest) {
   }
 
   // Load data matrix
-  CheckCAPICall(XGDMatrixCreateFromFile(tmp_file.c_str(), 0, &dmat[0]));
-  CheckCAPICall(XGDMatrixSetFloatInfo(dmat[0], "label", label.data(), 200));
+  ASSERT_EQ(XGDMatrixCreateFromFile(
+      tmp_file.c_str(), 0, &dmat[0]), 0) << XGBGetLastError();
+  ASSERT_EQ(XGDMatrixSetFloatInfo(
+      dmat[0], "label", label.data(), 200), 0) << XGBGetLastError();
   // Create booster
-  CheckCAPICall(XGBoosterCreate(dmat, 1, &bst));
+  ASSERT_EQ(XGBoosterCreate(dmat, 1, &bst), 0) << XGBGetLastError();
   // Set parameters
-  CheckCAPICall(XGBoosterSetParam(bst, "seed", "0"));
-  CheckCAPICall(XGBoosterSetParam(bst, "base_score", "0.5"));
-  CheckCAPICall(XGBoosterSetParam(bst, "booster", "gbtree"));
-  CheckCAPICall(XGBoosterSetParam(bst, "learning_rate", "0.01"));
-  CheckCAPICall(XGBoosterSetParam(bst, "max_depth", "8"));
-  CheckCAPICall(XGBoosterSetParam(bst, "objective", "binary:logistic"));
-  CheckCAPICall(XGBoosterSetParam(bst, "seed", "123"));
-  CheckCAPICall(XGBoosterSetParam(bst, "tree_method", "gpu_hist"));
-  CheckCAPICall(XGBoosterSetParam(bst, "n_gpus", std::to_string(ngpu).c_str()));
-  CheckCAPICall(XGBoosterSetParam(bst, "predictor", "gpu_predictor"));
+  ASSERT_EQ(XGBoosterSetParam(bst, "seed", "0"), 0) << XGBGetLastError();
+  ASSERT_EQ(XGBoosterSetParam(bst, "base_score", "0.5"), 0) << XGBGetLastError();
+  ASSERT_EQ(XGBoosterSetParam(bst, "booster", "gbtree"), 0) << XGBGetLastError();
+  ASSERT_EQ(XGBoosterSetParam(bst, "learning_rate", "0.01"), 0) << XGBGetLastError();
+  ASSERT_EQ(XGBoosterSetParam(bst, "max_depth", "8"), 0) << XGBGetLastError();
+  ASSERT_EQ(XGBoosterSetParam(
+      bst, "objective", "binary:logistic"), 0) << XGBGetLastError();
+  ASSERT_EQ(XGBoosterSetParam(bst, "seed", "123"), 0) << XGBGetLastError();
+  ASSERT_EQ(XGBoosterSetParam(
+      bst, "tree_method", "gpu_hist"), 0) << XGBGetLastError();
+  ASSERT_EQ(XGBoosterSetParam(
+      bst, "n_gpus", std::to_string(ngpu).c_str()), 0) << XGBGetLastError();
+  ASSERT_EQ(XGBoosterSetParam(bst, "predictor", "gpu_predictor"), 0) << XGBGetLastError();
 
   // Run boosting iterations
   for (int i = 0; i < 10; ++i) {
-    CheckCAPICall(XGBoosterUpdateOneIter(bst, i, dmat[0]));
+    ASSERT_EQ(XGBoosterUpdateOneIter(bst, i, dmat[0]), 0) << XGBGetLastError();
   }
 
   // Delete matrix
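The mechanical change in this test: each C API call's return code is now asserted directly, so on failure gtest prints the message from `XGBGetLastError()` instead of whatever the old `CheckCAPICall` wrapper reported. The pattern, taken from the hunk above:

    ASSERT_EQ(XGBoosterSetParam(bst, "seed", "0"), 0) << XGBGetLastError();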
@@ -19,40 +19,6 @@ TEST(Learner, Basic) {
   delete mat_ptr;
 }
 
-TEST(Learner, SelectTreeMethod) {
-  using Arg = std::pair<std::string, std::string>;
-  auto mat_ptr = CreateDMatrix(10, 10, 0);
-  std::vector<std::shared_ptr<xgboost::DMatrix>> mat = {*mat_ptr};
-  auto learner = std::unique_ptr<Learner>(Learner::Create(mat));
-
-  // Test if `tree_method` can be set
-  learner->Configure({Arg("tree_method", "approx")});
-  ASSERT_EQ(learner->GetConfigurationArguments().at("updater"),
-            "grow_histmaker,prune");
-  learner->Configure({Arg("tree_method", "exact")});
-  ASSERT_EQ(learner->GetConfigurationArguments().at("updater"),
-            "grow_colmaker,prune");
-  learner->Configure({Arg("tree_method", "hist")});
-  ASSERT_EQ(learner->GetConfigurationArguments().at("updater"),
-            "grow_quantile_histmaker");
-  learner->Configure({Arg{"booster", "dart"}, Arg{"tree_method", "hist"}});
-  ASSERT_EQ(learner->GetConfigurationArguments().at("updater"),
-            "grow_quantile_histmaker");
-#ifdef XGBOOST_USE_CUDA
-  learner->Configure({Arg("tree_method", "gpu_exact")});
-  ASSERT_EQ(learner->GetConfigurationArguments().at("updater"),
-            "grow_gpu,prune");
-  learner->Configure({Arg("tree_method", "gpu_hist")});
-  ASSERT_EQ(learner->GetConfigurationArguments().at("updater"),
-            "grow_gpu_hist");
-  learner->Configure({Arg{"booster", "dart"}, Arg{"tree_method", "gpu_hist"}});
-  ASSERT_EQ(learner->GetConfigurationArguments().at("updater"),
-            "grow_gpu_hist");
-#endif
-
-  delete mat_ptr;
-}
-
 TEST(Learner, CheckGroup) {
   using Arg = std::pair<std::string, std::string>;
   size_t constexpr kNumGroups = 4;