Refactor configuration [Part II]. (#4577)
* Refactor configuration [Part II].
* General changes:
** Remove `Init` methods to avoid ambiguity.
** Remove `Configure(std::map<>)` to avoid redundant copying and prepare for
parameter validation. (`std::vector` is returned from `InitAllowUnknown`).
** Add name to tree updaters for easier debugging.
* Learner changes:
** Make `LearnerImpl` the only source of configuration.
All configurations are stored and carried out by `LearnerImpl::Configure()`.
** Remove booster in C API.
Originally kept for "compatibility reasons", but the specific reason was never
stated, so it is simply removed here.
** Add a `metric_names_` field in `LearnerImpl`.
** Remove `LazyInit`. Configuration will always be lazy.
** Run `Configure` before every iteration.
* Predictor changes:
** Allocate both cpu and gpu predictor.
** Remove cpu_predictor from gpu_predictor.
`GBTree` is now used to dispatch the predictor.
** Remove some GPU Predictor tests.
* IO
No IO changes. Binary model format stability is verified by comparing the
hash values of saved models between the two commits.
This commit is contained in:
@@ -43,8 +43,7 @@ SplitEvaluator* SplitEvaluator::Create(const std::string& name) {
|
||||
}
|
||||
|
||||
// Default implementations of some virtual methods that aren't always needed
|
||||
void SplitEvaluator::Init(
|
||||
const std::vector<std::pair<std::string, std::string> >& args) {}
|
||||
void SplitEvaluator::Init(const Args& args) {}
|
||||
void SplitEvaluator::Reset() {}
|
||||
void SplitEvaluator::AddSplit(bst_uint nodeid,
|
||||
bst_uint leftid,
|
||||
@@ -104,8 +103,7 @@ class ElasticNet final : public SplitEvaluator {
|
||||
LOG(FATAL) << "ElasticNet does not accept an inner SplitEvaluator";
|
||||
}
|
||||
}
|
||||
void Init(
|
||||
const std::vector<std::pair<std::string, std::string> >& args) override {
|
||||
void Init(const Args& args) override {
|
||||
params_.InitAllowUnknown(args);
|
||||
}
|
||||
|
||||
@@ -210,7 +208,7 @@ class MonotonicConstraint final : public SplitEvaluator {
|
||||
inner_ = std::move(inner);
|
||||
}
|
||||
|
||||
void Init(const std::vector<std::pair<std::string, std::string> >& args)
|
||||
void Init(const Args& args)
|
||||
override {
|
||||
inner_->Init(args);
|
||||
params_.InitAllowUnknown(args);
|
||||
@@ -369,7 +367,7 @@ class InteractionConstraint final : public SplitEvaluator {
|
||||
inner_ = std::move(inner);
|
||||
}
|
||||
|
||||
void Init(const std::vector<std::pair<std::string, std::string> >& args)
|
||||
void Init(const Args& args)
|
||||
override {
|
||||
inner_->Init(args);
|
||||
params_.InitAllowUnknown(args);
|
||||
|
||||
@@ -32,8 +32,7 @@ class SplitEvaluator {
|
||||
virtual ~SplitEvaluator() = default;
|
||||
|
||||
// Used to initialise any regularisation hyperparameters provided by the user
|
||||
virtual void Init(
|
||||
const std::vector<std::pair<std::string, std::string> >& args);
|
||||
virtual void Init(const Args& args);
|
||||
|
||||
// Resets the SplitEvaluator to the state it was in after the Init was called
|
||||
virtual void Reset();
|
||||
|
||||
@@ -14,7 +14,7 @@ DMLC_REGISTRY_ENABLE(::xgboost::TreeUpdaterReg);
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
TreeUpdater* TreeUpdater::Create(const std::string& name, LearnerTrainParam const* tparam) {
|
||||
TreeUpdater* TreeUpdater::Create(const std::string& name, GenericParameter const* tparam) {
|
||||
auto *e = ::dmlc::Registry< ::xgboost::TreeUpdaterReg>::Get()->Find(name);
|
||||
if (e == nullptr) {
|
||||
LOG(FATAL) << "Unknown tree updater " << name;
|
||||
|
||||
@@ -30,7 +30,7 @@ namespace tree {
|
||||
*/
|
||||
class BaseMaker: public TreeUpdater {
|
||||
public:
|
||||
void Init(const std::vector<std::pair<std::string, std::string> >& args) override {
|
||||
void Configure(const Args& args) override {
|
||||
param_.InitAllowUnknown(args);
|
||||
}
|
||||
|
||||
|
||||
@@ -25,12 +25,16 @@ DMLC_REGISTRY_FILE_TAG(updater_colmaker);
|
||||
/*! \brief column-wise update to construct a tree */
|
||||
class ColMaker: public TreeUpdater {
|
||||
public:
|
||||
void Init(const std::vector<std::pair<std::string, std::string> >& args) override {
|
||||
void Configure(const Args& args) override {
|
||||
param_.InitAllowUnknown(args);
|
||||
spliteval_.reset(SplitEvaluator::Create(param_.split_evaluator));
|
||||
spliteval_->Init(args);
|
||||
}
|
||||
|
||||
char const* Name() const override {
|
||||
return "grow_colmaker";
|
||||
}
|
||||
|
||||
void Update(HostDeviceVector<GradientPair> *gpair,
|
||||
DMatrix* dmat,
|
||||
const std::vector<RegTree*> &trees) override {
|
||||
@@ -768,13 +772,18 @@ class ColMaker: public TreeUpdater {
|
||||
// distributed column maker
|
||||
class DistColMaker : public ColMaker {
|
||||
public:
|
||||
void Init(const std::vector<std::pair<std::string, std::string> >& args) override {
|
||||
void Configure(const Args& args) override {
|
||||
param_.InitAllowUnknown(args);
|
||||
pruner_.reset(TreeUpdater::Create("prune", tparam_));
|
||||
pruner_->Init(args);
|
||||
pruner_->Configure(args);
|
||||
spliteval_.reset(SplitEvaluator::Create(param_.split_evaluator));
|
||||
spliteval_->Init(args);
|
||||
}
|
||||
|
||||
char const* Name() const override {
|
||||
return "distcol";
|
||||
}
|
||||
|
||||
void Update(HostDeviceVector<GradientPair> *gpair,
|
||||
DMatrix* dmat,
|
||||
const std::vector<RegTree*> &trees) override {
|
||||
|
||||
@@ -581,7 +581,11 @@ class GPUMaker : public TreeUpdater {
|
||||
GPUMaker() : allocated_{false} {}
|
||||
~GPUMaker() override = default;
|
||||
|
||||
void Init(const std::vector<std::pair<std::string, std::string>> &args) override {
|
||||
char const* Name() const override {
|
||||
return "gpu_exact";
|
||||
}
|
||||
|
||||
void Configure(const Args &args) override {
|
||||
param_.InitAllowUnknown(args);
|
||||
maxNodes_ = (1 << (param_.max_depth + 1)) - 1;
|
||||
maxLeaves_ = 1 << param_.max_depth;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2017 XGBoost contributors
|
||||
* Copyright 2017-2019 XGBoost contributors
|
||||
*/
|
||||
#pragma once
|
||||
#include <thrust/random.h>
|
||||
|
||||
@@ -1290,13 +1290,12 @@ template <typename GradientSumT>
|
||||
class GPUHistMakerSpecialised {
|
||||
public:
|
||||
GPUHistMakerSpecialised() : initialised_{false}, p_last_fmat_{nullptr} {}
|
||||
void Init(const std::vector<std::pair<std::string, std::string>>& args,
|
||||
LearnerTrainParam const* lparam) {
|
||||
void Configure(const Args& args, GenericParameter const* generic_param) {
|
||||
param_.InitAllowUnknown(args);
|
||||
learner_param_ = lparam;
|
||||
generic_param_ = generic_param;
|
||||
hist_maker_param_.InitAllowUnknown(args);
|
||||
auto devices = GPUSet::All(learner_param_->gpu_id,
|
||||
learner_param_->n_gpus);
|
||||
auto devices = GPUSet::All(generic_param_->gpu_id,
|
||||
generic_param_->n_gpus);
|
||||
n_devices_ = devices.Size();
|
||||
CHECK(n_devices_ != 0) << "Must have at least one device";
|
||||
dist_ = GPUDistribution::Block(devices);
|
||||
@@ -1362,7 +1361,7 @@ class GPUHistMakerSpecialised {
|
||||
|
||||
monitor_.StartCuda("Quantiles");
|
||||
// Create the quantile sketches for the dmatrix and initialize HistogramCuts
|
||||
size_t row_stride = common::DeviceSketch(param_, *learner_param_,
|
||||
size_t row_stride = common::DeviceSketch(param_, *generic_param_,
|
||||
hist_maker_param_.gpu_batch_nrows,
|
||||
dmat, &hmat_);
|
||||
monitor_.StopCuda("Quantiles");
|
||||
@@ -1488,7 +1487,7 @@ class GPUHistMakerSpecialised {
|
||||
int n_bins_;
|
||||
|
||||
GPUHistMakerTrainParam hist_maker_param_;
|
||||
LearnerTrainParam const* learner_param_;
|
||||
GenericParameter const* generic_param_;
|
||||
|
||||
dh::AllReducer reducer_;
|
||||
|
||||
@@ -1502,17 +1501,16 @@ class GPUHistMakerSpecialised {
|
||||
|
||||
class GPUHistMaker : public TreeUpdater {
|
||||
public:
|
||||
void Init(
|
||||
const std::vector<std::pair<std::string, std::string>>& args) override {
|
||||
void Configure(const Args& args) override {
|
||||
hist_maker_param_.InitAllowUnknown(args);
|
||||
float_maker_.reset();
|
||||
double_maker_.reset();
|
||||
if (hist_maker_param_.single_precision_histogram) {
|
||||
float_maker_.reset(new GPUHistMakerSpecialised<GradientPair>());
|
||||
float_maker_->Init(args, tparam_);
|
||||
float_maker_->Configure(args, tparam_);
|
||||
} else {
|
||||
double_maker_.reset(new GPUHistMakerSpecialised<GradientPairPrecise>());
|
||||
double_maker_->Init(args, tparam_);
|
||||
double_maker_->Configure(args, tparam_);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1534,6 +1532,10 @@ class GPUHistMaker : public TreeUpdater {
|
||||
}
|
||||
}
|
||||
|
||||
char const* Name() const override {
|
||||
return "gpu_hist";
|
||||
}
|
||||
|
||||
private:
|
||||
GPUHistMakerTrainParam hist_maker_param_;
|
||||
std::unique_ptr<GPUHistMakerSpecialised<GradientPair>> float_maker_;
|
||||
|
||||
@@ -33,6 +33,9 @@ class HistMaker: public BaseMaker {
|
||||
}
|
||||
param_.learning_rate = lr;
|
||||
}
|
||||
char const* Name() const override {
|
||||
return "grow_histmaker";
|
||||
}
|
||||
|
||||
protected:
|
||||
/*! \brief a single histogram */
|
||||
@@ -83,7 +86,7 @@ class HistMaker: public BaseMaker {
|
||||
// per thread histset
|
||||
std::vector<HistSet> hset;
|
||||
// initialize the hist set
|
||||
inline void Init(const TrainParam ¶m, int nthread) {
|
||||
inline void Configure(const TrainParam ¶m, int nthread) {
|
||||
hset.resize(nthread);
|
||||
// cleanup statistics
|
||||
for (int tid = 0; tid < nthread; ++tid) {
|
||||
@@ -274,6 +277,9 @@ class HistMaker: public BaseMaker {
|
||||
class CQHistMaker: public HistMaker {
|
||||
public:
|
||||
CQHistMaker() = default;
|
||||
char const* Name() const override {
|
||||
return "grow_local_histmaker";
|
||||
}
|
||||
|
||||
protected:
|
||||
struct HistEntry {
|
||||
@@ -339,7 +345,7 @@ class CQHistMaker: public HistMaker {
|
||||
feat2workindex_[fset[i]] = static_cast<int>(i);
|
||||
}
|
||||
// start to work
|
||||
this->wspace_.Init(this->param_, 1);
|
||||
this->wspace_.Configure(this->param_, 1);
|
||||
// if it is C++11, use lazy evaluation for Allreduce,
|
||||
// to gain speedup in recovery
|
||||
auto lazy_get_hist = [&]() {
|
||||
@@ -637,6 +643,11 @@ class CQHistMaker: public HistMaker {
|
||||
|
||||
// global proposal
|
||||
class GlobalProposalHistMaker: public CQHistMaker {
|
||||
public:
|
||||
char const* Name() const override {
|
||||
return "grow_global_histmaker";
|
||||
}
|
||||
|
||||
protected:
|
||||
void ResetPosAndPropose(const std::vector<GradientPair> &gpair,
|
||||
DMatrix *p_fmat,
|
||||
@@ -682,7 +693,7 @@ class GlobalProposalHistMaker: public CQHistMaker {
|
||||
this->feat2workindex_[fset[i]] = static_cast<int>(i);
|
||||
}
|
||||
// start to work
|
||||
this->wspace_.Init(this->param_, 1);
|
||||
this->wspace_.Configure(this->param_, 1);
|
||||
// to gain speedup in recovery
|
||||
{
|
||||
this->thread_hist_.resize(omp_get_max_threads());
|
||||
|
||||
@@ -24,10 +24,14 @@ class TreePruner: public TreeUpdater {
|
||||
TreePruner() {
|
||||
syncher_.reset(TreeUpdater::Create("sync", tparam_));
|
||||
}
|
||||
char const* Name() const override {
|
||||
return "prune";
|
||||
}
|
||||
|
||||
// set training parameter
|
||||
void Init(const std::vector<std::pair<std::string, std::string> >& args) override {
|
||||
void Configure(const Args& args) override {
|
||||
param_.InitAllowUnknown(args);
|
||||
syncher_->Init(args);
|
||||
syncher_->Configure(args);
|
||||
}
|
||||
// update the tree, do pruning
|
||||
void Update(HostDeviceVector<GradientPair> *gpair,
|
||||
|
||||
@@ -32,12 +32,12 @@ namespace tree {
|
||||
|
||||
DMLC_REGISTRY_FILE_TAG(updater_quantile_hist);
|
||||
|
||||
void QuantileHistMaker::Init(const std::vector<std::pair<std::string, std::string> >& args) {
|
||||
void QuantileHistMaker::Configure(const Args& args) {
|
||||
// initialize pruner
|
||||
if (!pruner_) {
|
||||
pruner_.reset(TreeUpdater::Create("prune", tparam_));
|
||||
}
|
||||
pruner_->Init(args);
|
||||
pruner_->Configure(args);
|
||||
param_.InitAllowUnknown(args);
|
||||
is_gmat_initialized_ = false;
|
||||
|
||||
|
||||
@@ -46,7 +46,7 @@ using xgboost::common::Column;
|
||||
/*! \brief construct a tree using quantized feature values */
|
||||
class QuantileHistMaker: public TreeUpdater {
|
||||
public:
|
||||
void Init(const std::vector<std::pair<std::string, std::string> >& args) override;
|
||||
void Configure(const Args& args) override;
|
||||
|
||||
void Update(HostDeviceVector<GradientPair>* gpair,
|
||||
DMatrix* dmat,
|
||||
@@ -55,6 +55,9 @@ class QuantileHistMaker: public TreeUpdater {
|
||||
bool UpdatePredictionCache(const DMatrix* data,
|
||||
HostDeviceVector<bst_float>* out_preds) override;
|
||||
|
||||
char const* Name() const override {
|
||||
return "grow_quantile_histmaker";
|
||||
}
|
||||
|
||||
protected:
|
||||
// training parameter
|
||||
|
||||
@@ -21,9 +21,12 @@ DMLC_REGISTRY_FILE_TAG(updater_refresh);
|
||||
/*! \brief pruner that prunes a tree after growing finishs */
|
||||
class TreeRefresher: public TreeUpdater {
|
||||
public:
|
||||
void Init(const std::vector<std::pair<std::string, std::string> >& args) override {
|
||||
void Configure(const Args& args) override {
|
||||
param_.InitAllowUnknown(args);
|
||||
}
|
||||
char const* Name() const override {
|
||||
return "refresh";
|
||||
}
|
||||
// update the tree, do pruning
|
||||
void Update(HostDeviceVector<GradientPair> *gpair,
|
||||
DMatrix *p_fmat,
|
||||
|
||||
@@ -22,6 +22,9 @@ DMLC_REGISTRY_FILE_TAG(updater_skmaker);
|
||||
|
||||
class SketchMaker: public BaseMaker {
|
||||
public:
|
||||
char const* Name() const override {
|
||||
return "grow_skmaker";
|
||||
}
|
||||
void Update(HostDeviceVector<GradientPair> *gpair,
|
||||
DMatrix *p_fmat,
|
||||
const std::vector<RegTree*> &trees) override {
|
||||
|
||||
@@ -20,7 +20,11 @@ DMLC_REGISTRY_FILE_TAG(updater_sync);
|
||||
*/
|
||||
class TreeSyncher: public TreeUpdater {
|
||||
public:
|
||||
void Init(const std::vector<std::pair<std::string, std::string> >& args) override {}
|
||||
void Configure(const Args& args) override {}
|
||||
|
||||
char const* Name() const override {
|
||||
return "prune";
|
||||
}
|
||||
|
||||
void Update(HostDeviceVector<GradientPair> *gpair,
|
||||
DMatrix* dmat,
|
||||
|
||||
Reference in New Issue
Block a user