Refactor configuration [Part II]. (#4577)

* Refactor configuration [Part II].

* General changes:
** Remove `Init` methods to avoid ambiguity.
** Remove `Configure(std::map<>)` to avoid redundant copying and to prepare for
   parameter validation (`InitAllowUnknown` returns a `std::vector`).
** Add a name to tree updaters for easier debugging.

* Learner changes:
** Make `LearnerImpl` the single source of configuration.
   All configuration is stored and carried out by `LearnerImpl::Configure()`.
** Remove the `Booster` wrapper in the C API.
   It was originally kept for an unstated "compatibility reason"; since no
   rationale was ever documented, it is simply removed here.
** Add a `metric_names_` field in `LearnerImpl`.
** Remove `LazyInit`. Configuration is now always lazy.
** Run `Configure` before every iteration.

* Predictor changes:
** Allocate both the CPU and the GPU predictor.
** Remove `cpu_predictor` from `gpu_predictor`.
   `GBTree` now dispatches to the appropriate predictor.
** Remove some GPU predictor tests.

* IO:
  No IO changes. Binary model format stability is verified by comparing hash
  values of models saved at the two commits.

(A few illustrative sketches of these conventions are interleaved below.)
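For orientation, a minimal, hedged sketch of the convention this series converges on. `ExampleUpdater` is hypothetical and stands in for any component (updater, metric, objective) that now exposes a single `Configure(Args)` entry point; only the `Args` alias itself comes from this commit:

#include <cstdlib>
#include <string>
#include <utility>
#include <vector>

// The alias added to base.h in this commit.
using Args = std::vector<std::pair<std::string, std::string> >;

// Hypothetical component: one Configure(Args) replaces the old
// Init()/Configure(map) pair, so no map copy is made along the way.
class ExampleUpdater {
 public:
  void Configure(const Args& args) {
    for (const auto& kv : args) {
      if (kv.first == "verbosity") { verbosity_ = std::atoi(kv.second.c_str()); }
    }
  }
 private:
  int verbosity_ {1};
};

int main() {
  ExampleUpdater up;
  up.Configure({{"verbosity", "2"}});  // the same Args shape is used everywhere
  return 0;
}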
parent ad1192e8a3, commit f0064c07ab
@@ -10,6 +10,9 @@
 #include <dmlc/omp.h>
 #include <cmath>
 #include <iostream>
+#include <vector>
+#include <string>
+#include <utility>

 /*!
  * \brief string flag for R library, to leave hooks when needed.
@@ -199,6 +202,8 @@ using GradientPairPrecise = detail::GradientPairInternal<double>;
  * associative. */
 using GradientPairInteger = detail::GradientPairInternal<int64_t>;

+using Args = std::vector<std::pair<std::string, std::string> >;
+
 /*! \brief small eps gap for minimum split decision. */
 const bst_float kRtEps = 1e-6f;

@@ -29,19 +29,11 @@ namespace xgboost {
  */
 class GradientBooster {
  protected:
-  LearnerTrainParam const* learner_param_;
+  GenericParameter const* learner_param_;

  public:
  /*! \brief virtual destructor */
  virtual ~GradientBooster() = default;
  /*!
-   * \brief set configuration from pair iterators.
-   * \param begin The beginning iterator.
-   * \param end The end iterator.
-   * \tparam PairIter iterator<std::pair<std::string, std::string> >
-   */
-  template<typename PairIter>
-  inline void Configure(PairIter begin, PairIter end);
-  /*!
   * \brief Set the configuration of gradient boosting.
   *  User must call configure once before InitModel and Training.
@@ -159,18 +151,11 @@ class GradientBooster {
   */
  static GradientBooster* Create(
      const std::string& name,
-      LearnerTrainParam const* gparam,
+      GenericParameter const* gparam,
      const std::vector<std::shared_ptr<DMatrix> >& cache_mats,
      bst_float base_margin);
 };

-// implementing configure.
-template<typename PairIter>
-inline void GradientBooster::Configure(PairIter begin, PairIter end) {
-  std::vector<std::pair<std::string, std::string> > vec(begin, end);
-  this->Configure(vec);
-}
-
 /*!
  * \brief Registry entry for tree updater.
  */
@@ -11,36 +11,20 @@
 #include <string>

 namespace xgboost {

-enum class DataSplitMode : int {
-  kAuto = 0, kCol = 1, kRow = 2
-};
-}  // namespace xgboost
-
-DECLARE_FIELD_ENUM_CLASS(xgboost::DataSplitMode);
-
-namespace xgboost {
-struct LearnerTrainParam : public dmlc::Parameter<LearnerTrainParam> {
+struct GenericParameter : public dmlc::Parameter<GenericParameter> {
   // stored random seed
   int seed;
   // whether seed the PRNG each iteration
   bool seed_per_iteration;
-  // data split mode, can be row, col, or none.
-  DataSplitMode dsplit;
   // number of threads to use if OpenMP is enabled
   // if equals 0, use system default
   int nthread;
-  // flag to disable default metric
-  int disable_default_eval_metric;
   // primary device.
   int gpu_id;
   // number of devices to use, -1 implies using all available devices.
   int n_gpus;
-
-  std::string booster;

   // declare parameters
-  DMLC_DECLARE_PARAMETER(LearnerTrainParam) {
+  DMLC_DECLARE_PARAMETER(GenericParameter) {
     DMLC_DECLARE_FIELD(seed).set_default(0).describe(
         "Random number seed during training.");
     DMLC_DECLARE_FIELD(seed_per_iteration)
@@ -49,17 +33,8 @@ struct LearnerTrainParam : public dmlc::Parameter<LearnerTrainParam> {
         "Seed PRNG determnisticly via iterator number, "
         "this option will be switched on automatically on distributed "
         "mode.");
-    DMLC_DECLARE_FIELD(dsplit)
-        .set_default(DataSplitMode::kAuto)
-        .add_enum("auto", DataSplitMode::kAuto)
-        .add_enum("col", DataSplitMode::kCol)
-        .add_enum("row", DataSplitMode::kRow)
-        .describe("Data split mode for distributed training.");
     DMLC_DECLARE_FIELD(nthread).set_default(0).describe(
         "Number of threads to use.");
-    DMLC_DECLARE_FIELD(disable_default_eval_metric)
-        .set_default(0)
-        .describe("flag to disable default metric. Set to >0 to disable");
     DMLC_DECLARE_FIELD(gpu_id)
         .set_default(0)
         .describe("The primary GPU device ordinal.");
@@ -69,9 +44,6 @@ struct LearnerTrainParam : public dmlc::Parameter<LearnerTrainParam> {
         .describe("Deprecated, please use distributed training with one "
                   "process per GPU. "
                   "Number of GPUs to use for multi-gpu algorithms.");
-    DMLC_DECLARE_FIELD(booster)
-        .set_default("gbtree")
-        .describe("Gradient booster used for training.");
   }
 };
 }  // namespace xgboost
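The hunks above rename `LearnerTrainParam` to `GenericParameter`; every component now borrows a non-owning pointer supplied by the learner. A self-contained sketch of that ownership pattern (the type and member names below are stand-ins, not the real classes):

// Stand-ins illustrating the non-owning GenericParameter pointer pattern.
struct GenericParameterLike {
  int seed {0};
  int nthread {0};
  int gpu_id {0};
};

class ComponentLike {
 protected:
  GenericParameterLike const* learner_param_ {nullptr};  // owned by the learner
 public:
  explicit ComponentLike(GenericParameterLike const* p) : learner_param_(p) {}
  int Threads() const { return learner_param_->nthread; }
};

int main() {
  GenericParameterLike gp;   // lives in the learner for the whole session
  ComponentLike c(&gp);      // components only borrow it
  return c.Threads();
}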
@@ -24,6 +24,7 @@
 #include <vector>

 namespace xgboost {

 /*!
  * \brief Learner class that does training and prediction.
  *  This is the user facing module of xgboost training.
@@ -45,25 +46,9 @@ class Learner : public rabit::Serializable {
  /*! \brief virtual destructor */
  ~Learner() override = default;
  /*!
-   * \brief set configuration from pair iterators.
-   * \param begin The beginning iterator.
-   * \param end The end iterator.
-   * \tparam PairIter iterator<std::pair<std::string, std::string> >
+   * \brief Configure Learner based on set parameters.
   */
-  template<typename PairIter>
-  inline void Configure(PairIter begin, PairIter end);
-  /*!
-   * \brief Set the configuration of gradient boosting.
-   *  User must call configure once before InitModel and Training.
-   *
-   * \param cfg configurations on both training and model parameters.
-   */
-  virtual void Configure(const std::vector<std::pair<std::string, std::string> >& cfg) = 0;
-  /*!
-   * \brief Initialize the model using the specified configurations via Configure.
-   *  An model have to be either Loaded or initialized before Update/Predict/Save can be called.
-   */
-  virtual void InitModel() = 0;
+  virtual void Configure() = 0;
  /*!
   * \brief load model from stream
   * \param fi input stream.
@@ -121,10 +106,27 @@ class Learner : public rabit::Serializable {
                      bool pred_contribs = false,
                      bool approx_contribs = false,
                      bool pred_interactions = false) = 0;
+  /*!
+   * \brief Set multiple parameters at once.
+   *
+   * \param args parameters.
+   */
+  virtual void SetParams(Args const& args) = 0;
+  /*!
+   * \brief Set parameter for booster
+   *
+   *  The property will NOT be saved along with booster
+   *
+   * \param key The key of parameter
+   * \param value The value of parameter
+   */
+  virtual void SetParam(const std::string& key, const std::string& value) = 0;
+
  /*!
   * \brief Set additional attribute to the Booster.
   *
   *  The property will be saved along the booster.
   *
   * \param key The key of the property.
   * \param value The value of the property.
   */
@@ -148,8 +150,6 @@ class Learner : public rabit::Serializable {
   * \return vector of attribute name strings.
   */
  virtual std::vector<std::string> GetAttrNames() const = 0;
-
-  virtual LearnerTrainParam const& GetLearnerTrainParameter() const = 0;
  /*!
   * \return whether the model allow lazy checkpoint in rabit.
   */
@@ -161,24 +161,9 @@ class Learner : public rabit::Serializable {
   * \param format the format to dump the model in
   * \return a vector of dump for boosters.
   */
-  std::vector<std::string> DumpModel(const FeatureMap& fmap,
-                                     bool with_stats,
-                                     std::string format) const;
-  /*!
-   * \brief online prediction function, predict score for one instance at a time
-   *  NOTE: use the batch prediction interface if possible, batch prediction is usually
-   *        more efficient than online prediction
-   *        This function is NOT threadsafe, make sure you only call from one thread.
-   *
-   * \param inst the instance you want to predict
-   * \param output_margin whether to only predict margin value instead of transformed prediction
-   * \param out_preds output vector to hold the predictions
-   * \param ntree_limit limit the number of trees used in prediction
-   */
-  inline void Predict(const SparsePage::Inst &inst,
-                      bool output_margin,
-                      HostDeviceVector<bst_float> *out_preds,
-                      unsigned ntree_limit = 0) const;
+  virtual std::vector<std::string> DumpModel(const FeatureMap& fmap,
+                                             bool with_stats,
+                                             std::string format) const = 0;
  /*!
   * \brief Create a new instance of learner.
   * \param cache_data The matrix to cache the prediction.
@@ -186,6 +171,7 @@ class Learner : public rabit::Serializable {
   */
  static Learner* Create(const std::vector<std::shared_ptr<DMatrix> >& cache_data);

+  virtual GenericParameter const& GetGenericParameter() const = 0;
  /*!
   * \brief Get configuration arguments currently stored by the learner
   * \return Key-value pairs representing configuration arguments
@@ -202,26 +188,8 @@ class Learner : public rabit::Serializable {
  /*! \brief The evaluation metrics used to evaluate the model. */
  std::vector<std::unique_ptr<Metric> > metrics_;
  /*! \brief Training parameter. */
-  LearnerTrainParam tparam_;
+  GenericParameter generic_param_;
 };

-// implementation of inline functions.
-inline void Learner::Predict(const SparsePage::Inst& inst,
-                             bool output_margin,
-                             HostDeviceVector<bst_float>* out_preds,
-                             unsigned ntree_limit) const {
-  gbm_->PredictInstance(inst, &out_preds->HostVector(), ntree_limit);
-  if (!output_margin) {
-    obj_->PredTransform(out_preds);
-  }
-}
-
-// implementing configure.
-template<typename PairIter>
-inline void Learner::Configure(PairIter begin, PairIter end) {
-  std::vector<std::pair<std::string, std::string> > vec(begin, end);
-  this->Configure(vec);
-}
-
 }  // namespace xgboost
 #endif  // XGBOOST_LEARNER_H_
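The new Learner contract separates storing parameters (`SetParam`/`SetParams`) from applying them (`Configure`, now lazy and re-run before each iteration). A self-contained sketch of that lifecycle, with a stand-in class rather than the real Learner:

#include <iostream>
#include <string>
#include <utility>
#include <vector>

using Args = std::vector<std::pair<std::string, std::string> >;

// Stand-in mirroring the new contract: SetParams() only records key/value
// pairs; Configure() applies them lazily and is cheap to call repeatedly.
class LearnerLike {
 public:
  void SetParams(const Args& args) {
    for (const auto& kv : args) { cfg_.push_back(kv); }
    configured_ = false;               // any change invalidates the config
  }
  void Configure() {
    if (configured_) { return; }       // lazy: a no-op when nothing changed
    std::cout << "applying " << cfg_.size() << " parameter(s)\n";
    configured_ = true;
  }
  void UpdateOneIter() { Configure(); /* then train one round */ }
 private:
  Args cfg_;
  bool configured_ {false};
};

int main() {
  LearnerLike learner;
  learner.SetParams({{"booster", "gbtree"}});
  learner.UpdateOneIter();  // Configure() runs automatically first
  return 0;
}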
@@ -20,7 +20,7 @@ namespace xgboost {
  */
 class LinearUpdater {
  protected:
-  LearnerTrainParam const* learner_param_;
+  GenericParameter const* learner_param_;

  public:
  /*! \brief virtual destructor */
@@ -29,7 +29,7 @@ class LinearUpdater {
   * \brief Initialize the updater with given arguments.
   * \param args arguments to the objective function.
   */
-  virtual void Init(
+  virtual void Configure(
      const std::vector<std::pair<std::string, std::string> >& args) = 0;

  /**
@@ -40,7 +40,6 @@ class LinearUpdater {
   * \param model Model to be updated.
   * \param sum_instance_weight The sum instance weights, used to normalise l1/l2 penalty.
   */
-
  virtual void Update(HostDeviceVector<GradientPair>* in_gpair, DMatrix* data,
                      gbm::GBLinearModel* model,
                      double sum_instance_weight) = 0;
@@ -49,7 +48,7 @@ class LinearUpdater {
   * \brief Create a linear updater given name
   * \param name Name of the linear updater.
   */
-  static LinearUpdater* Create(const std::string& name, LearnerTrainParam const*);
+  static LinearUpdater* Create(const std::string& name, GenericParameter const*);
 };

 /*!
@@ -66,14 +66,9 @@ class ConsoleLogger : public BaseLogger {
  static ConsoleLoggerParam param_;

  LogVerbosity cur_verbosity_;
-  static void Configure(const std::map<std::string, std::string>& args);

  public:
-  template <typename ArgIter>
-  static void Configure(ArgIter begin, ArgIter end) {
-    std::map<std::string, std::string> args(begin, end);
-    Configure(args);
-  }
+  static void Configure(Args const& args);

  static LogVerbosity GlobalVerbosity();
  static LogVerbosity DefaultVerbosity();
@@ -26,7 +26,7 @@ namespace xgboost {
  */
 class Metric {
  protected:
-  LearnerTrainParam const* tparam_;
+  GenericParameter const* tparam_;

  public:
  /*!
@@ -35,17 +35,6 @@ class Metric {
   */
  virtual void Configure(
      const std::vector<std::pair<std::string, std::string> >& args) {}
-  /*!
-   * \brief set configuration from pair iterators.
-   * \param begin The beginning iterator.
-   * \param end The end iterator.
-   * \tparam PairIter iterator<std::pair<std::string, std::string> >
-   */
-  template<typename PairIter>
-  inline void Configure(PairIter begin, PairIter end) {
-    std::vector<std::pair<std::string, std::string> > vec(begin, end);
-    this->Configure(vec);
-  }
  /*!
   * \brief evaluate a specific metric
   * \param preds prediction
@@ -68,7 +57,7 @@ class Metric {
   *  and the name will be matched in the registry.
   * \return the created metric.
   */
-  static Metric* Create(const std::string& name, LearnerTrainParam const* tparam);
+  static Metric* Create(const std::string& name, GenericParameter const* tparam);
 };

 /*!
@@ -24,19 +24,11 @@ namespace xgboost {
 /*! \brief interface of objective function */
 class ObjFunction {
  protected:
-  LearnerTrainParam const* tparam_;
+  GenericParameter const* tparam_;

  public:
  /*! \brief virtual destructor */
  virtual ~ObjFunction() = default;
  /*!
-   * \brief set configuration from pair iterators.
-   * \param begin The beginning iterator.
-   * \param end The end iterator.
-   * \tparam PairIter iterator<std::pair<std::string, std::string> >
-   */
-  template<typename PairIter>
-  inline void Configure(PairIter begin, PairIter end);
-  /*!
   * \brief Configure the objective with the specified parameters.
   * \param args arguments to the objective function.
@@ -85,16 +77,9 @@ class ObjFunction {
   * \param tparam Generic parameters.
   * \param name Name of the objective.
   */
-  static ObjFunction* Create(const std::string& name, LearnerTrainParam const* tparam);
+  static ObjFunction* Create(const std::string& name, GenericParameter const* tparam);
 };

-// implementing configure.
-template<typename PairIter>
-inline void ObjFunction::Configure(PairIter begin, PairIter end) {
-  std::vector<std::pair<std::string, std::string> > vec(begin, end);
-  this->Configure(vec);
-}
-
 /*!
  * \brief Registry entry for objective factory functions.
  */
@@ -40,7 +40,7 @@ namespace xgboost {

 class Predictor {
  protected:
-  LearnerTrainParam const* learner_param_;
+  GenericParameter const* learner_param_;

  public:
  virtual ~Predictor() = default;
@@ -55,7 +55,7 @@ class Predictor {
   * \param cache Vector of DMatrix's to be used in prediction.
   */

-  virtual void Init(const std::vector<std::pair<std::string, std::string>>& cfg,
+  virtual void Configure(const std::vector<std::pair<std::string, std::string>>& cfg,
                    const std::vector<std::shared_ptr<DMatrix>>& cache);

  /**
@@ -174,7 +174,7 @@ class Predictor {
   *
   */

-  static Predictor* Create(std::string const& name, LearnerTrainParam const*);
+  static Predictor* Create(std::string const& name, GenericParameter const*);

  protected:
  /**
@@ -191,7 +191,6 @@ class Predictor {
   * \brief Map of matrices and associated cached predictions to facilitate
   * storing and looking up predictions.
   */
-
  std::unordered_map<DMatrix*, PredictionCacheEntry> cache_;
 };
@@ -27,7 +27,7 @@ namespace xgboost {
  */
 class TreeUpdater {
  protected:
-  LearnerTrainParam const* tparam_;
+  GenericParameter const* tparam_;

  public:
  /*! \brief virtual destructor */
@@ -36,7 +36,7 @@ class TreeUpdater {
   * \brief Initialize the updater with given arguments.
   * \param args arguments to the objective function.
   */
-  virtual void Init(const std::vector<std::pair<std::string, std::string> >& args) = 0;
+  virtual void Configure(const Args& args) = 0;
  /*!
   * \brief perform update to the tree models
   * \param gpair the gradient pair statistics of the data
@@ -65,11 +65,13 @@ class TreeUpdater {
    return false;
  }

+  virtual char const* Name() const = 0;
+
  /*!
   * \brief Create a tree updater given name
   * \param name Name of the tree updater.
   */
-  static TreeUpdater* Create(const std::string& name, LearnerTrainParam const* tparam);
+  static TreeUpdater* Create(const std::string& name, GenericParameter const* tparam);
 };

 /*!
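The pure-virtual `Name()` added above is what the commit message means by "add name to tree updaters". A self-contained sketch of the hook, with a stand-in hierarchy (the real `TreeUpdater` also has `Configure`, `Update`, and other pure virtuals):

#include <iostream>

// Minimal stand-in hierarchy illustrating the new Name() hook on updaters.
class UpdaterBase {
 public:
  virtual ~UpdaterBase() = default;
  virtual const char* Name() const = 0;
};

class PrunerLike : public UpdaterBase {
 public:
  const char* Name() const override { return "prune"; }
};

int main() {
  PrunerLike up;
  std::cout << "Using updater: " << up.Name() << "\n";  // easier debugging
  return 0;
}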
@@ -24,82 +24,6 @@


 namespace xgboost {
-// booster wrapper for backward compatible reason.
-class Booster {
- public:
-  explicit Booster(const std::vector<std::shared_ptr<DMatrix> >& cache_mats)
-      : configured_(false),
-        initialized_(false),
-        learner_(Learner::Create(cache_mats)) {}
-
-  inline Learner* learner() {  // NOLINT
-    return learner_.get();
-  }
-
-  inline void SetParam(const std::string& name, const std::string& val) {
-    auto it = std::find_if(cfg_.begin(), cfg_.end(),
-      [&name, &val](decltype(*cfg_.begin()) &x) {
-        if (name == "eval_metric") {
-          return x.first == name && x.second == val;
-        }
-        return x.first == name;
-      });
-    if (it == cfg_.end()) {
-      cfg_.emplace_back(name, val);
-    } else {
-      (*it).second = val;
-    }
-    if (configured_) {
-      learner_->Configure(cfg_);
-    }
-  }
-
-  inline void LazyInit() {
-    if (!configured_) {
-      LoadSavedParamFromAttr();
-      learner_->Configure(cfg_);
-      configured_ = true;
-    }
-    if (!initialized_) {
-      learner_->InitModel();
-      initialized_ = true;
-    }
-  }
-
-  inline void LoadSavedParamFromAttr() {
-    // Locate saved parameters from learner attributes
-    const std::string prefix = "SAVED_PARAM_";
-    for (const std::string& attr_name : learner_->GetAttrNames()) {
-      if (attr_name.find(prefix) == 0) {
-        const std::string saved_param = attr_name.substr(prefix.length());
-        if (std::none_of(cfg_.begin(), cfg_.end(),
-                         [&](const std::pair<std::string, std::string>& x)
-                             { return x.first == saved_param; })) {
-          // If cfg_ contains the parameter already, skip it
-          // (this is to allow the user to explicitly override its value)
-          std::string saved_param_value;
-          CHECK(learner_->GetAttr(attr_name, &saved_param_value));
-          cfg_.emplace_back(saved_param, saved_param_value);
-        }
-      }
-    }
-  }
-
-  inline void LoadModel(dmlc::Stream* fi) {
-    learner_->Load(fi);
-    initialized_ = true;
-  }
-
-  bool IsInitialized() const { return initialized_; }
-  void Intialize() { initialized_ = true; }
-
- private:
-  bool configured_;
-  bool initialized_;
-  std::unique_ptr<Learner> learner_;
-  std::vector<std::pair<std::string, std::string> > cfg_;
-};
-
 // declare the data callback.
 XGB_EXTERN_C int XGBoostNativeDataIterSetData(
    void *handle, XGBoostBatchCSR batch);
@@ -861,14 +785,14 @@ XGB_DLL int XGBoosterCreate(const DMatrixHandle dmats[],
  for (xgboost::bst_ulong i = 0; i < len; ++i) {
    mats.push_back(*static_cast<std::shared_ptr<DMatrix>*>(dmats[i]));
  }
-  *out = new Booster(mats);
+  *out = Learner::Create(mats);
  API_END();
 }

 XGB_DLL int XGBoosterFree(BoosterHandle handle) {
  API_BEGIN();
  CHECK_HANDLE();
-  delete static_cast<Booster*>(handle);
+  delete static_cast<Learner*>(handle);
  API_END();
 }
@@ -877,7 +801,7 @@ XGB_DLL int XGBoosterSetParam(BoosterHandle handle,
                              const char *value) {
  API_BEGIN();
  CHECK_HANDLE();
-  static_cast<Booster*>(handle)->SetParam(name, value);
+  static_cast<Learner*>(handle)->SetParam(name, value);
  API_END();
 }
@@ -886,12 +810,11 @@ XGB_DLL int XGBoosterUpdateOneIter(BoosterHandle handle,
                                   DMatrixHandle dtrain) {
  API_BEGIN();
  CHECK_HANDLE();
-  auto* bst = static_cast<Booster*>(handle);
+  auto* bst = static_cast<Learner*>(handle);
  auto *dtr =
      static_cast<std::shared_ptr<DMatrix>*>(dtrain);

-  bst->LazyInit();
-  bst->learner()->UpdateOneIter(iter, dtr->get());
+  bst->UpdateOneIter(iter, dtr->get());
  API_END();
 }
@@ -903,7 +826,7 @@ XGB_DLL int XGBoosterBoostOneIter(BoosterHandle handle,
  HostDeviceVector<GradientPair> tmp_gpair;
  API_BEGIN();
  CHECK_HANDLE();
-  auto* bst = static_cast<Booster*>(handle);
+  auto* bst = static_cast<Learner*>(handle);
  auto* dtr =
      static_cast<std::shared_ptr<DMatrix>*>(dtrain);
  tmp_gpair.Resize(len);
@@ -912,8 +835,7 @@ XGB_DLL int XGBoosterBoostOneIter(BoosterHandle handle,
    tmp_gpair_h[i] = GradientPair(grad[i], hess[i]);
  }

-  bst->LazyInit();
-  bst->learner()->BoostOneIter(0, dtr->get(), &tmp_gpair);
+  bst->BoostOneIter(0, dtr->get(), &tmp_gpair);
  API_END();
 }
@@ -926,7 +848,7 @@ XGB_DLL int XGBoosterEvalOneIter(BoosterHandle handle,
  std::string& eval_str = XGBAPIThreadLocalStore::Get()->ret_str;
  API_BEGIN();
  CHECK_HANDLE();
-  auto* bst = static_cast<Booster*>(handle);
+  auto* bst = static_cast<Learner*>(handle);
  std::vector<DMatrix*> data_sets;
  std::vector<std::string> data_names;
@@ -935,8 +857,7 @@ XGB_DLL int XGBoosterEvalOneIter(BoosterHandle handle,
    data_names.emplace_back(evnames[i]);
  }

-  bst->LazyInit();
-  eval_str = bst->learner()->EvalOneIter(iter, data_sets, data_names);
+  eval_str = bst->EvalOneIter(iter, data_sets, data_names);
  *out_str = eval_str.c_str();
  API_END();
 }
@@ -951,10 +872,9 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle,
      XGBAPIThreadLocalStore::Get()->ret_vec_float;
  API_BEGIN();
  CHECK_HANDLE();
-  auto *bst = static_cast<Booster*>(handle);
-  bst->LazyInit();
+  auto *bst = static_cast<Learner*>(handle);
  HostDeviceVector<bst_float> tmp_preds;
-  bst->learner()->Predict(
+  bst->Predict(
      static_cast<std::shared_ptr<DMatrix>*>(dmat)->get(),
      (option_mask & 1) != 0,
      &tmp_preds, ntree_limit,
@@ -972,7 +892,7 @@ XGB_DLL int XGBoosterLoadModel(BoosterHandle handle, const char* fname) {
  API_BEGIN();
  CHECK_HANDLE();
  std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname, "r"));
-  static_cast<Booster*>(handle)->LoadModel(fi.get());
+  static_cast<Learner*>(handle)->Load(fi.get());
  API_END();
 }
@@ -980,9 +900,8 @@ XGB_DLL int XGBoosterSaveModel(BoosterHandle handle, const char* fname) {
  API_BEGIN();
  CHECK_HANDLE();
  std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(fname, "w"));
-  auto *bst = static_cast<Booster*>(handle);
-  bst->LazyInit();
-  bst->learner()->Save(fo.get());
+  auto *bst = static_cast<Learner*>(handle);
+  bst->Save(fo.get());
  API_END();
 }
@@ -992,7 +911,7 @@ XGB_DLL int XGBoosterLoadModelFromBuffer(BoosterHandle handle,
  API_BEGIN();
  CHECK_HANDLE();
  common::MemoryFixSizeBuffer fs((void*)buf, len);  // NOLINT(*)
-  static_cast<Booster*>(handle)->LoadModel(&fs);
+  static_cast<Learner*>(handle)->Load(&fs);
  API_END();
 }
@@ -1005,9 +924,8 @@ XGB_DLL int XGBoosterGetModelRaw(BoosterHandle handle,
  API_BEGIN();
  CHECK_HANDLE();
  common::MemoryBufferStream fo(&raw_str);
-  auto *bst = static_cast<Booster*>(handle);
-  bst->LazyInit();
-  bst->learner()->Save(&fo);
+  auto *bst = static_cast<Learner*>(handle);
+  bst->Save(&fo);
  *out_dptr = dmlc::BeginPtr(raw_str);
  *out_len = static_cast<xgboost::bst_ulong>(raw_str.length());
  API_END();
@@ -1022,9 +940,8 @@ inline void XGBoostDumpModelImpl(
    const char*** out_models) {
  std::vector<std::string>& str_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_str;
  std::vector<const char*>& charp_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_charp;
-  auto *bst = static_cast<Booster*>(handle);
-  bst->LazyInit();
-  str_vecs = bst->learner()->DumpModel(fmap, with_stats != 0, format);
+  auto *bst = static_cast<Learner*>(handle);
+  str_vecs = bst->DumpModel(fmap, with_stats != 0, format);
  charp_vecs.resize(str_vecs.size());
  for (size_t i = 0; i < str_vecs.size(); ++i) {
    charp_vecs[i] = str_vecs[i].c_str();
@@ -1093,11 +1010,11 @@ XGB_DLL int XGBoosterGetAttr(BoosterHandle handle,
                             const char* key,
                             const char** out,
                             int* success) {
-  auto* bst = static_cast<Booster*>(handle);
+  auto* bst = static_cast<Learner*>(handle);
  std::string& ret_str = XGBAPIThreadLocalStore::Get()->ret_str;
  API_BEGIN();
  CHECK_HANDLE();
-  if (bst->learner()->GetAttr(key, &ret_str)) {
+  if (bst->GetAttr(key, &ret_str)) {
    *out = ret_str.c_str();
    *success = 1;
  } else {
@@ -1110,13 +1027,13 @@ XGB_DLL int XGBoosterGetAttr(BoosterHandle handle,
 XGB_DLL int XGBoosterSetAttr(BoosterHandle handle,
                             const char* key,
                             const char* value) {
-  auto* bst = static_cast<Booster*>(handle);
+  auto* bst = static_cast<Learner*>(handle);
  API_BEGIN();
  CHECK_HANDLE();
  if (value == nullptr) {
-    bst->learner()->DelAttr(key);
+    bst->DelAttr(key);
  } else {
-    bst->learner()->SetAttr(key, value);
+    bst->SetAttr(key, value);
  }
  API_END();
 }
@@ -1126,10 +1043,10 @@ XGB_DLL int XGBoosterGetAttrNames(BoosterHandle handle,
                                  const char*** out) {
  std::vector<std::string>& str_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_str;
  std::vector<const char*>& charp_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_charp;
-  auto *bst = static_cast<Booster*>(handle);
+  auto *bst = static_cast<Learner*>(handle);
  API_BEGIN();
  CHECK_HANDLE();
-  str_vecs = bst->learner()->GetAttrNames();
+  str_vecs = bst->GetAttrNames();
  charp_vecs.resize(str_vecs.size());
  for (size_t i = 0; i < str_vecs.size(); ++i) {
    charp_vecs[i] = str_vecs[i].c_str();
@@ -1143,10 +1060,10 @@ XGB_DLL int XGBoosterLoadRabitCheckpoint(BoosterHandle handle,
                                         int* version) {
  API_BEGIN();
  CHECK_HANDLE();
-  auto* bst = static_cast<Booster*>(handle);
-  *version = rabit::LoadCheckPoint(bst->learner());
+  auto* bst = static_cast<Learner*>(handle);
+  *version = rabit::LoadCheckPoint(bst);
  if (*version != 0) {
-    bst->Intialize();
+    bst->Configure();
  }
  API_END();
 }
@@ -1154,23 +1071,14 @@ XGB_DLL int XGBoosterLoadRabitCheckpoint(BoosterHandle handle,
 XGB_DLL int XGBoosterSaveRabitCheckpoint(BoosterHandle handle) {
  API_BEGIN();
  CHECK_HANDLE();
-  auto* bst = static_cast<Booster*>(handle);
-  if (bst->learner()->AllowLazyCheckPoint()) {
-    rabit::LazyCheckPoint(bst->learner());
+  auto* bst = static_cast<Learner*>(handle);
+  if (bst->AllowLazyCheckPoint()) {
+    rabit::LazyCheckPoint(bst);
  } else {
-    rabit::CheckPoint(bst->learner());
+    rabit::CheckPoint(bst);
  }
  API_END();
 }

-/* hidden method; only known to C++ test suite */
-const std::map<std::string, std::string>&
-QueryBoosterConfigurationArguments(BoosterHandle handle) {
-  CHECK_HANDLE();
-  auto* bst = static_cast<Booster*>(handle);
-  bst->LazyInit();
-  return bst->learner()->GetConfigurationArguments();
-}
-
 // force link rabit
 static DMLC_ATTRIBUTE_UNUSED int XGBOOST_LINK_RABIT_C_API_ = RabitLinkTag();
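With the `Booster` wrapper gone, a `BoosterHandle` is simply a type-erased `Learner*`. A self-contained sketch of the round trip, using a stand-in type so it compiles on its own (the real code casts to `xgboost::Learner*`):

#include <memory>

// Stand-in for xgboost::Learner, keeping the sketch self-contained.
struct LearnerLike { /* model state lives here */ };
typedef void* BoosterHandle;

BoosterHandle BoosterCreate() {
  return new LearnerLike();                  // the handle is the learner itself
}

void BoosterFree(BoosterHandle handle) {
  delete static_cast<LearnerLike*>(handle);  // no wrapper object in between
}

int main() {
  BoosterHandle h = BoosterCreate();
  BoosterFree(h);
  return 0;
}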
@@ -1,5 +1,5 @@
 /*!
- * Copyright 2014 by Contributors
+ * Copyright 2014-2019 by Contributors
  * \file cli_main.cc
  * \brief The command line interface program of xgboost.
  *  This file is not included in dynamic library.
@@ -188,10 +188,9 @@ void CLITrain(const CLIParam& param) {
      std::unique_ptr<dmlc::Stream> fi(
          dmlc::Stream::Create(param.model_in.c_str(), "r"));
      learner->Load(fi.get());
-      learner->Configure(param.cfg);
+      learner->SetParams(param.cfg);
    } else {
-      learner->Configure(param.cfg);
-      learner->InitModel();
+      learner->SetParams(param.cfg);
    }
  }
  LOG(INFO) << "Loading data: " << dmlc::GetTime() - tstart_data_load << " sec";
@@ -275,7 +274,7 @@ void CLIDumpModel(const CLIParam& param) {
  std::unique_ptr<Learner> learner(Learner::Create({}));
  std::unique_ptr<dmlc::Stream> fi(
      dmlc::Stream::Create(param.model_in.c_str(), "r"));
-  learner->Configure(param.cfg);
+  learner->SetParams(param.cfg);
  learner->Load(fi.get());
  // dump data
  std::vector<std::string> dump = learner->DumpModel(
@@ -316,7 +315,7 @@ void CLIPredict(const CLIParam& param) {
  std::unique_ptr<dmlc::Stream> fi(
      dmlc::Stream::Create(param.model_in.c_str(), "r"));
  learner->Load(fi.get());
-  learner->Configure(param.cfg);
+  learner->SetParams(param.cfg);

  LOG(INFO) << "start prediction...";
  HostDeviceVector<bst_float> preds;
@@ -402,7 +402,7 @@ struct GPUSketcher {

  void SketchBatch(const SparsePage &batch, const MetaInfo &info) {
    GPUDistribution dist =
-        GPUDistribution::Block(GPUSet::All(learner_param_.gpu_id, learner_param_.n_gpus,
+        GPUDistribution::Block(GPUSet::All(generic_param_.gpu_id, generic_param_.n_gpus,
                                           batch.Size()));

    // create device shards
@@ -429,8 +429,8 @@ struct GPUSketcher {
    }
  }

-  GPUSketcher(const tree::TrainParam &param, const LearnerTrainParam &learner_param, int gpu_nrows)
-      : param_(param), learner_param_(learner_param), gpu_batch_nrows_(gpu_nrows), row_stride_(0) {
+  GPUSketcher(const tree::TrainParam &param, const GenericParameter &generic_param, int gpu_nrows)
+      : param_(param), generic_param_(generic_param), gpu_batch_nrows_(gpu_nrows), row_stride_(0) {
  }

  /* Builds the sketches on the GPU for the dmatrix and returns the row stride
@@ -452,14 +452,14 @@ struct GPUSketcher {
  private:
  std::vector<std::unique_ptr<DeviceShard>> shards_;
  const tree::TrainParam &param_;
-  const LearnerTrainParam &learner_param_;
+  const GenericParameter &generic_param_;
  int gpu_batch_nrows_;
  size_t row_stride_;
  std::unique_ptr<SketchContainer> sketch_container_;
 };

 size_t DeviceSketch
-  (const tree::TrainParam &param, const LearnerTrainParam &learner_param, int gpu_batch_nrows,
+  (const tree::TrainParam &param, const GenericParameter &learner_param, int gpu_batch_nrows,
   DMatrix *dmat, HistogramCuts *hmat) {
  GPUSketcher sketcher(param, learner_param, gpu_batch_nrows);
  // We only need to return the result in HistogramCuts container, so it is safe to
@@ -291,7 +291,7 @@ class DenseCuts : public CutsBuilder {
 * \return The row stride across the entire dataset.
 */
 size_t DeviceSketch
-  (const tree::TrainParam& param, const LearnerTrainParam &learner_param, int gpu_batch_nrows,
+  (const tree::TrainParam& param, const GenericParameter &learner_param, int gpu_batch_nrows,
   DMatrix* dmat, HistogramCuts* hmat);
@@ -57,13 +57,13 @@ class GBLinear : public GradientBooster {
      cache_[d.get()] = std::move(e);
    }
  }
-  void Configure(const std::vector<std::pair<std::string, std::string> >& cfg) override {
+  void Configure(const Args& cfg) override {
    if (model_.weight.size() == 0) {
      model_.param.InitAllowUnknown(cfg);
    }
    param_.InitAllowUnknown(cfg);
    updater_.reset(LinearUpdater::Create(param_.updater, learner_param_));
-    updater_->Init(cfg);
+    updater_->Configure(cfg);
    monitor_.Init("GBLinear");
  }
  void Load(dmlc::Stream* fi) override {
@@ -13,7 +13,7 @@ DMLC_REGISTRY_ENABLE(::xgboost::GradientBoosterReg);
 namespace xgboost {
 GradientBooster* GradientBooster::Create(
    const std::string& name,
-    LearnerTrainParam const* learner_param,
+    GenericParameter const* learner_param,
    const std::vector<std::shared_ptr<DMatrix> >& cache_mats,
    bst_float base_margin) {
  auto *e = ::dmlc::Registry< ::xgboost::GradientBoosterReg>::Get()->Find(name);
@@ -32,12 +32,9 @@ namespace gbm {

 DMLC_REGISTRY_FILE_TAG(gbtree);

-void GBTree::Configure(const std::vector<std::pair<std::string, std::string> >& cfg) {
+void GBTree::Configure(const Args& cfg) {
  this->cfg_ = cfg;
  tparam_.InitAllowUnknown(cfg);
-  std::string updater_seq = tparam_.updater_seq;

+  ConfigureUpdaters({cfg.begin(), cfg.cend()});

  model_.Configure(cfg);

@@ -46,15 +43,46 @@ void GBTree::Configure(const std::vector<std::pair<std::string, std::string> >&
    model_.InitTreesToUpdate();
  }

-  // configure predictor
-  predictor_ = std::unique_ptr<Predictor>(
-      Predictor::Create(tparam_.predictor, this->learner_param_));
-  predictor_->Init(cfg, cache_);
+  // configure predictors
+  if (!cpu_predictor_) {
+    cpu_predictor_ = std::unique_ptr<Predictor>(
+        Predictor::Create("cpu_predictor", this->learner_param_));
+  }
+#if defined(XGBOOST_USE_CUDA)
+  if (!gpu_predictor_) {
+    gpu_predictor_ = std::unique_ptr<Predictor>(
+        Predictor::Create("gpu_predictor", this->learner_param_));
+  }
+#endif  // defined(XGBOOST_USE_CUDA)

  monitor_.Init("GBTree");
+
+  configured_ = true;
 }

-void GBTree::PerformTreeMethodHeuristic(DMatrix* p_train,
-                                        std::map<std::string, std::string> cfg) {
+// FIXME(trivialfis): This handles updaters and predictor. Because the choice of updaters
+// depends on whether external memory is used and how large is dataset. We can remove the
+// dependency on DMatrix once `hist` tree method can handle external memory so that we can
+// make it default.
+void GBTree::ConfigureWithKnownData(std::map<std::string, std::string> const& cfg, DMatrix* fmat) {
+  std::string updater_seq = tparam_.updater_seq;
+  tparam_.InitAllowUnknown(cfg);
+  this->PerformTreeMethodHeuristic({this->cfg_.begin(), this->cfg_.end()}, fmat);
+  this->ConfigureUpdaters({this->cfg_.begin(), this->cfg_.end()});
+  LOG(DEBUG) << "Using updaters: " << tparam_.updater_seq;
+  // initialize the updaters only when needed.
+  if (updater_seq != tparam_.updater_seq) {
+    this->updaters_.clear();
+  }
+  this->InitUpdater();
+  cpu_predictor_->Configure({cfg.cbegin(), cfg.cend()}, cache_);
+#if defined(XGBOOST_USE_CUDA)
+  gpu_predictor_->Configure({cfg.cbegin(), cfg.cend()}, cache_);
+#endif  // defined(XGBOOST_USE_CUDA)
+}
+
+void GBTree::PerformTreeMethodHeuristic(std::map<std::string, std::string> const& cfg,
+                                        DMatrix* fmat) {
  if (cfg.find("updater") != cfg.cend()) {
    // This method is disabled when `updater` parameter is explicitly
    // set, since only experts are expected to do so.
@@ -71,11 +99,11 @@ void GBTree::PerformTreeMethodHeuristic(DMatrix* p_train,
                    "Tree method is automatically selected to be 'approx' "
                    "for distributed training.";
    tparam_.tree_method = TreeMethod::kApprox;
-  } else if (!p_train->SingleColBlock()) {
+  } else if (!fmat->SingleColBlock()) {
    LOG(WARNING) << "Tree method is automatically set to 'approx' "
                    "since external-memory data matrix is used.";
    tparam_.tree_method = TreeMethod::kApprox;
-  } else if (p_train->Info().num_row_ >= (4UL << 20UL)) {
+  } else if (fmat->Info().num_row_ >= (4UL << 20UL)) {
    /* Choose tree_method='approx' automatically for large data matrix */
    LOG(WARNING) << "Tree method is automatically selected to be "
                    "'approx' for faster speed. To use old behavior "
@@ -141,17 +169,9 @@ void GBTree::ConfigureUpdaters(const std::map<std::string, std::string>& cfg) {
 void GBTree::DoBoost(DMatrix* p_fmat,
                     HostDeviceVector<GradientPair>* in_gpair,
                     ObjFunction* obj) {
-  std::string updater_seq = tparam_.updater_seq;
-  this->PerformTreeMethodHeuristic(p_fmat, {this->cfg_.begin(), this->cfg_.end()});
-  this->ConfigureUpdaters({this->cfg_.begin(), this->cfg_.end()});
-  LOG(DEBUG) << "Using updaters: " << tparam_.updater_seq;
-  // initialize the updaters only when needed.
-  if (updater_seq != tparam_.updater_seq) {
-    this->updaters_.clear();
-  }
-
  std::vector<std::vector<std::unique_ptr<RegTree> > > new_trees;
  const int ngroup = model_.param.num_output_group;
+  ConfigureWithKnownData({this->cfg_.cbegin(), this->cfg_.cend()}, p_fmat);
  monitor_.Start("BoostNewTrees");
  if (ngroup == 1) {
    std::vector<std::unique_ptr<RegTree> > ret;
@@ -189,7 +209,7 @@ void GBTree::InitUpdater() {
  std::vector<std::string> ups = common::Split(tval, ',');
  for (const std::string& pstr : ups) {
    std::unique_ptr<TreeUpdater> up(TreeUpdater::Create(pstr.c_str(), learner_param_));
-    up->Init(this->cfg_);
+    up->Configure(this->cfg_);
    updaters_.push_back(std::move(up));
  }
 }
@@ -198,7 +218,6 @@ void GBTree::BoostNewTrees(HostDeviceVector<GradientPair>* gpair,
                           DMatrix *p_fmat,
                           int bst_group,
                           std::vector<std::unique_ptr<RegTree> >* ret) {
-  this->InitUpdater();
  std::vector<RegTree*> new_trees;
  ret->clear();
  // create the trees
@@ -230,7 +249,8 @@ void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& ne
    num_new_trees += new_trees[gid].size();
    model_.CommitModel(std::move(new_trees[gid]), gid);
  }
-  predictor_->UpdatePredictionCache(model_, &updaters_, num_new_trees);
+  CHECK(configured_);
+  GetPredictor()->UpdatePredictionCache(model_, &updaters_, num_new_trees);
 }
@@ -239,7 +259,7 @@ class Dart : public GBTree {
  public:
  explicit Dart(bst_float base_margin) : GBTree(base_margin) {}

-  void Configure(const std::vector<std::pair<std::string, std::string> >& cfg) override {
+  void Configure(const Args& cfg) override {
    GBTree::Configure(cfg);
    if (model_.trees.size() == 0) {
      dparam_.InitAllowUnknown(cfg);
@@ -154,13 +154,15 @@ class GBTree : public GradientBooster {
 #endif  // XGBOOST_USE_CUDA
  }

-  void Configure(const std::vector<std::pair<std::string, std::string> >& cfg) override;
+  void Configure(const Args& cfg) override;
  // Revise `tree_method` and `updater` parameters after seeing the training
  // data matrix
-  void PerformTreeMethodHeuristic(DMatrix* p_train,
-                                  std::map<std::string, std::string> cfg);
+  void PerformTreeMethodHeuristic(std::map<std::string, std::string> const& cfg,
+                                  DMatrix* fmat);
  /*! \brief Map `tree_method` parameter to `updater` parameter */
  void ConfigureUpdaters(const std::map<std::string, std::string>& cfg);
+  void ConfigureWithKnownData(std::map<std::string, std::string> const& cfg, DMatrix* fmat);

  /*! \brief Carry out one iteration of boosting */
  void DoBoost(DMatrix* p_fmat,
               HostDeviceVector<GradientPair>* in_gpair,
@@ -197,34 +199,39 @@ class GBTree : public GradientBooster {
  void PredictBatch(DMatrix* p_fmat,
                    HostDeviceVector<bst_float>* out_preds,
                    unsigned ntree_limit) override {
-    predictor_->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit);
+    CHECK(configured_);
+    GetPredictor()->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit);
  }

  void PredictInstance(const SparsePage::Inst& inst,
                       std::vector<bst_float>* out_preds,
                       unsigned ntree_limit,
                       unsigned root_index) override {
-    predictor_->PredictInstance(inst, out_preds, model_,
+    CHECK(configured_);
+    cpu_predictor_->PredictInstance(inst, out_preds, model_,
                                ntree_limit, root_index);
  }

  void PredictLeaf(DMatrix* p_fmat,
                   std::vector<bst_float>* out_preds,
                   unsigned ntree_limit) override {
-    predictor_->PredictLeaf(p_fmat, out_preds, model_, ntree_limit);
+    CHECK(configured_);
+    cpu_predictor_->PredictLeaf(p_fmat, out_preds, model_, ntree_limit);
  }

  void PredictContribution(DMatrix* p_fmat,
                           std::vector<bst_float>* out_contribs,
                           unsigned ntree_limit, bool approximate, int condition,
                           unsigned condition_feature) override {
-    predictor_->PredictContribution(p_fmat, out_contribs, model_, ntree_limit, approximate);
+    CHECK(configured_);
+    cpu_predictor_->PredictContribution(p_fmat, out_contribs, model_, ntree_limit, approximate);
  }

  void PredictInteractionContributions(DMatrix* p_fmat,
                                       std::vector<bst_float>* out_contribs,
                                       unsigned ntree_limit, bool approximate) override {
-    predictor_->PredictInteractionContributions(p_fmat, out_contribs, model_,
+    CHECK(configured_);
+    cpu_predictor_->PredictInteractionContributions(p_fmat, out_contribs, model_,
                                                ntree_limit, approximate);
  }
@@ -244,6 +251,25 @@ class GBTree : public GradientBooster {
                     int bst_group,
                     std::vector<std::unique_ptr<RegTree> >* ret);

+  std::unique_ptr<Predictor> const& GetPredictor() const {
+    CHECK(configured_);
+    if (tparam_.predictor == "cpu_predictor") {
+      CHECK(cpu_predictor_);
+      return cpu_predictor_;
+    } else if (tparam_.predictor == "gpu_predictor") {
+#if defined(XGBOOST_USE_CUDA)
+      CHECK(gpu_predictor_);
+      return gpu_predictor_;
+#else
+      LOG(FATAL) << "XGBoost is not compiled with CUDA support.";
+      return cpu_predictor_;
+#endif  // defined(XGBOOST_USE_CUDA)
+    } else {
+      LOG(FATAL) << "Unknown predictor: " << tparam_.predictor;
+      return cpu_predictor_;
+    }
+  }
+
  // commit new trees all at once
  virtual void CommitModel(
      std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees);
@@ -253,13 +279,17 @@ class GBTree : public GradientBooster {
  // training parameter
  GBTreeTrainParam tparam_;
+  // ----training fields----
+  bool configured_ {false};
  // configurations for tree
-  std::vector<std::pair<std::string, std::string> > cfg_;
+  Args cfg_;
  // the updaters that can be applied to each of tree
  std::vector<std::unique_ptr<TreeUpdater>> updaters_;
  // Cached matrices
  std::vector<std::shared_ptr<DMatrix>> cache_;
-  std::unique_ptr<Predictor> predictor_;
+  std::unique_ptr<Predictor> cpu_predictor_;
+#if defined(XGBOOST_USE_CUDA)
+  std::unique_ptr<Predictor> gpu_predictor_;
+#endif  // defined(XGBOOST_USE_CUDA)
  common::Monitor monitor_;
 };
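The `GetPredictor()` method above is the dispatch point the commit message refers to: both predictors are preallocated, and `tparam_.predictor` selects one per call. Reduced to its essence as a hedged, self-contained sketch (free functions and stand-in types, not the real API):

#include <iostream>
#include <string>

// Stand-in predictors; the real ones are std::unique_ptr<Predictor> members.
struct PredictorLike { const char* backend; };

// Mirrors the dispatch rule: the configured name picks a preallocated instance.
const PredictorLike* Dispatch(const std::string& name,
                              const PredictorLike* cpu,
                              const PredictorLike* gpu) {
  if (name == "gpu_predictor" && gpu != nullptr) { return gpu; }
  return cpu;  // default, and the fallback when CUDA is unavailable
}

int main() {
  PredictorLike cpu{"cpu"}, gpu{"gpu"};
  std::cout << Dispatch("gpu_predictor", &cpu, &gpu)->backend << "\n";  // gpu
  std::cout << Dispatch("cpu_predictor", &cpu, &gpu)->backend << "\n";  // cpu
  return 0;
}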
@@ -63,7 +63,7 @@ struct GBTreeModelParam : public dmlc::Parameter<GBTreeModelParam> {

 struct GBTreeModel {
  explicit GBTreeModel(bst_float base_margin) : base_margin(base_margin) {}
-  void Configure(const std::vector<std::pair<std::string, std::string> >& cfg) {
+  void Configure(const Args& cfg) {
    // initialize model parameters if not yet been initialized.
    if (trees.size() == 0) {
      param.InitAllowUnknown(cfg);
src/learner.cc (409 lines changed)
@@ -6,6 +6,7 @@
 */
 #include <dmlc/io.h>
 #include <dmlc/timer.h>
+#include <dmlc/any.h>
 #include <xgboost/feature_map.h>
 #include <xgboost/learner.h>
 #include <xgboost/logging.h>
@@ -50,18 +51,21 @@ inline std::string RenderParamVal(const std::string& str) {

 }  // anonymous namespace

 namespace xgboost {

+enum class DataSplitMode : int {
+  kAuto = 0, kCol = 1, kRow = 2
+};
+}  // namespace xgboost
+
+DECLARE_FIELD_ENUM_CLASS(xgboost::DataSplitMode);
+
+namespace xgboost {
 // implementation of base learner.
 bool Learner::AllowLazyCheckPoint() const {
  return gbm_->AllowLazyCheckPoint();
 }

-std::vector<std::string> Learner::DumpModel(const FeatureMap& fmap,
-                                            bool with_stats,
-                                            std::string format) const {
-  return gbm_->DumpModel(fmap, with_stats, format);
-}
-
 /*! \brief training parameter for regression */
 struct LearnerModelParam : public dmlc::Parameter<LearnerModelParam> {
  /* \brief global bias */
@@ -97,9 +101,39 @@ struct LearnerModelParam : public dmlc::Parameter<LearnerModelParam> {
  }
 };

+struct LearnerTrainParam : public dmlc::Parameter<LearnerTrainParam> {
+  // data split mode, can be row, col, or none.
+  DataSplitMode dsplit;
+  // flag to disable default metric
+  int disable_default_eval_metric;
+
+  std::string booster;
+  std::string objective;
+
+  // declare parameters
+  DMLC_DECLARE_PARAMETER(LearnerTrainParam) {
+    DMLC_DECLARE_FIELD(dsplit)
+        .set_default(DataSplitMode::kAuto)
+        .add_enum("auto", DataSplitMode::kAuto)
+        .add_enum("col", DataSplitMode::kCol)
+        .add_enum("row", DataSplitMode::kRow)
+        .describe("Data split mode for distributed training.");
+    DMLC_DECLARE_FIELD(disable_default_eval_metric)
+        .set_default(0)
+        .describe("flag to disable default metric. Set to >0 to disable");
+    DMLC_DECLARE_FIELD(booster)
+        .set_default("gbtree")
+        .describe("Gradient booster used for training.");
+    DMLC_DECLARE_FIELD(objective)
+        .set_default("reg:squarederror")
+        .describe("Objective function used for obtaining gradient.");
+  }
+};
+
+
 DMLC_REGISTER_PARAMETER(LearnerModelParam);
 DMLC_REGISTER_PARAMETER(LearnerTrainParam);
+DMLC_REGISTER_PARAMETER(GenericParameter);

 /*!
  * \brief learner that performs gradient boosting for a specific objective
@@ -108,56 +142,20 @@ DMLC_REGISTER_PARAMETER(LearnerTrainParam);
 class LearnerImpl : public Learner {
  public:
  explicit LearnerImpl(std::vector<std::shared_ptr<DMatrix> > cache)
-      : cache_(std::move(cache)) {
-    // boosted tree
-    name_obj_ = "reg:squarederror";
-    name_gbm_ = "gbtree";
-  }
-
-  void ConfigureObjective() {
-    if (cfg_.count("num_class") != 0) {
-      cfg_["num_output_group"] = cfg_["num_class"];
-      if (atoi(cfg_["num_class"].c_str()) > 1 && cfg_.count("objective") == 0) {
-        cfg_["objective"] = "multi:softmax";
-      }
-    }
-
-    if (cfg_.find("max_delta_step") == cfg_.cend() &&
-        cfg_.find("objective") != cfg_.cend() &&
-        cfg_["objective"] == "count:poisson") {
-      cfg_["max_delta_step"] = kMaxDeltaStepDefaultValue;
-    }
-
-    if (cfg_.count("objective") == 0) {
-      cfg_["objective"] = "reg:squarederror";
-    }
-  }
-
+      : configured_{false}, cache_(std::move(cache)) {}
+  // Configuration before data is known.
-  void Configure(
-      const std::vector<std::pair<std::string, std::string> >& args) override {
-    // add to configurations
-    tparam_.InitAllowUnknown(args);
-    ConsoleLogger::Configure(args.cbegin(), args.cend());
+  void Configure() override {
+    if (configured_) { return; }
    monitor_.Init("Learner");
-    cfg_.clear();
+    monitor_.Start("Configure");
+    auto old_tparam = tparam_;
+    Args args = {cfg_.cbegin(), cfg_.cend()};

-    for (const auto& kv : args) {
-      if (kv.first == "eval_metric") {
-        // check duplication
-        auto dup_check = [&kv](const std::unique_ptr<Metric>& m) {
-          return m->Name() != kv.second;
-        };
-        if (std::all_of(metrics_.begin(), metrics_.end(), dup_check)) {
-          metrics_.emplace_back(Metric::Create(kv.second, &tparam_));
-          mparam_.contain_eval_metrics = 1;
-        }
-      } else {
-        cfg_[kv.first] = kv.second;
-      }
-    }
-    if (tparam_.nthread != 0) {
-      omp_set_num_threads(tparam_.nthread);
+    tparam_.InitAllowUnknown(args);
+    generic_param_.InitAllowUnknown(args);
+    ConsoleLogger::Configure(args);
+    if (generic_param_.nthread != 0) {
+      omp_set_num_threads(generic_param_.nthread);
    }

    // add additional parameters
@@ -166,54 +164,34 @@ class LearnerImpl : public Learner {
      tparam_.dsplit = DataSplitMode::kRow;
    }

-    ConfigureObjective();
-    name_gbm_ = tparam_.booster;
-
-    // FIXME(trivialfis): So which one should go first? Init or Configure?
-    if (!this->ModelInitialized()) {
-      mparam_.InitAllowUnknown(args);
-      name_obj_ = cfg_["objective"];
-      // set seed only before the model is initialized
-      common::GlobalRandom().seed(tparam_.seed);
-    }
+    // set seed only before the model is initialized
+    common::GlobalRandom().seed(generic_param_.seed);
+    // must precede configure gbm since num_features is required for gbm
+    this->ConfigureNumFeatures();
+    args = {cfg_.cbegin(), cfg_.cend()};  // renew
+    this->ConfigureObjective(old_tparam, &args);
+    this->ConfigureGBM(old_tparam, args);
+    this->ConfigureMetrics(args);
+
+    this->configured_ = true;
+    monitor_.Stop("Configure");
  }

-    // set number of features correctly.
-    cfg_["num_feature"] = common::ToString(mparam_.num_feature);
-    cfg_["num_class"] = common::ToString(mparam_.num_class);
-
-    if (gbm_ != nullptr) {
-      gbm_->Configure(cfg_.begin(), cfg_.end());
-    }
-    if (obj_ != nullptr) {
-      obj_->Configure(cfg_.begin(), cfg_.end());
-    }
-
-    for (auto& p_metric : metrics_) {
-      p_metric->Configure(cfg_.begin(), cfg_.end());
-    }
-  }
-
-  void InitModel() override { this->LazyInitModel(); }
-
-  // Configuration can only be done after data is known
-  void ConfigurationWithKnownData(DMatrix* dmat) {
-    CHECK(ModelInitialized())
-        << " Internal Error: Always call InitModel or Load before any evaluation.";
-    this->ValidateDMatrix(dmat);
-    CHECK(this->gbm_) << " Internal: GBM is not set";
-    if (this->gbm_->UseGPU()) {
-      if (cfg_.find("n_gpus") == cfg_.cend()) {
-        tparam_.n_gpus = 1;
-      }
-      if (tparam_.n_gpus != 1) {
-        LOG(WARNING) << "Multi-GPU training is deprecated. "
-            "Please use distributed GPU training with one process per GPU.";
+  void CheckDataSplitMode() {
+    if (rabit::IsDistributed()) {
+      CHECK(tparam_.dsplit != DataSplitMode::kAuto)
+        << "Precondition violated; dsplit cannot be 'auto' in distributed mode";
+      if (tparam_.dsplit == DataSplitMode::kCol) {
+        // 'distcol' updater hidden until it becomes functional again
+        // See discussion at https://github.com/dmlc/xgboost/issues/1832
+        LOG(FATAL) << "Column-wise data split is currently not supported.";
      }
    }
  }

  void Load(dmlc::Stream* fi) override {
-    tparam_ = LearnerTrainParam();
+    generic_param_.InitAllowUnknown(Args{});
+    tparam_.Init(std::vector<std::pair<std::string, std::string>>{});
    // TODO(tqchen) mark deprecation of old format.
    common::PeekableInStream fp(fi);
@ -244,15 +222,15 @@ class LearnerImpl : public Learner {
len = len >> static_cast<uint64_t>(32UL);
}
if (len != 0) {
name_obj_.resize(len);
CHECK_EQ(fi->Read(&name_obj_[0], len), len)
tparam_.objective.resize(len);
CHECK_EQ(fi->Read(&tparam_.objective[0], len), len)
<< "BoostLearner: wrong model format";
}
}
CHECK(fi->Read(&name_gbm_)) << "BoostLearner: wrong model format";
CHECK(fi->Read(&tparam_.booster)) << "BoostLearner: wrong model format";
// duplicated code with LazyInitModel
obj_.reset(ObjFunction::Create(name_obj_, &tparam_));
gbm_.reset(GradientBooster::Create(name_gbm_, &tparam_,
obj_.reset(ObjFunction::Create(tparam_.objective, &generic_param_));
gbm_.reset(GradientBooster::Create(tparam_.booster, &generic_param_,
cache_, mparam_.base_score));
gbm_->Load(fi);
if (mparam_.contain_extra_attrs != 0) {
@ -287,17 +265,17 @@ class LearnerImpl : public Learner {
kv.second = "cpu_predictor";
}
#endif  // XGBOOST_USE_CUDA
// NO visiable GPU on current environment
// NO visible GPU in current environment
if (is_gpu_predictor && GPUSet::AllVisible().Size() == 0) {
cfg_["predictor"] = "cpu_predictor";
kv.second = "cpu_predictor";
LOG(INFO) << "Switch gpu_predictor to cpu_predictor.";
}
}
}
attributes_ =
std::map<std::string, std::string>(attr.begin(), attr.end());
attributes_ = std::map<std::string, std::string>(attr.begin(), attr.end());
}
if (name_obj_ == "count:poisson") {
if (tparam_.objective == "count:poisson") {
std::string max_delta_step;
fi->Read(&max_delta_step);
cfg_["max_delta_step"] = max_delta_step;
@ -306,26 +284,41 @@ class LearnerImpl : public Learner {
std::vector<std::string> metr;
fi->Read(&metr);
for (auto name : metr) {
metrics_.emplace_back(
Metric::Create(name, &tparam_));
metrics_.emplace_back(Metric::Create(name, &generic_param_));
}
}
cfg_["num_class"] = common::ToString(mparam_.num_class);
cfg_["num_feature"] = common::ToString(mparam_.num_feature);
obj_->Configure(cfg_.begin(), cfg_.end());

gbm_->Configure({cfg_.cbegin(), cfg_.cend()});
obj_->Configure({cfg_.begin(), cfg_.end()});

for (auto& p_metric : metrics_) {
p_metric->Configure(cfg_.begin(), cfg_.end());
p_metric->Configure({cfg_.begin(), cfg_.end()});
}

this->configured_ = true;
}

// rabit save model to rabit checkpoint
void Save(dmlc::Stream* fo) const override {
if (!this->configured_) {
// Save empty model. Calling Configure in a dummy LearnerImpl avoids violating
// constness.
LearnerImpl empty(std::move(this->cache_));
empty.SetParams({this->cfg_.cbegin(), this->cfg_.cend()});
for (auto const& kv : attributes_) {
empty.SetAttr(kv.first, kv.second);
}
empty.Configure();
empty.Save(fo);
return;
}

LearnerModelParam mparam = mparam_;  // make a copy to potentially modify
std::vector<std::pair<std::string, std::string> > extra_attr;
// extra attributed to be added just before saving

if (name_obj_ == "count:poisson") {
if (tparam_.objective == "count:poisson") {
auto it = cfg_.find("max_delta_step");
if (it != cfg_.end()) {
// write `max_delta_step` parameter as extra attribute of booster
@ -345,8 +338,8 @@ class LearnerImpl : public Learner {
}
}
fo->Write(&mparam, sizeof(LearnerModelParam));
fo->Write(name_obj_);
fo->Write(name_gbm_);
fo->Write(tparam_.objective);
fo->Write(tparam_.booster);
gbm_->Save(fo);
if (mparam.contain_extra_attrs != 0) {
std::map<std::string, std::string> attr(attributes_);
@ -356,7 +349,7 @@ class LearnerImpl : public Learner {
fo->Write(std::vector<std::pair<std::string, std::string>>(
attr.begin(), attr.end()));
}
if (name_obj_ == "count:poisson") {
if (tparam_.objective == "count:poisson") {
auto it = cfg_.find("max_delta_step");
if (it != cfg_.end()) {
fo->Write(it->second);
@ -377,27 +370,21 @@ class LearnerImpl : public Learner {
}
}

void CheckDataSplitMode() {
if (rabit::IsDistributed()) {
CHECK(tparam_.dsplit != DataSplitMode::kAuto)
<< "Precondition violated; dsplit cannot be 'auto' in distributed mode";
if (tparam_.dsplit == DataSplitMode::kCol) {
// 'distcol' updater hidden until it becomes functional again
// See discussion at https://github.com/dmlc/xgboost/issues/1832
LOG(FATAL) << "Column-wise data split is currently not supported.";
}
}
std::vector<std::string> DumpModel(const FeatureMap& fmap,
bool with_stats,
std::string format) const override {
return gbm_->DumpModel(fmap, with_stats, format);
}

void UpdateOneIter(int iter, DMatrix* train) override {
monitor_.Start("UpdateOneIter");

if (tparam_.seed_per_iteration || rabit::IsDistributed()) {
common::GlobalRandom().seed(tparam_.seed * kRandSeedMagic + iter);
if (generic_param_.seed_per_iteration || rabit::IsDistributed()) {
common::GlobalRandom().seed(generic_param_.seed * kRandSeedMagic + iter);
}
// this->PerformTreeMethodHeuristic(train);
this->Configure();
this->CheckDataSplitMode();
this->ConfigurationWithKnownData(train);
this->ValidateDMatrix(train);

monitor_.Start("PredictRaw");
this->PredictRaw(train, &preds_[train]);
@ -412,12 +399,12 @@ class LearnerImpl : public Learner {
void BoostOneIter(int iter, DMatrix* train,
HostDeviceVector<GradientPair>* in_gpair) override {
monitor_.Start("BoostOneIter");
if (tparam_.seed_per_iteration || rabit::IsDistributed()) {
common::GlobalRandom().seed(tparam_.seed * kRandSeedMagic + iter);
if (generic_param_.seed_per_iteration || rabit::IsDistributed()) {
common::GlobalRandom().seed(generic_param_.seed * kRandSeedMagic + iter);
}
this->Configure();
this->CheckDataSplitMode();
// this->PerformTreeMethodHeuristic(train);
this->ConfigurationWithKnownData(train);
this->ValidateDMatrix(train);

gbm_->DoBoost(train, in_gpair);
monitor_.Stop("BoostOneIter");
@ -426,16 +413,17 @@ class LearnerImpl : public Learner {
std::string EvalOneIter(int iter, const std::vector<DMatrix*>& data_sets,
const std::vector<std::string>& data_names) override {
monitor_.Start("EvalOneIter");
this->Configure();

std::ostringstream os;
os << '[' << iter << ']' << std::setiosflags(std::ios::fixed);
if (metrics_.size() == 0 && tparam_.disable_default_eval_metric <= 0) {
metrics_.emplace_back(Metric::Create(obj_->DefaultEvalMetric(), &tparam_));
metrics_.back()->Configure(cfg_.begin(), cfg_.end());
metrics_.emplace_back(Metric::Create(obj_->DefaultEvalMetric(), &generic_param_));
metrics_.back()->Configure({cfg_.begin(), cfg_.end()});
}
for (size_t i = 0; i < data_sets.size(); ++i) {
DMatrix * dmat = data_sets[i];
this->ConfigurationWithKnownData(dmat);
this->ValidateDMatrix(dmat);
this->PredictRaw(data_sets[i], &preds_[dmat]);
obj_->EvalTransform(&preds_[dmat]);
for (auto& ev : metrics_) {
@ -449,6 +437,25 @@ class LearnerImpl : public Learner {
return os.str();
}

void SetParam(const std::string& key, const std::string& value) override {
configured_ = false;
if (key == kEvalMetric) {
if (std::find(metric_names_.cbegin(), metric_names_.cend(),
value) == metric_names_.cend()) {
metric_names_.emplace_back(value);
}
} else {
cfg_[key] = value;
}
}
// Short hand for setting multiple parameters
void SetParams(std::vector<std::pair<std::string, std::string>> const& args) override {
configured_ = false;
for (auto const& kv : args) {
this->SetParam(kv.first, kv.second);
}
}

void SetAttr(const std::string& key, const std::string& value) override {
attributes_[key] = value;
mparam_.contain_extra_attrs = 1;
@ -463,34 +470,21 @@ class LearnerImpl : public Learner {

bool DelAttr(const std::string& key) override {
auto it = attributes_.find(key);
if (it == attributes_.end()) return false;
if (it == attributes_.end()) { return false; }
attributes_.erase(it);
return true;
}

std::vector<std::string> GetAttrNames() const override {
std::vector<std::string> out;
out.reserve(attributes_.size());
for (auto& p : attributes_) {
out.push_back(p.first);
for (auto const& kv : attributes_) {
out.emplace_back(kv.first);
}
return out;
}

LearnerTrainParam const& GetLearnerTrainParameter() const override {
return tparam_;
}

std::pair<std::string, bst_float> Evaluate(DMatrix* data,
std::string metric) {
if (metric == "auto") metric = obj_->DefaultEvalMetric();
std::unique_ptr<Metric> ev(Metric::Create(metric.c_str(), &tparam_));
this->ConfigurationWithKnownData(data);
this->PredictRaw(data, &preds_[data]);
obj_->EvalTransform(&preds_[data]);
return std::make_pair(metric,
ev->Eval(preds_[data], data->Info(),
tparam_.dsplit == DataSplitMode::kRow));
GenericParameter const& GetGenericParameter() const override {
return generic_param_;
}

void Predict(DMatrix* data, bool output_margin,
@ -500,6 +494,7 @@ class LearnerImpl : public Learner {
int multiple_predictions = static_cast<int>(pred_leaf) +
static_cast<int>(pred_interactions) +
static_cast<int>(pred_contribs);
this->Configure();
CHECK_LE(multiple_predictions, 1) << "Perform one kind of prediction at a time.";
if (pred_contribs) {
gbm_->PredictContribution(data, &out_preds->HostVector(), ntree_limit, approx_contribs);
@ -521,11 +516,76 @@ class LearnerImpl : public Learner {
}

protected:
/*!
* \brief get un-transformed prediction
* \param data training data matrix
* \param out_preds output vector that stores the prediction
* \param ntree_limit limit number of trees used for boosted tree
* predictor, when it equals 0, this means we are using all the trees
*/
void PredictRaw(DMatrix* data, HostDeviceVector<bst_float>* out_preds,
unsigned ntree_limit = 0) const {
CHECK(gbm_ != nullptr)
<< "Predict must happen after Load or InitModel";
this->ValidateDMatrix(data);
gbm_->PredictBatch(data, out_preds, ntree_limit);
}

// return whether model is already initialized.
inline bool ModelInitialized() const { return gbm_ != nullptr; }
// lazily initialize the model based on configuration if it haven't yet been initialized.
inline void LazyInitModel() {
if (this->ModelInitialized()) return;
bool ModelInitialized() const { return configured_; }

void ConfigureObjective(LearnerTrainParam const& old, Args* p_args) {
if (cfg_.find("num_class") != cfg_.cend() && cfg_.at("num_class") != "0") {
cfg_["num_output_group"] = cfg_["num_class"];
if (atoi(cfg_["num_class"].c_str()) > 1 && cfg_.count("objective") == 0) {
tparam_.objective = "multi:softmax";
}
}

if (cfg_.find("max_delta_step") == cfg_.cend() &&
cfg_.find("objective") != cfg_.cend() &&
tparam_.objective == "count:poisson") {
cfg_["max_delta_step"] = kMaxDeltaStepDefaultValue;
}
if (obj_ == nullptr || tparam_.objective != old.objective) {
obj_.reset(ObjFunction::Create(tparam_.objective, &generic_param_));
}
// reset the base score
mparam_.base_score = obj_->ProbToMargin(mparam_.base_score);
auto& args = *p_args;
args = {cfg_.cbegin(), cfg_.cend()};  // renew
obj_->Configure(args);
}

void ConfigureMetrics(Args const& args) {
for (auto const& name : metric_names_) {
auto DupCheck = [&name](std::unique_ptr<Metric> const& m) {
return m->Name() != name;
};
if (std::all_of(metrics_.begin(), metrics_.end(), DupCheck)) {
metrics_.emplace_back(std::unique_ptr<Metric>(Metric::Create(name, &generic_param_)));
mparam_.contain_eval_metrics = 1;
}
}
for (auto& p_metric : metrics_) {
p_metric->Configure(args);
}
}

void ConfigureGBM(LearnerTrainParam const& old, Args const& args) {
if (gbm_ == nullptr || old.booster != tparam_.booster) {
gbm_.reset(GradientBooster::Create(tparam_.booster, &generic_param_,
cache_, mparam_.base_score));
}
gbm_->Configure(args);

if (this->gbm_->UseGPU() && cfg_.find("n_gpus") == cfg_.cend()) {
generic_param_.n_gpus = 1;
}
}

// set number of features correctly.
void ConfigureNumFeatures() {
// estimate feature bound
// TODO(hcho3): Change num_feature to 64-bit integer
unsigned num_feature = 0;
@ -546,32 +606,10 @@ class LearnerImpl : public Learner {
<< "0 feature is supplied. Are you using raw Booster interface?";
// setup
cfg_["num_feature"] = common::ToString(mparam_.num_feature);
CHECK(obj_ == nullptr && gbm_ == nullptr);
obj_.reset(ObjFunction::Create(name_obj_, &tparam_));
obj_->Configure(cfg_.begin(), cfg_.end());
// reset the base score
mparam_.base_score = obj_->ProbToMargin(mparam_.base_score);
gbm_.reset(GradientBooster::Create(name_gbm_, &tparam_,
cache_, mparam_.base_score));
gbm_->Configure(cfg_.begin(), cfg_.end());
cfg_["num_class"] = common::ToString(mparam_.num_class);
}

/*!
* \brief get un-transformed prediction
* \param data training data matrix
* \param out_preds output vector that stores the prediction
* \param ntree_limit limit number of trees used for boosted tree
* predictor, when it equals 0, this means we are using all the trees
*/
void PredictRaw(DMatrix* data, HostDeviceVector<bst_float>* out_preds,
unsigned ntree_limit = 0) {
CHECK(gbm_ != nullptr)
<< "Predict must happen after Load or InitModel";
ConfigurationWithKnownData(data);
gbm_->PredictBatch(data, out_preds, ntree_limit);
}

void ValidateDMatrix(DMatrix* p_fmat) {
void ValidateDMatrix(DMatrix* p_fmat) const {
MetaInfo const& info = p_fmat->Info();
auto const& weights = info.weights_.HostVector();
if (info.group_ptr_.size() != 0 && weights.size() != 0) {
@ -586,28 +624,33 @@ class LearnerImpl : public Learner {

// model parameter
LearnerModelParam mparam_;
LearnerTrainParam tparam_;
// configurations
std::map<std::string, std::string> cfg_;
// attributes
// FIXME(trivialfis): Legacy field used to store extra attributes into binary model.
std::map<std::string, std::string> attributes_;
// name of gbm
std::string name_gbm_;
// name of objective function
std::string name_obj_;
std::vector<std::string> metric_names_;
static std::string const kEvalMetric;  // NOLINT
// temporal storages for prediction
std::map<DMatrix*, HostDeviceVector<bst_float>> preds_;
// gradient pairs
HostDeviceVector<GradientPair> gpair_;

bool configured_;

private:
/*! \brief random number transformation seed. */
static const int kRandSeedMagic = 127;
static int32_t constexpr kRandSeedMagic = 127;
// internal cached dmatrix
std::vector<std::shared_ptr<DMatrix> > cache_;

common::Monitor monitor_;
};

std::string const LearnerImpl::kEvalMetric {"eval_metric"};  // NOLINT

constexpr int32_t LearnerImpl::kRandSeedMagic;

Learner* Learner::Create(
const std::vector<std::shared_ptr<DMatrix> >& cache_data) {
return new LearnerImpl(cache_data);

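For reference, the learner pieces above add up to a fully lazy configuration flow. A minimal sketch of the intended call pattern (not part of the diff; the training loop and the `p_train` matrix are illustrative), using only the `Learner` interface shown in this commit:

// Parameters are only recorded by SetParam/SetParams; LearnerImpl::Configure()
// runs lazily, and UpdateOneIter invokes it before every iteration.
std::unique_ptr<xgboost::Learner> learner {
    xgboost::Learner::Create({p_train})};  // p_train: std::shared_ptr<DMatrix>
learner->SetParams({{"booster", "gbtree"},
                    {"objective", "binary:logistic"},
                    {"eval_metric", "auc"}});  // routed into metric_names_
for (int iter = 0; iter < n_rounds; ++iter) {
  learner->UpdateOneIter(iter, p_train.get());  // re-configures if params changed
}
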
@ -11,7 +11,7 @@ DMLC_REGISTRY_ENABLE(::xgboost::LinearUpdaterReg);

namespace xgboost {

LinearUpdater* LinearUpdater::Create(const std::string& name, LearnerTrainParam const* lparam) {
LinearUpdater* LinearUpdater::Create(const std::string& name, GenericParameter const* lparam) {
auto *e = ::dmlc::Registry< ::xgboost::LinearUpdaterReg>::Get()->Find(name);
if (e == nullptr) {
LOG(FATAL) << "Unknown linear updater " << name;

@ -24,8 +24,7 @@ DMLC_REGISTRY_FILE_TAG(updater_coordinate);
class CoordinateUpdater : public LinearUpdater {
public:
// set training parameter
void Init(
const std::vector<std::pair<std::string, std::string> > &args) override {
void Configure(Args const& args) override {
const std::vector<std::pair<std::string, std::string> > rest {
tparam_.InitAllowUnknown(args)
};

@ -157,8 +157,7 @@ class DeviceShard {
class GPUCoordinateUpdater : public LinearUpdater {
public:
// set training parameter
void Init(
const std::vector<std::pair<std::string, std::string>> &args) override {
void Configure(Args const& args) override {
tparam_.InitAllowUnknown(args);
selector_.reset(FeatureSelector::Create(tparam_.feature_selector));
monitor_.Init("GPUCoordinateUpdater");

@ -14,7 +14,7 @@ DMLC_REGISTRY_FILE_TAG(updater_shotgun);
class ShotgunUpdater : public LinearUpdater {
public:
// set training parameter
void Init(const std::vector<std::pair<std::string, std::string> > &args) override {
void Configure(Args const& args) override {
param_.InitAllowUnknown(args);
if (param_.feature_selector != kCyclic &&
param_.feature_selector != kShuffle) {

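All three linear updaters now share one `Configure(Args const&)` entry point. A short sketch of driving one through the registry, mirroring the test changes later in this commit (`lparam` stands in for an already-initialized `GenericParameter`):

xgboost::GenericParameter lparam;
lparam.InitAllowUnknown(xgboost::Args{{"n_gpus", "0"}});
auto updater = std::unique_ptr<xgboost::LinearUpdater>(
    xgboost::LinearUpdater::Create("shotgun", &lparam));
updater->Configure({{"eta", "1."}});  // Args is vector<pair<string, string>>
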
@ -50,7 +50,7 @@ bool ConsoleLogger::ShouldLog(LogVerbosity verbosity) {
return verbosity <= global_verbosity_ || verbosity == LV::kIgnore;
}

void ConsoleLogger::Configure(const std::map<std::string, std::string>& args) {
void ConsoleLogger::Configure(Args const& args) {
param_.InitAllowUnknown(args);
// Deprecated, but when trying to display deprecation message some R
// tests trying to catch stdout will fail.

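Since `ConsoleLogger::Configure` now takes `Args` instead of a `std::map`, a brace-initialized list can be passed directly (sketch, matching the test updates further below):

xgboost::Args verbosity_args {{"verbosity", "3"}};  // 3 == LOG INFO
xgboost::ConsoleLogger::Configure(verbosity_args);
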
@ -104,7 +104,7 @@ class ElementWiseMetricsReduction {
#endif  // XGBOOST_USE_CUDA

PackedReduceResult Reduce(
const LearnerTrainParam &tparam,
const GenericParameter &tparam,
GPUSet devices,
const HostDeviceVector<bst_float>& weights,
const HostDeviceVector<bst_float>& labels,

@ -12,7 +12,7 @@ DMLC_REGISTRY_ENABLE(::xgboost::MetricReg);
}

namespace xgboost {
Metric* Metric::Create(const std::string& name, LearnerTrainParam const* tparam) {
Metric* Metric::Create(const std::string& name, GenericParameter const* tparam) {
std::string buf = name;
std::string prefix = name;
const char* param;

@ -126,7 +126,7 @@ class MultiClassMetricsReduction {
#endif  // XGBOOST_USE_CUDA

PackedReduceResult Reduce(
const LearnerTrainParam &tparam,
const GenericParameter &tparam,
GPUSet devices,
size_t n_class,
const HostDeviceVector<bst_float>& weights,

@ -50,7 +50,11 @@ class SoftmaxMultiClassObj : public ObjFunction {
HostDeviceVector<GradientPair>* out_gpair) override {
CHECK_NE(info.labels_.Size(), 0U) << "label set cannot be empty";
CHECK(preds.Size() == (static_cast<size_t>(param_.num_class) * info.labels_.Size()))
<< "SoftmaxMultiClassObj: label size and pred size does not match";
<< "SoftmaxMultiClassObj: label size and pred size does not match.\n"
<< "label.Size() * num_class: "
<< info.labels_.Size() * static_cast<size_t>(param_.num_class) << "\n"
<< "num_class: " << param_.num_class << "\n"
<< "preds.Size(): " << preds.Size();

const int nclass = param_.num_class;
const auto ndata = static_cast<int64_t>(preds.Size() / nclass);

@ -14,7 +14,7 @@ DMLC_REGISTRY_ENABLE(::xgboost::ObjFunctionReg);

namespace xgboost {
// implement factory functions
ObjFunction* ObjFunction::Create(const std::string& name, LearnerTrainParam const* tparam) {
ObjFunction* ObjFunction::Create(const std::string& name, GenericParameter const* tparam) {
auto *e = ::dmlc::Registry< ::xgboost::ObjFunctionReg>::Get()->Find(name);
if (e == nullptr) {
for (const auto& entry : ::dmlc::Registry< ::xgboost::ObjFunctionReg>::List()) {

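With the factory functions now keyed on `GenericParameter`, objectives and metrics are created and configured the same way. A hedged sketch (the parameter values are illustrative, not from the diff):

xgboost::GenericParameter generic_param;
generic_param.InitAllowUnknown(xgboost::Args{});
std::unique_ptr<xgboost::ObjFunction> obj {
    xgboost::ObjFunction::Create("multi:softmax", &generic_param)};
obj->Configure(xgboost::Args{{"num_class", "3"}});  // same Args flow as the learner
std::unique_ptr<xgboost::Metric> metric {
    xgboost::Metric::Create("merror", &generic_param)};
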
@ -377,8 +377,7 @@ class GPUPredictor : public xgboost::Predictor {
}

public:
GPUPredictor()  // NOLINT
: cpu_predictor_(Predictor::Create("cpu_predictor", learner_param_)) {}
GPUPredictor() = default;

void PredictBatch(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model, int tree_begin,
@ -468,12 +467,15 @@ class GPUPredictor : public xgboost::Predictor {
std::vector<bst_float>* out_preds,
const gbm::GBTreeModel& model, unsigned ntree_limit,
unsigned root_index) override {
cpu_predictor_->PredictInstance(inst, out_preds, model, root_index);
LOG(FATAL) << "Internal error: " << __func__
<< " is not implemented in GPU Predictor.";
}

void PredictLeaf(DMatrix* p_fmat, std::vector<bst_float>* out_preds,
const gbm::GBTreeModel& model,
unsigned ntree_limit) override {
cpu_predictor_->PredictLeaf(p_fmat, out_preds, model, ntree_limit);
LOG(FATAL) << "Internal error: " << __func__
<< " is not implemented in GPU Predictor.";
}

void PredictContribution(DMatrix* p_fmat,
@ -481,9 +483,8 @@ class GPUPredictor : public xgboost::Predictor {
const gbm::GBTreeModel& model, unsigned ntree_limit,
bool approximate, int condition,
unsigned condition_feature) override {
cpu_predictor_->PredictContribution(p_fmat, out_contribs, model, ntree_limit,
approximate, condition,
condition_feature);
LOG(FATAL) << "Internal error: " << __func__
<< " is not implemented in GPU Predictor.";
}

void PredictInteractionContributions(DMatrix* p_fmat,
@ -491,14 +492,13 @@ class GPUPredictor : public xgboost::Predictor {
const gbm::GBTreeModel& model,
unsigned ntree_limit,
bool approximate) override {
cpu_predictor_->PredictInteractionContributions(p_fmat, out_contribs, model,
ntree_limit, approximate);
LOG(FATAL) << "Internal error: " << __func__
<< " is not implemented in GPU Predictor.";
}

void Init(const std::vector<std::pair<std::string, std::string>>& cfg,
void Configure(const std::vector<std::pair<std::string, std::string>>& cfg,
const std::vector<std::shared_ptr<DMatrix>>& cache) override {
Predictor::Init(cfg, cache);
cpu_predictor_->Init(cfg, cache);
Predictor::Configure(cfg, cache);

GPUSet devices = GPUSet::All(learner_param_->gpu_id, learner_param_->n_gpus);
ConfigureShards(devices);
@ -517,7 +517,6 @@ class GPUPredictor : public xgboost::Predictor {
});
}

std::unique_ptr<Predictor> cpu_predictor_;
std::vector<DeviceShard> shards_;
GPUSet devices_;
common::Monitor monitor_;

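With the embedded CPU fallback removed, the GPU predictor no longer silently degrades: unsupported paths fail loudly, and `GBTree` is the layer that chooses a predictor. A sketch of the caller's side under that assumption (`use_gpu` and the selection logic are illustrative, not from the diff):

// Both predictors are allocated by the booster; selection happens by name.
bool use_gpu = false;  // in GBTree this would come from the `predictor` parameter
std::unique_ptr<xgboost::Predictor> predictor {
    xgboost::Predictor::Create(use_gpu ? "gpu_predictor" : "cpu_predictor",
                               &generic_param)};
predictor->Configure({}, {});  // empty Args, empty DMatrix cache
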
@ -8,14 +8,14 @@ namespace dmlc {
DMLC_REGISTRY_ENABLE(::xgboost::PredictorReg);
}  // namespace dmlc
namespace xgboost {
void Predictor::Init(
void Predictor::Configure(
const std::vector<std::pair<std::string, std::string>>& cfg,
const std::vector<std::shared_ptr<DMatrix>>& cache) {
for (const std::shared_ptr<DMatrix>& d : cache) {
cache_[d.get()].data = d;
}
}
Predictor* Predictor::Create(std::string const& name, LearnerTrainParam const* learner_param) {
Predictor* Predictor::Create(std::string const& name, GenericParameter const* learner_param) {
auto* e = ::dmlc::Registry<PredictorReg>::Get()->Find(name);
if (e == nullptr) {
LOG(FATAL) << "Unknown predictor type " << name;

@ -43,8 +43,7 @@ SplitEvaluator* SplitEvaluator::Create(const std::string& name) {
}

// Default implementations of some virtual methods that aren't always needed
void SplitEvaluator::Init(
const std::vector<std::pair<std::string, std::string> >& args) {}
void SplitEvaluator::Init(const Args& args) {}
void SplitEvaluator::Reset() {}
void SplitEvaluator::AddSplit(bst_uint nodeid,
bst_uint leftid,
@ -104,8 +103,7 @@ class ElasticNet final : public SplitEvaluator {
LOG(FATAL) << "ElasticNet does not accept an inner SplitEvaluator";
}
}
void Init(
const std::vector<std::pair<std::string, std::string> >& args) override {
void Init(const Args& args) override {
params_.InitAllowUnknown(args);
}

@ -210,7 +208,7 @@ class MonotonicConstraint final : public SplitEvaluator {
inner_ = std::move(inner);
}

void Init(const std::vector<std::pair<std::string, std::string> >& args)
void Init(const Args& args)
override {
inner_->Init(args);
params_.InitAllowUnknown(args);
@ -369,7 +367,7 @@ class InteractionConstraint final : public SplitEvaluator {
inner_ = std::move(inner);
}

void Init(const std::vector<std::pair<std::string, std::string> >& args)
void Init(const Args& args)
override {
inner_->Init(args);
params_.InitAllowUnknown(args);

@ -32,8 +32,7 @@ class SplitEvaluator {
virtual ~SplitEvaluator() = default;

// Used to initialise any regularisation hyperparameters provided by the user
virtual void Init(
const std::vector<std::pair<std::string, std::string> >& args);
virtual void Init(const Args& args);

// Resets the SplitEvaluator to the state it was in after the Init was called
virtual void Reset();

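A sketch of the shortened `SplitEvaluator::Init` in use; the registry name "elastic_net" and the parameter key are assumptions for illustration, not taken from this diff:

auto evaluator = std::unique_ptr<SplitEvaluator>(
    SplitEvaluator::Create("elastic_net"));      // hypothetical registry name
evaluator->Init(Args{{"reg_lambda", "1.0"}});    // hypothetical parameter key
evaluator->Reset();                              // back to the post-Init state
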
@ -14,7 +14,7 @@ DMLC_REGISTRY_ENABLE(::xgboost::TreeUpdaterReg);

namespace xgboost {

TreeUpdater* TreeUpdater::Create(const std::string& name, LearnerTrainParam const* tparam) {
TreeUpdater* TreeUpdater::Create(const std::string& name, GenericParameter const* tparam) {
auto *e = ::dmlc::Registry< ::xgboost::TreeUpdaterReg>::Get()->Find(name);
if (e == nullptr) {
LOG(FATAL) << "Unknown tree updater " << name;

@ -30,7 +30,7 @@ namespace tree {
*/
class BaseMaker: public TreeUpdater {
public:
void Init(const std::vector<std::pair<std::string, std::string> >& args) override {
void Configure(const Args& args) override {
param_.InitAllowUnknown(args);
}


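Tree updaters follow the same `Create`/`Configure` pattern, and the new `Name()` method makes log output identifiable. A sketch using the "prune" updater that the diff itself creates by name:

auto pruner = std::unique_ptr<xgboost::TreeUpdater>(
    xgboost::TreeUpdater::Create("prune", &generic_param));
pruner->Configure(xgboost::Args{});           // unknown keys are allowed
LOG(INFO) << "updater: " << pruner->Name();   // prints the registered name
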
@ -25,12 +25,16 @@ DMLC_REGISTRY_FILE_TAG(updater_colmaker);
/*! \brief column-wise update to construct a tree */
class ColMaker: public TreeUpdater {
public:
void Init(const std::vector<std::pair<std::string, std::string> >& args) override {
void Configure(const Args& args) override {
param_.InitAllowUnknown(args);
spliteval_.reset(SplitEvaluator::Create(param_.split_evaluator));
spliteval_->Init(args);
}

char const* Name() const override {
return "grow_colmaker";
}

void Update(HostDeviceVector<GradientPair> *gpair,
DMatrix* dmat,
const std::vector<RegTree*> &trees) override {
@ -768,13 +772,18 @@ class ColMaker: public TreeUpdater {
// distributed column maker
class DistColMaker : public ColMaker {
public:
void Init(const std::vector<std::pair<std::string, std::string> >& args) override {
void Configure(const Args& args) override {
param_.InitAllowUnknown(args);
pruner_.reset(TreeUpdater::Create("prune", tparam_));
pruner_->Init(args);
pruner_->Configure(args);
spliteval_.reset(SplitEvaluator::Create(param_.split_evaluator));
spliteval_->Init(args);
}

char const* Name() const override {
return "distcol";
}

void Update(HostDeviceVector<GradientPair> *gpair,
DMatrix* dmat,
const std::vector<RegTree*> &trees) override {

@ -581,7 +581,11 @@ class GPUMaker : public TreeUpdater {
GPUMaker() : allocated_{false} {}
~GPUMaker() override = default;

void Init(const std::vector<std::pair<std::string, std::string>> &args) override {
char const* Name() const override {
return "gpu_exact";
}

void Configure(const Args &args) override {
param_.InitAllowUnknown(args);
maxNodes_ = (1 << (param_.max_depth + 1)) - 1;
maxLeaves_ = 1 << param_.max_depth;

@ -1,5 +1,5 @@
/*!
* Copyright 2017 XGBoost contributors
* Copyright 2017-2019 XGBoost contributors
*/
#pragma once
#include <thrust/random.h>

@ -1290,13 +1290,12 @@ template <typename GradientSumT>
class GPUHistMakerSpecialised {
public:
GPUHistMakerSpecialised() : initialised_{false}, p_last_fmat_{nullptr} {}
void Init(const std::vector<std::pair<std::string, std::string>>& args,
LearnerTrainParam const* lparam) {
void Configure(const Args& args, GenericParameter const* generic_param) {
param_.InitAllowUnknown(args);
learner_param_ = lparam;
generic_param_ = generic_param;
hist_maker_param_.InitAllowUnknown(args);
auto devices = GPUSet::All(learner_param_->gpu_id,
learner_param_->n_gpus);
auto devices = GPUSet::All(generic_param_->gpu_id,
generic_param_->n_gpus);
n_devices_ = devices.Size();
CHECK(n_devices_ != 0) << "Must have at least one device";
dist_ = GPUDistribution::Block(devices);
@ -1362,7 +1361,7 @@ class GPUHistMakerSpecialised {

monitor_.StartCuda("Quantiles");
// Create the quantile sketches for the dmatrix and initialize HistogramCuts
size_t row_stride = common::DeviceSketch(param_, *learner_param_,
size_t row_stride = common::DeviceSketch(param_, *generic_param_,
hist_maker_param_.gpu_batch_nrows,
dmat, &hmat_);
monitor_.StopCuda("Quantiles");
@ -1488,7 +1487,7 @@ class GPUHistMakerSpecialised {
int n_bins_;

GPUHistMakerTrainParam hist_maker_param_;
LearnerTrainParam const* learner_param_;
GenericParameter const* generic_param_;

dh::AllReducer reducer_;

@ -1502,17 +1501,16 @@ class GPUHistMakerSpecialised {

class GPUHistMaker : public TreeUpdater {
public:
void Init(
const std::vector<std::pair<std::string, std::string>>& args) override {
void Configure(const Args& args) override {
hist_maker_param_.InitAllowUnknown(args);
float_maker_.reset();
double_maker_.reset();
if (hist_maker_param_.single_precision_histogram) {
float_maker_.reset(new GPUHistMakerSpecialised<GradientPair>());
float_maker_->Init(args, tparam_);
float_maker_->Configure(args, tparam_);
} else {
double_maker_.reset(new GPUHistMakerSpecialised<GradientPairPrecise>());
double_maker_->Init(args, tparam_);
double_maker_->Configure(args, tparam_);
}
}

@ -1534,6 +1532,10 @@ class GPUHistMaker : public TreeUpdater {
}
}

char const* Name() const override {
return "gpu_hist";
}

private:
GPUHistMakerTrainParam hist_maker_param_;
std::unique_ptr<GPUHistMakerSpecialised<GradientPair>> float_maker_;

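The precision switch inside `GPUHistMaker::Configure` can be exercised end to end like this (sketch; "grow_gpu_hist" is the registry name asserted by the tests later in this commit):

auto hist_maker = std::unique_ptr<xgboost::TreeUpdater>(
    xgboost::TreeUpdater::Create("grow_gpu_hist", &generic_param));
hist_maker->Configure(
    xgboost::Args{{"single_precision_histogram", "true"}});  // selects float_maker_
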
@ -33,6 +33,9 @@ class HistMaker: public BaseMaker {
}
param_.learning_rate = lr;
}
char const* Name() const override {
return "grow_histmaker";
}

protected:
/*! \brief a single histogram */
@ -83,7 +86,7 @@ class HistMaker: public BaseMaker {
// per thread histset
std::vector<HistSet> hset;
// initialize the hist set
inline void Init(const TrainParam &param, int nthread) {
inline void Configure(const TrainParam &param, int nthread) {
hset.resize(nthread);
// cleanup statistics
for (int tid = 0; tid < nthread; ++tid) {
@ -274,6 +277,9 @@ class HistMaker: public BaseMaker {
class CQHistMaker: public HistMaker {
public:
CQHistMaker() = default;
char const* Name() const override {
return "grow_local_histmaker";
}

protected:
struct HistEntry {
@ -339,7 +345,7 @@ class CQHistMaker: public HistMaker {
feat2workindex_[fset[i]] = static_cast<int>(i);
}
// start to work
this->wspace_.Init(this->param_, 1);
this->wspace_.Configure(this->param_, 1);
// if it is C++11, use lazy evaluation for Allreduce,
// to gain speedup in recovery
auto lazy_get_hist = [&]() {
@ -637,6 +643,11 @@ class CQHistMaker: public HistMaker {

// global proposal
class GlobalProposalHistMaker: public CQHistMaker {
public:
char const* Name() const override {
return "grow_global_histmaker";
}

protected:
void ResetPosAndPropose(const std::vector<GradientPair> &gpair,
DMatrix *p_fmat,
@ -682,7 +693,7 @@ class GlobalProposalHistMaker: public CQHistMaker {
this->feat2workindex_[fset[i]] = static_cast<int>(i);
}
// start to work
this->wspace_.Init(this->param_, 1);
this->wspace_.Configure(this->param_, 1);
// to gain speedup in recovery
{
this->thread_hist_.resize(omp_get_max_threads());

@ -24,10 +24,14 @@ class TreePruner: public TreeUpdater {
TreePruner() {
syncher_.reset(TreeUpdater::Create("sync", tparam_));
}
char const* Name() const override {
return "prune";
}

// set training parameter
void Init(const std::vector<std::pair<std::string, std::string> >& args) override {
void Configure(const Args& args) override {
param_.InitAllowUnknown(args);
syncher_->Init(args);
syncher_->Configure(args);
}
// update the tree, do pruning
void Update(HostDeviceVector<GradientPair> *gpair,

@ -32,12 +32,12 @@ namespace tree {

DMLC_REGISTRY_FILE_TAG(updater_quantile_hist);

void QuantileHistMaker::Init(const std::vector<std::pair<std::string, std::string> >& args) {
void QuantileHistMaker::Configure(const Args& args) {
// initialize pruner
if (!pruner_) {
pruner_.reset(TreeUpdater::Create("prune", tparam_));
}
pruner_->Init(args);
pruner_->Configure(args);
param_.InitAllowUnknown(args);
is_gmat_initialized_ = false;


@ -46,7 +46,7 @@ using xgboost::common::Column;
/*! \brief construct a tree using quantized feature values */
class QuantileHistMaker: public TreeUpdater {
public:
void Init(const std::vector<std::pair<std::string, std::string> >& args) override;
void Configure(const Args& args) override;

void Update(HostDeviceVector<GradientPair>* gpair,
DMatrix* dmat,
@ -55,6 +55,9 @@ class QuantileHistMaker: public TreeUpdater {
bool UpdatePredictionCache(const DMatrix* data,
HostDeviceVector<bst_float>* out_preds) override;

char const* Name() const override {
return "grow_quantile_histmaker";
}

protected:
// training parameter

@ -21,9 +21,12 @@ DMLC_REGISTRY_FILE_TAG(updater_refresh);
/*! \brief pruner that prunes a tree after growing finishs */
class TreeRefresher: public TreeUpdater {
public:
void Init(const std::vector<std::pair<std::string, std::string> >& args) override {
void Configure(const Args& args) override {
param_.InitAllowUnknown(args);
}
char const* Name() const override {
return "refresh";
}
// update the tree, do pruning
void Update(HostDeviceVector<GradientPair> *gpair,
DMatrix *p_fmat,

@ -22,6 +22,9 @@ DMLC_REGISTRY_FILE_TAG(updater_skmaker);

class SketchMaker: public BaseMaker {
public:
char const* Name() const override {
return "grow_skmaker";
}
void Update(HostDeviceVector<GradientPair> *gpair,
DMatrix *p_fmat,
const std::vector<RegTree*> &trees) override {

@ -20,7 +20,11 @@ DMLC_REGISTRY_FILE_TAG(updater_sync);
*/
class TreeSyncher: public TreeUpdater {
public:
void Init(const std::vector<std::pair<std::string, std::string> >& args) override {}
void Configure(const Args& args) override {}

char const* Name() const override {
return "prune";
}

void Update(HostDeviceVector<GradientPair> *gpair,
DMatrix* dmat,

@ -46,14 +46,14 @@ TEST(GPUSet, Verbose) {
args["verbosity"] = "3";  // LOG INFO

testing::internal::CaptureStderr();
ConsoleLogger::Configure(args.cbegin(), args.cend());
ConsoleLogger::Configure({args.cbegin(), args.cend()});
GPUSet::All(0, 1);
std::string output = testing::internal::GetCapturedStderr();
ASSERT_NE(output.find("GPU ID: 0"), std::string::npos);
ASSERT_NE(output.find("GPUs: 1"), std::string::npos);

args["verbosity"] = "1";  // restore
ConsoleLogger::Configure(args.cbegin(), args.cend());
ConsoleLogger::Configure({args.cbegin(), args.cend()});
}
}


@ -14,8 +14,8 @@ TEST(Monitor, Logging) {
monitor_.Stop("basic");
};

std::map<std::string, std::string> args = {std::make_pair("verbosity", "3")};
ConsoleLogger::Configure(args.cbegin(), args.cend());
Args args = {std::make_pair("verbosity", "3")};
ConsoleLogger::Configure(args);
testing::internal::CaptureStderr();
run_monitor();
std::string output = testing::internal::GetCapturedStderr();
@ -23,7 +23,7 @@ TEST(Monitor, Logging) {

// Monitor only prints messages when set to DEBUG.
args = {std::make_pair("verbosity", "2")};
ConsoleLogger::Configure(args.cbegin(), args.cend());
ConsoleLogger::Configure(args);
testing::internal::CaptureStderr();
run_monitor();
output = testing::internal::GetCapturedStderr();

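The `Args` alias keeps call sites terse while still interoperating with `std::map`, as the tests above show; the conversion is just iterator-pair construction:

std::map<std::string, std::string> m {{"verbosity", "2"}};
xgboost::Args as_args {m.cbegin(), m.cend()};  // vector<pair<...>> from a map
xgboost::ConsoleLogger::Configure(as_args);
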
@ -8,42 +8,47 @@ TEST(GBTree, SelectTreeMethod) {
using Arg = std::pair<std::string, std::string>;
size_t constexpr kRows = 10;
size_t constexpr kCols = 10;
auto mat_ptr = CreateDMatrix(kRows, kCols, 0);
std::vector<std::shared_ptr<xgboost::DMatrix>> mat = {*mat_ptr};
auto p_shared_ptr_dmat = CreateDMatrix(kRows, kCols, 0);
auto p_dmat {(*p_shared_ptr_dmat).get()};

LearnerTrainParam learner_param;
learner_param.InitAllowUnknown(std::vector<Arg>{Arg("n_gpus", "0")});
GenericParameter generic_param;
generic_param.InitAllowUnknown(std::vector<Arg>{Arg("n_gpus", "0")});
std::unique_ptr<GradientBooster> p_gbm{
GradientBooster::Create("gbtree", &learner_param, {}, 0)};
GradientBooster::Create("gbtree", &generic_param, {}, 0)};
auto& gbtree = dynamic_cast<gbm::GBTree&> (*p_gbm);

// Test if `tree_method` can be set
std::string n_feat = std::to_string(kCols);
gbtree.Configure({Arg{"tree_method", "approx"}, Arg{"num_feature", n_feat}});
std::map<std::string, std::string> args {Arg{"tree_method", "approx"}, Arg{"num_feature", n_feat}};
gbtree.Configure({args.cbegin(), args.cend()});

gbtree.ConfigureWithKnownData(args, p_dmat);
auto const& tparam = gbtree.GetTrainParam();
gbtree.ConfigureWithKnownData({Arg{"tree_method", "approx"}, Arg{"num_feature", n_feat}}, p_dmat);
ASSERT_EQ(tparam.updater_seq, "grow_histmaker,prune");
gbtree.Configure({Arg("tree_method", "exact"), Arg("num_feature", n_feat)});
gbtree.ConfigureWithKnownData({Arg("tree_method", "exact"), Arg("num_feature", n_feat)}, p_dmat);
ASSERT_EQ(tparam.updater_seq, "grow_colmaker,prune");
gbtree.Configure({Arg("tree_method", "hist"), Arg("num_feature", n_feat)});
gbtree.ConfigureWithKnownData({Arg("tree_method", "hist"), Arg("num_feature", n_feat)}, p_dmat);
ASSERT_EQ(tparam.updater_seq, "grow_quantile_histmaker");
ASSERT_EQ(tparam.predictor, "cpu_predictor");
gbtree.Configure({Arg{"booster", "dart"}, Arg{"tree_method", "hist"},
Arg{"num_feature", n_feat}});
gbtree.ConfigureWithKnownData({Arg{"booster", "dart"}, Arg{"tree_method", "hist"},
Arg{"num_feature", n_feat}}, p_dmat);
ASSERT_EQ(tparam.updater_seq, "grow_quantile_histmaker");
#ifdef XGBOOST_USE_CUDA
learner_param.InitAllowUnknown(std::vector<Arg>{Arg{"n_gpus", "1"}});
gbtree.Configure({Arg("tree_method", "gpu_exact"),
Arg("num_feature", n_feat)});
generic_param.InitAllowUnknown(std::vector<Arg>{Arg{"n_gpus", "1"}});
gbtree.ConfigureWithKnownData({Arg("tree_method", "gpu_exact"),
Arg("num_feature", n_feat)}, p_dmat);
ASSERT_EQ(tparam.updater_seq, "grow_gpu,prune");
ASSERT_EQ(tparam.predictor, "gpu_predictor");
gbtree.Configure({Arg("tree_method", "gpu_hist"), Arg("num_feature", n_feat)});
gbtree.ConfigureWithKnownData({Arg("tree_method", "gpu_hist"), Arg("num_feature", n_feat)},
p_dmat);
ASSERT_EQ(tparam.updater_seq, "grow_gpu_hist");
ASSERT_EQ(tparam.predictor, "gpu_predictor");
gbtree.Configure({Arg{"booster", "dart"}, Arg{"tree_method", "gpu_hist"},
Arg{"num_feature", n_feat}});
gbtree.ConfigureWithKnownData({Arg{"booster", "dart"}, Arg{"tree_method", "gpu_hist"},
Arg{"num_feature", n_feat}}, p_dmat);
ASSERT_EQ(tparam.updater_seq, "grow_gpu_hist");
#endif

delete mat_ptr;
delete p_shared_ptr_dmat;
}
} // namespace xgboost

@ -189,8 +189,8 @@ std::unique_ptr<DMatrix> CreateSparsePageDMatrixWithRC(size_t n_rows, size_t n_c

gbm::GBTreeModel CreateTestModel();

inline LearnerTrainParam CreateEmptyGenericParam(int gpu_id, int n_gpus) {
xgboost::LearnerTrainParam tparam;
inline GenericParameter CreateEmptyGenericParam(int gpu_id, int n_gpus) {
xgboost::GenericParameter tparam;
std::vector<std::pair<std::string, std::string>> args {
{"gpu_id", std::to_string(gpu_id)},
{"n_gpus", std::to_string(n_gpus)}};

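The retyped helper above is used throughout the updated tests; its typical shape (sketch, matching the linear-updater tests that follow):

xgboost::GenericParameter lparam = xgboost::CreateEmptyGenericParam(0, 0);
auto updater = std::unique_ptr<xgboost::LinearUpdater>(
    xgboost::LinearUpdater::Create("coord_descent", &lparam));
updater->Configure({{"eta", "1."}});
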
@ -1,5 +1,5 @@
/*!
* Copyright 2018 by Contributors
* Copyright 2018-2019 by Contributors
*/
#include <xgboost/linear_updater.h>
#include "../helpers.h"
@ -11,7 +11,7 @@ TEST(Linear, shotgun) {
{
auto updater = std::unique_ptr<xgboost::LinearUpdater>(
xgboost::LinearUpdater::Create("shotgun", &lparam));
updater->Init({{"eta", "1."}});
updater->Configure({{"eta", "1."}});
xgboost::HostDeviceVector<xgboost::GradientPair> gpair(
(*mat)->Info().num_row_, xgboost::GradientPair(-5, 1.0));
xgboost::gbm::GBLinearModel model;
@ -26,7 +26,7 @@ TEST(Linear, shotgun) {
{
auto updater = std::unique_ptr<xgboost::LinearUpdater>(
xgboost::LinearUpdater::Create("shotgun", &lparam));
EXPECT_ANY_THROW(updater->Init({{"feature_selector", "random"}}));
EXPECT_ANY_THROW(updater->Configure({{"feature_selector", "random"}}));
}
delete mat;
}
@ -36,7 +36,7 @@ TEST(Linear, coordinate) {
auto lparam = xgboost::CreateEmptyGenericParam(0, 0);
auto updater = std::unique_ptr<xgboost::LinearUpdater>(
xgboost::LinearUpdater::Create("coord_descent", &lparam));
updater->Init({{"eta", "1."}});
updater->Configure({{"eta", "1."}});
xgboost::HostDeviceVector<xgboost::GradientPair> gpair(
(*mat)->Info().num_row_, xgboost::GradientPair(-5, 1.0));
xgboost::gbm::GBLinearModel model;

@ -11,7 +11,7 @@ TEST(Linear, GPUCoordinate) {
lparam.n_gpus = 1;
auto updater = std::unique_ptr<xgboost::LinearUpdater>(
xgboost::LinearUpdater::Create("gpu_coord_descent", &lparam));
updater->Init({{"eta", "1."}});
updater->Configure({{"eta", "1."}});
xgboost::HostDeviceVector<xgboost::GradientPair> gpair(
(*mat)->Info().num_row_, xgboost::GradientPair(-5, 1.0));
xgboost::gbm::GBLinearModel model;
@ -33,7 +33,7 @@ TEST(Linear, MGPU_GPUCoordinate) {
lparam.n_gpus = -1;
auto updater = std::unique_ptr<xgboost::LinearUpdater>(
xgboost::LinearUpdater::Create("gpu_coord_descent", &lparam));
updater->Init({{"eta", "1."}});
updater->Configure({{"eta", "1."}});
xgboost::HostDeviceVector<xgboost::GradientPair> gpair(
(*mat)->Info().num_row_, xgboost::GradientPair(-5, 1.0));
xgboost::gbm::GBLinearModel model;
@ -52,7 +52,7 @@ TEST(Linear, MGPU_GPUCoordinate) {
auto mat = xgboost::CreateDMatrix(10, 10, 0);
auto updater = std::unique_ptr<xgboost::LinearUpdater>(
xgboost::LinearUpdater::Create("gpu_coord_descent", &lparam));
updater->Init({{"eta", "1."}});
updater->Configure({{"eta", "1."}});
xgboost::HostDeviceVector<xgboost::GradientPair> gpair(
(*mat)->Info().num_row_, xgboost::GradientPair(-5, 1.0));
xgboost::gbm::GBLinearModel model;

@ -6,7 +6,7 @@
#include "../helpers.h"

TEST(Objective, DeclareUnifiedTest(HingeObj)) {
xgboost::LearnerTrainParam tparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
xgboost::GenericParameter tparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("binary:hinge", &tparam);

xgboost::bst_float eps = std::numeric_limits<xgboost::bst_float>::min();

@ -7,7 +7,7 @@
#include "../helpers.h"

TEST(Objective, DeclareUnifiedTest(SoftmaxMultiClassObjGPair)) {
xgboost::LearnerTrainParam lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
xgboost::GenericParameter lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
std::vector<std::pair<std::string, std::string>> args {{"num_class", "3"}};
xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("multi:softmax", &lparam);

@ -47,7 +47,7 @@ TEST(Objective, DeclareUnifiedTest(SoftmaxMultiClassBasic)) {
}

TEST(Objective, DeclareUnifiedTest(SoftprobMultiClassBasic)) {
xgboost::LearnerTrainParam lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
xgboost::GenericParameter lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
std::vector<std::pair<std::string, std::string>> args {
std::pair<std::string, std::string>("num_class", "3")};


@ -7,7 +7,7 @@

TEST(Objective, UnknownFunction) {
xgboost::ObjFunction* obj = nullptr;
xgboost::LearnerTrainParam tparam;
xgboost::GenericParameter tparam;
std::vector<std::pair<std::string, std::string>> args;
tparam.InitAllowUnknown(args);


@ -4,7 +4,7 @@
#include "../helpers.h"

TEST(Objective, PairwiseRankingGPair) {
xgboost::LearnerTrainParam tparam;
xgboost::GenericParameter tparam;
std::vector<std::pair<std::string, std::string>> args;
tparam.InitAllowUnknown(args);


@ -7,7 +7,7 @@
#include "../helpers.h"

TEST(Objective, DeclareUnifiedTest(LinearRegressionGPair)) {
xgboost::LearnerTrainParam tparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
xgboost::GenericParameter tparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
std::vector<std::pair<std::string, std::string>> args;

xgboost::ObjFunction * obj =
@ -32,7 +32,7 @@ TEST(Objective, DeclareUnifiedTest(LinearRegressionGPair)) {
}

TEST(Objective, DeclareUnifiedTest(SquaredLog)) {
xgboost::LearnerTrainParam tparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
xgboost::GenericParameter tparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
std::vector<std::pair<std::string, std::string>> args;

xgboost::ObjFunction * obj =
@ -56,7 +56,7 @@ TEST(Objective, DeclareUnifiedTest(SquaredLog)) {
}

TEST(Objective, DeclareUnifiedTest(LogisticRegressionGPair)) {
xgboost::LearnerTrainParam tparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
xgboost::GenericParameter tparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
std::vector<std::pair<std::string, std::string>> args;
xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("reg:logistic", &tparam);

@ -72,7 +72,7 @@ TEST(Objective, DeclareUnifiedTest(LogisticRegressionGPair)) {
}

TEST(Objective, DeclareUnifiedTest(LogisticRegressionBasic)) {
xgboost::LearnerTrainParam lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
xgboost::GenericParameter lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
std::vector<std::pair<std::string, std::string>> args;
xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("reg:logistic", &lparam);

@ -102,7 +102,7 @@ TEST(Objective, DeclareUnifiedTest(LogisticRegressionBasic)) {
}

TEST(Objective, DeclareUnifiedTest(LogisticRawGPair)) {
xgboost::LearnerTrainParam lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
xgboost::GenericParameter lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
std::vector<std::pair<std::string, std::string>> args;
xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("binary:logitraw", &lparam);

@ -118,7 +118,7 @@ TEST(Objective, DeclareUnifiedTest(LogisticRawGPair)) {
}

TEST(Objective, DeclareUnifiedTest(PoissonRegressionGPair)) {
xgboost::LearnerTrainParam lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
xgboost::GenericParameter lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
std::vector<std::pair<std::string, std::string>> args;
xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("count:poisson", &lparam);

@ -140,7 +140,7 @@ TEST(Objective, DeclareUnifiedTest(PoissonRegressionGPair)) {
}

TEST(Objective, DeclareUnifiedTest(PoissonRegressionBasic)) {
xgboost::LearnerTrainParam lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
xgboost::GenericParameter lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
std::vector<std::pair<std::string, std::string>> args;
xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("count:poisson", &lparam);

@ -168,7 +168,7 @@ TEST(Objective, DeclareUnifiedTest(PoissonRegressionBasic)) {
}

TEST(Objective, DeclareUnifiedTest(GammaRegressionGPair)) {
xgboost::LearnerTrainParam lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
xgboost::GenericParameter lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
std::vector<std::pair<std::string, std::string>> args;
xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("reg:gamma", &lparam);

@ -189,7 +189,7 @@ TEST(Objective, DeclareUnifiedTest(GammaRegressionGPair)) {
}

TEST(Objective, DeclareUnifiedTest(GammaRegressionBasic)) {
xgboost::LearnerTrainParam lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
xgboost::GenericParameter lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
std::vector<std::pair<std::string, std::string>> args;
xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("reg:gamma", &lparam);

@ -217,7 +217,7 @@ TEST(Objective, DeclareUnifiedTest(GammaRegressionBasic)) {
}

TEST(Objective, DeclareUnifiedTest(TweedieRegressionGPair)) {
xgboost::LearnerTrainParam lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
xgboost::GenericParameter lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
std::vector<std::pair<std::string, std::string>> args;
xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("reg:tweedie", &lparam);

@ -241,7 +241,7 @@ TEST(Objective, DeclareUnifiedTest(TweedieRegressionGPair)) {

#if defined(__CUDACC__)
TEST(Objective, CPU_vs_CUDA) {
xgboost::LearnerTrainParam lparam = xgboost::CreateEmptyGenericParam(0, 1);
xgboost::GenericParameter lparam = xgboost::CreateEmptyGenericParam(0, 1);

xgboost::ObjFunction * obj =
xgboost::ObjFunction::Create("reg:squarederror", &lparam);
@ -294,7 +294,7 @@ TEST(Objective, CPU_vs_CUDA) {
#endif

TEST(Objective, DeclareUnifiedTest(TweedieRegressionBasic)) {
xgboost::LearnerTrainParam lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
xgboost::GenericParameter lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
std::vector<std::pair<std::string, std::string>> args;
xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("reg:tweedie", &lparam);

@ -325,7 +325,7 @@ TEST(Objective, DeclareUnifiedTest(TweedieRegressionBasic)) {
// CoxRegression not implemented in GPU code, no need for testing.
#if !defined(__CUDACC__)
TEST(Objective, CoxRegressionGPair) {
xgboost::LearnerTrainParam lparam = xgboost::CreateEmptyGenericParam(0, 0);
xgboost::GenericParameter lparam = xgboost::CreateEmptyGenericParam(0, 0);
std::vector<std::pair<std::string, std::string>> args;
xgboost::ObjFunction * obj =
xgboost::ObjFunction::Create("survival:cox", &lparam);

@ -2,10 +2,12 @@
|
||||
/*!
|
||||
* Copyright 2017-2019 XGBoost contributors
|
||||
*/
|
||||
#include <dmlc/logging.h>
|
||||
#include <dmlc/filesystem.h>
|
||||
#include <xgboost/c_api.h>
|
||||
#include <xgboost/predictor.h>
|
||||
#include <xgboost/logging.h>
|
||||
#include <xgboost/learner.h>
|
||||
|
||||
#include <string>
|
||||
#include "gtest/gtest.h"
|
||||
#include "../helpers.h"
|
||||
@ -20,8 +22,14 @@ inline void CheckCAPICall(int ret) {
|
||||
} // namespace anonymous
|
||||
#endif
|
||||
|
||||
extern const std::map<std::string, std::string>&
|
||||
QueryBoosterConfigurationArguments(BoosterHandle handle);
|
||||
const std::map<std::string, std::string>&
|
||||
QueryBoosterConfigurationArguments(BoosterHandle handle) {
|
||||
CHECK_NE(handle, static_cast<void*>(nullptr));
|
||||
auto* bst = static_cast<xgboost::Learner*>(handle);
|
||||
bst->Configure();
|
||||
return bst->GetConfigurationArguments();
|
||||
}
|
||||
|
||||
|
||||
namespace xgboost {
|
||||
namespace predictor {
|
||||
@ -35,8 +43,8 @@ TEST(gpu_predictor, Test) {
|
||||
std::unique_ptr<Predictor> cpu_predictor =
|
||||
std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", &cpu_lparam));
|
||||
|
||||
gpu_predictor->Init({}, {});
|
||||
cpu_predictor->Init({}, {});
|
||||
gpu_predictor->Configure({}, {});
|
||||
cpu_predictor->Configure({}, {});
|
||||
|
||||
int n_row = 5;
|
||||
int n_col = 5;
|
||||
@ -56,35 +64,6 @@ TEST(gpu_predictor, Test) {
|
||||
for (int i = 0; i < gpu_out_predictions.Size(); i++) {
|
||||
ASSERT_NEAR(gpu_out_predictions_h[i], cpu_out_predictions_h[i], abs_tolerance);
|
||||
}
|
||||
// Test predict instance
|
||||
const auto &batch = *(*dmat)->GetRowBatches().begin();
|
||||
for (int i = 0; i < batch.Size(); i++) {
|
||||
std::vector<float> gpu_instance_out_predictions;
|
||||
std::vector<float> cpu_instance_out_predictions;
|
||||
cpu_predictor->PredictInstance(batch[i], &cpu_instance_out_predictions,
|
||||
model);
|
||||
gpu_predictor->PredictInstance(batch[i], &gpu_instance_out_predictions,
|
||||
model);
|
||||
ASSERT_EQ(gpu_instance_out_predictions[0], cpu_instance_out_predictions[0]);
|
||||
}
|
||||
|
||||
// Test predict leaf
|
||||
std::vector<float> gpu_leaf_out_predictions;
|
||||
std::vector<float> cpu_leaf_out_predictions;
|
||||
cpu_predictor->PredictLeaf((*dmat).get(), &cpu_leaf_out_predictions, model);
|
||||
gpu_predictor->PredictLeaf((*dmat).get(), &gpu_leaf_out_predictions, model);
|
||||
for (int i = 0; i < gpu_leaf_out_predictions.size(); i++) {
|
||||
ASSERT_EQ(gpu_leaf_out_predictions[i], cpu_leaf_out_predictions[i]);
|
||||
}
|
||||
|
||||
// Test predict contribution
|
||||
std::vector<float> gpu_out_contribution;
|
||||
std::vector<float> cpu_out_contribution;
|
||||
cpu_predictor->PredictContribution((*dmat).get(), &cpu_out_contribution, model);
|
||||
gpu_predictor->PredictContribution((*dmat).get(), &gpu_out_contribution, model);
|
||||
for (int i = 0; i < gpu_out_contribution.size(); i++) {
|
||||
ASSERT_EQ(gpu_out_contribution[i], cpu_out_contribution[i]);
|
||||
}
|
||||
|
||||
delete dmat;
|
||||
}
|
||||
@ -93,7 +72,7 @@ TEST(gpu_predictor, ExternalMemoryTest) {
|
||||
auto lparam = CreateEmptyGenericParam(0, 1);
|
||||
std::unique_ptr<Predictor> gpu_predictor =
|
||||
std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &lparam));
|
||||
gpu_predictor->Init({}, {});
|
||||
gpu_predictor->Configure({}, {});
|
||||
gbm::GBTreeModel model = CreateTestModel();
|
||||
int n_col = 3;
|
||||
model.param.num_feature = n_col;
|
||||
@@ -108,38 +87,6 @@ TEST(gpu_predictor, ExternalMemoryTest) {
   for (const auto& v : out_predictions.HostVector()) {
     ASSERT_EQ(v, 1.5);
   }
-
-  // Test predict leaf
-  std::vector<float> leaf_out_predictions;
-  gpu_predictor->PredictLeaf(dmat.get(), &leaf_out_predictions, model);
-  EXPECT_EQ(leaf_out_predictions.size(), dmat->Info().num_row_);
-  for (const auto& v : leaf_out_predictions) {
-    ASSERT_EQ(v, 0);
-  }
-
-  // Test predict contribution
-  std::vector<float> out_contribution;
-  gpu_predictor->PredictContribution(dmat.get(), &out_contribution, model);
-  EXPECT_EQ(out_contribution.size(), dmat->Info().num_row_ * (n_col + 1));
-  for (int i = 0; i < out_contribution.size(); i++) {
-    if (i % (n_col + 1) == n_col) {
-      ASSERT_EQ(out_contribution[i], 1.5);
-    } else {
-      ASSERT_EQ(out_contribution[i], 0);
-    }
-  }
-
-  // Test predict contribution (approximate method)
-  std::vector<float> out_contribution_approximate;
-  gpu_predictor->PredictContribution(dmat.get(), &out_contribution_approximate, model, true);
-  EXPECT_EQ(out_contribution.size(), dmat->Info().num_row_ * (n_col + 1));
-  for (int i = 0; i < out_contribution.size(); i++) {
-    if (i % (n_col + 1) == n_col) {
-      ASSERT_EQ(out_contribution[i], 1.5);
-    } else {
-      ASSERT_EQ(out_contribution[i], 0);
-    }
-  }
 }
 
#if defined(XGBOOST_USE_NCCL)
@@ -231,7 +178,7 @@ TEST(gpu_predictor, MGPU_Test) {
   std::unique_ptr<Predictor> cpu_predictor =
       std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", &cpu_lparam));
 
-  cpu_predictor->Init({}, {});
+  cpu_predictor->Configure({}, {});
 
   for (size_t i = 1; i < 33; i *= 2) {
     int n_row = i, n_col = i;
@@ -263,7 +210,7 @@ TEST(gpu_predictor, MGPU_ExternalMemoryTest) {
 
   std::unique_ptr<Predictor> gpu_predictor =
       std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &gpu_lparam));
-  gpu_predictor->Init({}, {});
+  gpu_predictor->Configure({}, {});
 
   gbm::GBTreeModel model = CreateTestModel();
   model.param.num_feature = 3;
@@ -14,7 +14,7 @@ TEST(Learner, Basic) {
   auto mat_ptr = CreateDMatrix(10, 10, 0);
   std::vector<std::shared_ptr<xgboost::DMatrix>> mat = {*mat_ptr};
   auto learner = std::unique_ptr<Learner>(Learner::Create(mat));
-  learner->Configure(args);
+  learner->SetParams(args);
 
   delete mat_ptr;
 }
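The rename separates two concerns: SetParams only records parameters on the learner, while the zero-argument Configure() applies them and now runs lazily before every iteration. A sketch of the resulting flow, reusing the fixtures above:

    auto learner = std::unique_ptr<Learner>(Learner::Create(mat));
    learner->SetParams(args);                 // parameters are stored, nothing applied yet
    learner->UpdateOneIter(0, mat[0].get());  // Configure() runs internally before the update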
@@ -46,9 +46,7 @@ TEST(Learner, CheckGroup) {
 
   std::vector<std::shared_ptr<xgboost::DMatrix>> mat = {p_mat};
   auto learner = std::unique_ptr<Learner>(Learner::Create(mat));
-  learner->Configure({Arg{"objective", "rank:pairwise"}});
-  learner->InitModel();
-
+  learner->SetParams({Arg{"objective", "rank:pairwise"}});
   EXPECT_NO_THROW(learner->UpdateOneIter(0, p_mat.get()));
 
   group.resize(kNumGroups+1);
@@ -77,11 +75,34 @@ TEST(Learner, SLOW_CheckMultiBatch) {
   dmat->Info().SetInfo("label", labels.data(), DataType::kFloat32, num_row);
   std::vector<std::shared_ptr<DMatrix>> mat{dmat};
   auto learner = std::unique_ptr<Learner>(Learner::Create(mat));
-  learner->Configure({Arg{"objective", "binary:logistic"}});
-  learner->InitModel();
+  learner->SetParams({Arg{"objective", "binary:logistic"}, Arg{"verbosity", "3"}});
   learner->UpdateOneIter(0, dmat.get());
 }
 
+TEST(Learner, Configuration) {
+  std::string const emetric = "eval_metric";
+  {
+    std::unique_ptr<Learner> learner { Learner::Create({nullptr}) };
+    learner->SetParam(emetric, "auc");
+    learner->SetParam(emetric, "rmsle");
+    learner->SetParam("foo", "bar");
+
+    // eval_metric is not part of configuration
+    auto attr_names = learner->GetConfigurationArguments();
+    ASSERT_EQ(attr_names.size(), 1);
+    ASSERT_EQ(attr_names.find(emetric), attr_names.cend());
+    ASSERT_EQ(attr_names.at("foo"), "bar");
+  }
+
+  {
+    std::unique_ptr<Learner> learner { Learner::Create({nullptr}) };
+    learner->SetParams({{"foo", "bar"}, {emetric, "auc"}, {emetric, "entropy"}, {emetric, "KL"}});
+    auto attr_names = learner->GetConfigurationArguments();
+    ASSERT_EQ(attr_names.size(), 1);
+    ASSERT_EQ(attr_names.at("foo"), "bar");
+  }
+}
+
 #if defined(XGBOOST_USE_CUDA)
 
 TEST(Learner, IO) {
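The new Configuration test pins down what GetConfigurationArguments returns: eval_metric is diverted into the learner's metric list rather than stored as a configuration argument, so after several metric assignments only the unknown "foo" key survives, whether parameters arrive one at a time via SetParam or in bulk via SetParams. A condensed sketch of that contract:

    std::unique_ptr<Learner> learner { Learner::Create({nullptr}) };
    learner->SetParam("eval_metric", "auc");  // routed to the metric list, not stored
    learner->SetParam("foo", "bar");          // kept as a configuration argument
    auto const& args = learner->GetConfigurationArguments();
    ASSERT_EQ(args.size(), 1);                // only {"foo", "bar"} remains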
@@ -98,13 +119,12 @@ TEST(Learner, IO) {
   std::vector<std::shared_ptr<DMatrix>> mat {p_dmat};
 
   std::unique_ptr<Learner> learner {Learner::Create(mat)};
-  learner->Configure({Arg{"tree_method", "auto"},
-                      Arg{"predictor", "gpu_predictor"},
-                      Arg{"n_gpus", "-1"}});
-  learner->InitModel();
+  learner->SetParams({Arg{"tree_method", "auto"},
+                      Arg{"predictor", "gpu_predictor"},
+                      Arg{"n_gpus", "-1"}});
   learner->UpdateOneIter(0, p_dmat.get());
-  ASSERT_EQ(learner->GetLearnerTrainParameter().gpu_id, 0);
-  ASSERT_EQ(learner->GetLearnerTrainParameter().n_gpus, -1);
+  ASSERT_EQ(learner->GetGenericParameter().gpu_id, 0);
+  ASSERT_EQ(learner->GetGenericParameter().n_gpus, -1);
 
   dmlc::TemporaryDirectory tempdir;
   const std::string fname = tempdir.path + "/model.bst";
@@ -117,8 +137,8 @@ TEST(Learner, IO) {
 
   std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), "r"));
   learner->Load(fi.get());
-  ASSERT_EQ(learner->GetLearnerTrainParameter().gpu_id, 0);
-  ASSERT_EQ(learner->GetLearnerTrainParameter().n_gpus, 0);
+  ASSERT_EQ(learner->GetGenericParameter().gpu_id, 0);
+  ASSERT_EQ(learner->GetGenericParameter().n_gpus, 0);
 
   delete pp_dmat;
 }
@@ -137,59 +157,53 @@ TEST(Learner, GPUConfiguration) {
   p_dmat->Info().labels_.HostVector() = labels;
   {
     std::unique_ptr<Learner> learner {Learner::Create(mat)};
-    learner->Configure({Arg{"booster", "gblinear"},
-                        Arg{"updater", "gpu_coord_descent"}});
-    learner->InitModel();
+    learner->SetParams({Arg{"booster", "gblinear"},
+                        Arg{"updater", "gpu_coord_descent"}});
     learner->UpdateOneIter(0, p_dmat.get());
-    ASSERT_EQ(learner->GetLearnerTrainParameter().gpu_id, 0);
-    ASSERT_EQ(learner->GetLearnerTrainParameter().n_gpus, 1);
+    ASSERT_EQ(learner->GetGenericParameter().gpu_id, 0);
+    ASSERT_EQ(learner->GetGenericParameter().n_gpus, 1);
   }
   {
     std::unique_ptr<Learner> learner {Learner::Create(mat)};
-    learner->Configure({Arg{"tree_method", "gpu_exact"}});
-    learner->InitModel();
+    learner->SetParams({Arg{"tree_method", "gpu_exact"}});
     learner->UpdateOneIter(0, p_dmat.get());
-    ASSERT_EQ(learner->GetLearnerTrainParameter().gpu_id, 0);
-    ASSERT_EQ(learner->GetLearnerTrainParameter().n_gpus, 1);
+    ASSERT_EQ(learner->GetGenericParameter().gpu_id, 0);
+    ASSERT_EQ(learner->GetGenericParameter().n_gpus, 1);
   }
   {
     std::unique_ptr<Learner> learner {Learner::Create(mat)};
-    learner->Configure({Arg{"tree_method", "gpu_hist"}});
-    learner->InitModel();
+    learner->SetParams({Arg{"tree_method", "gpu_hist"}});
     learner->UpdateOneIter(0, p_dmat.get());
-    ASSERT_EQ(learner->GetLearnerTrainParameter().gpu_id, 0);
-    ASSERT_EQ(learner->GetLearnerTrainParameter().n_gpus, 1);
+    ASSERT_EQ(learner->GetGenericParameter().gpu_id, 0);
+    ASSERT_EQ(learner->GetGenericParameter().n_gpus, 1);
   }
   {
     // with CPU algorithm
     std::unique_ptr<Learner> learner {Learner::Create(mat)};
-    learner->Configure({Arg{"tree_method", "hist"}});
-    learner->InitModel();
+    learner->SetParams({Arg{"tree_method", "hist"}});
     learner->UpdateOneIter(0, p_dmat.get());
-    ASSERT_EQ(learner->GetLearnerTrainParameter().gpu_id, 0);
-    ASSERT_EQ(learner->GetLearnerTrainParameter().n_gpus, 0);
+    ASSERT_EQ(learner->GetGenericParameter().gpu_id, 0);
+    ASSERT_EQ(learner->GetGenericParameter().n_gpus, 0);
   }
   {
     // with CPU algorithm, but `n_gpus` takes priority
     std::unique_ptr<Learner> learner {Learner::Create(mat)};
-    learner->Configure({Arg{"tree_method", "hist"},
-                        Arg{"n_gpus", "1"}});
-    learner->InitModel();
+    learner->SetParams({Arg{"tree_method", "hist"},
+                        Arg{"n_gpus", "1"}});
     learner->UpdateOneIter(0, p_dmat.get());
-    ASSERT_EQ(learner->GetLearnerTrainParameter().gpu_id, 0);
-    ASSERT_EQ(learner->GetLearnerTrainParameter().n_gpus, 1);
+    ASSERT_EQ(learner->GetGenericParameter().gpu_id, 0);
+    ASSERT_EQ(learner->GetGenericParameter().n_gpus, 1);
   }
   {
     // With CPU algorithm but GPU Predictor, this is to simulate when
     // XGBoost is only used for prediction, so tree method is not
     // specified.
     std::unique_ptr<Learner> learner {Learner::Create(mat)};
-    learner->Configure({Arg{"tree_method", "hist"},
-                        Arg{"predictor", "gpu_predictor"}});
-    learner->InitModel();
+    learner->SetParams({Arg{"tree_method", "hist"},
+                        Arg{"predictor", "gpu_predictor"}});
     learner->UpdateOneIter(0, p_dmat.get());
-    ASSERT_EQ(learner->GetLearnerTrainParameter().gpu_id, 0);
-    ASSERT_EQ(learner->GetLearnerTrainParameter().n_gpus, 1);
+    ASSERT_EQ(learner->GetGenericParameter().gpu_id, 0);
+    ASSERT_EQ(learner->GetGenericParameter().n_gpus, 1);
   }
 
   delete pp_dmat;
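Taken together, the assertions encode how lazy configuration resolves devices: any GPU component (gpu_hist, gpu_exact, the gpu_coord_descent updater, or gpu_predictor) yields n_gpus == 1, a pure CPU setup yields n_gpus == 0, and an explicit n_gpus argument overrides the CPU default. A minimal sketch of one such check, with the semantics exactly as asserted above rather than a general guarantee:

    std::unique_ptr<Learner> learner {Learner::Create(mat)};
    learner->SetParams({Arg{"tree_method", "gpu_hist"}});
    learner->UpdateOneIter(0, p_dmat.get());  // device resolution happens in the lazy Configure()
    ASSERT_EQ(learner->GetGenericParameter().n_gpus, 1);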
@@ -10,14 +10,14 @@ TEST(Logging, Basic) {
   std::string output;
 
   args["verbosity"] = "0";  // silent
-  ConsoleLogger::Configure(args.cbegin(), args.cend());
+  ConsoleLogger::Configure({args.cbegin(), args.cend()});
   testing::internal::CaptureStderr();
   LOG(DEBUG) << "Test silent.";
   output = testing::internal::GetCapturedStderr();
   ASSERT_EQ(output.length(), 0);
 
   args["verbosity"] = "3";  // debug
-  ConsoleLogger::Configure(args.cbegin(), args.cend());
+  ConsoleLogger::Configure({args.cbegin(), args.cend()});
 
   testing::internal::CaptureStderr();
   LOG(WARNING) << "Test Log Warning.";
@@ -35,14 +35,14 @@ TEST(Logging, Basic) {
   ASSERT_NE(output.find("DEBUG"), std::string::npos);
 
   args["verbosity"] = "1";  // warning
-  ConsoleLogger::Configure(args.cbegin(), args.cend());
+  ConsoleLogger::Configure({args.cbegin(), args.cend()});
   testing::internal::CaptureStderr();
   LOG(INFO) << "INFO should not be displayed when set to warning.";
   output = testing::internal::GetCapturedStderr();
   ASSERT_EQ(output.size(), 0);
 
   args["silent"] = "True";
-  ConsoleLogger::Configure(args.cbegin(), args.cend());
+  ConsoleLogger::Configure({args.cbegin(), args.cend()});
   testing::internal::CaptureStderr();
   LOG(INFO) << "Test silent parameter.";
   output = testing::internal::GetCapturedStderr();
@@ -54,7 +54,7 @@ TEST(Logging, Basic) {
   ASSERT_NE(output.find("Test Log Console"), std::string::npos);
 
   args["verbosity"] = "1";  // restore
-  ConsoleLogger::Configure(args.cbegin(), args.cend());
+  ConsoleLogger::Configure({args.cbegin(), args.cend()});
 }
 
 }  // namespace xgboost
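With the iterator-pair overload gone, ConsoleLogger::Configure takes a single argument; the braced {args.cbegin(), args.cend()} simply constructs the vector of pairs from the map's range. Both spellings below should be equivalent (a sketch reusing the test's std::map named args):

    args["verbosity"] = "2";
    ConsoleLogger::Configure({args.cbegin(), args.cend()});  // vector built from the map range
    ConsoleLogger::Configure({{"verbosity", "2"}});          // literal with the same effect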
@@ -1,12 +1,13 @@
 // Copyright by Contributors
 #include <gtest/gtest.h>
+#include <xgboost/base.h>
 #include <xgboost/logging.h>
 #include <string>
 #include <vector>
 
 int main(int argc, char ** argv) {
-  std::vector<std::pair<std::string, std::string>> args {{"verbosity", "2"}};
-  xgboost::ConsoleLogger::Configure(args.begin(), args.end());
+  xgboost::Args args {{"verbosity", "2"}};
+  xgboost::ConsoleLogger::Configure(args);
   testing::InitGoogleTest(&argc, argv);
   testing::FLAGS_gtest_death_test_style = "threadsafe";
   return RUN_ALL_TESTS();
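The harness now spells the argument list with the Args alias (hence the new xgboost/base.h include). Unlike a std::map, a vector of pairs keeps duplicate keys and their insertion order, which the Configuration test above relies on when it passes eval_metric several times. A small self-contained sketch of that property; the iostream output is only for illustration:

    #include <iostream>
    #include <xgboost/base.h>

    int main() {
      xgboost::Args args {{"eval_metric", "auc"}, {"eval_metric", "rmsle"}};  // duplicates allowed
      for (auto const& kv : args) {
        std::cout << kv.first << "=" << kv.second << "\n";  // insertion order preserved
      }
      return 0;
    }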
@@ -16,7 +16,7 @@ TEST(GPUExact, Update) {
   std::vector<Arg> args{{"max_depth", "1"}};
 
   auto* p_gpuexact_maker = TreeUpdater::Create("grow_gpu", &lparam);
-  p_gpuexact_maker->Init(args);
+  p_gpuexact_maker->Configure(args);
 
   size_t constexpr kNRows = 4;
   size_t constexpr kNCols = 8;
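TreeUpdater follows the same rename: updaters are created from the registry by name with the shared parameter object, then configured with key/value arguments. A sketch mirroring the hunk above:

    auto lparam = CreateEmptyGenericParam(0, 1);  // assumed (gpu_id, n_gpus)
    auto* updater = TreeUpdater::Create("grow_gpu", &lparam);
    updater->Configure({{"max_depth", "1"}});     // formerly Init(args)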
@@ -384,10 +384,11 @@ void TestHistogramIndexImpl(int n_gpus) {
     {"max_leaves", "0"}
   };
 
-  LearnerTrainParam learner_param(CreateEmptyGenericParam(0, n_gpus));
-  hist_maker.Init(training_params, &learner_param);
+  GenericParameter generic_param(CreateEmptyGenericParam(0, n_gpus));
+  hist_maker.Configure(training_params, &generic_param);
+
   hist_maker.InitDataOnce(hist_maker_dmat.get());
-  hist_maker_ext.Init(training_params, &learner_param);
+  hist_maker_ext.Configure(training_params, &generic_param);
   hist_maker_ext.InitDataOnce(hist_maker_ext_dmat.get());
 
   ASSERT_EQ(hist_maker.shards_.size(), hist_maker_ext.shards_.size());
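Besides the LearnerTrainParam to GenericParameter rename, the hunk preserves the sequence the test depends on: Configure wires up parameters first, and only then does InitDataOnce build the per-DMatrix state for each hist maker. A compressed sketch of that ordering, using the names from the hunk:

    GenericParameter generic_param(CreateEmptyGenericParam(0, n_gpus));
    hist_maker.Configure(training_params, &generic_param);  // parameters before data
    hist_maker.InitDataOnce(hist_maker_dmat.get());         // per-DMatrix initialization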
@@ -37,7 +37,7 @@ TEST(Updater, Prune) {
   std::vector<RegTree*> trees {&tree};
   // prepare pruner
   std::unique_ptr<TreeUpdater> pruner(TreeUpdater::Create("prune", &lparam));
-  pruner->Init(cfg);
+  pruner->Configure(cfg);
 
   // loss_chg < min_split_loss;
   tree.ExpandNode(0, 0, 0, true, 0.0f, 0.3f, 0.4f, 0.0f, 0.0f);
@@ -236,7 +236,7 @@ class QuantileHistMock : public QuantileHistMaker {
   explicit QuantileHistMock(
       const std::vector<std::pair<std::string, std::string> >& args) :
       cfg_{args} {
-    QuantileHistMaker::Init(args);
+    QuantileHistMaker::Configure(args);
     builder_.reset(
         new BuilderMock(
             param_,
@@ -37,7 +37,7 @@ TEST(Updater, Refresh) {
   tree.Stat(cleft).base_weight = 1.2;
   tree.Stat(cright).base_weight = 1.3;
 
-  refresher->Init(cfg);
+  refresher->Configure(cfg);
   refresher->Update(&gpair, dmat->get(), trees);
 
   bst_float constexpr kEps = 1e-6;
@@ -32,7 +32,7 @@ TEST(SplitEvaluator, Interaction) {
   }
   std::vector<int32_t> solutions{4, 5};
   ASSERT_EQ(accepted_features.size(), solutions.size());
-  for (int32_t f = 0; f < accepted_features.size(); ++f) {
+  for (size_t f = 0; f < accepted_features.size(); ++f) {
     ASSERT_EQ(accepted_features[f], solutions[f]);
   }
 }
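An unrelated warning fix rides along here: std::vector::size() returns std::size_t, so an int32_t index triggers a signed/unsigned comparison warning under -Wsign-compare; the loop variable now matches the container's size type:

    // size_t matches the return type of accepted_features.size():
    for (size_t f = 0; f < accepted_features.size(); ++f) {
      ASSERT_EQ(accepted_features[f], solutions[f]);
    }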
@@ -154,7 +154,7 @@ class TestModels(unittest.TestCase):
 
     def test_multi_eval_metric(self):
         watchlist = [(dtest, 'eval'), (dtrain, 'train')]
-        param = {'max_depth': 2, 'eta': 0.2, 'verbosity': 0,
+        param = {'max_depth': 2, 'eta': 0.2, 'verbosity': 1,
                  'objective': 'binary:logistic'}
         param['eval_metric'] = ["auc", "logloss", 'error']
         evals_result = {}