Pass pointer to model parameters. (#5101)

* Pass pointer to model parameters.

This PR de-duplicates most of the model parameters, except the one in
`tree_model.h`.  One difficulty is that `base_score` is a model property but can
be changed at runtime by the objective function.  Hence, when performing model
IO, we need to save the value provided by the user instead of the one
transformed by the objective.  Here we create an immutable version of
`LearnerModelParam` that represents the values of the model parameters after
configuration.
This commit is contained in:
Jiaming Yuan
2019-12-10 12:11:22 +08:00
committed by GitHub
parent 979f74d51a
commit e089e16e3d
33 changed files with 623 additions and 404 deletions

View File

@@ -136,9 +136,9 @@ std::string LoadSequentialFile(std::string fname) {
buffer.resize(fsize + 1);
fread(&buffer[0], 1, fsize, f);
buffer.back() = '\0';
fclose(f);
#endif // defined(__unix__)
buffer.back() = '\0';
return buffer;
}

View File

@@ -7,15 +7,18 @@
*/
#include <dmlc/omp.h>
#include <dmlc/parameter.h>
#include <xgboost/gbm.h>
#include <xgboost/logging.h>
#include <xgboost/linear_updater.h>
#include <vector>
#include <string>
#include <sstream>
#include <algorithm>
#include "xgboost/gbm.h"
#include "xgboost/json.h"
#include "xgboost/linear_updater.h"
#include "xgboost/logging.h"
#include "xgboost/learner.h"
#include "gblinear_model.h"
#include "../common/timer.h"
@@ -48,8 +51,10 @@ struct GBLinearTrainParam : public XGBoostParameter<GBLinearTrainParam> {
class GBLinear : public GradientBooster {
public:
explicit GBLinear(const std::vector<std::shared_ptr<DMatrix> > &cache,
bst_float base_margin)
: base_margin_(base_margin),
LearnerModelParam const* learner_model_param)
: learner_model_param_{learner_model_param},
model_{learner_model_param_},
previous_model_{learner_model_param_},
sum_instance_weight_(0),
sum_weight_complete_(false),
is_converged_(false) {
@@ -62,7 +67,7 @@ class GBLinear : public GradientBooster {
}
void Configure(const Args& cfg) override {
if (model_.weight.size() == 0) {
model_.param.InitAllowUnknown(cfg);
model_.Configure(cfg);
}
param_.UpdateAllowUnknown(cfg);
updater_.reset(LinearUpdater::Create(param_.updater, generic_param_));
@@ -116,11 +121,12 @@ class GBLinear : public GradientBooster {
}
// add base margin
void PredictInstance(const SparsePage::Inst &inst,
std::vector<bst_float> *out_preds,
unsigned ntree_limit) override {
const int ngroup = model_.param.num_output_group;
std::vector<bst_float> *out_preds,
unsigned ntree_limit) override {
const int ngroup = model_.learner_model_param_->num_output_group;
for (int gid = 0; gid < ngroup; ++gid) {
this->Pred(inst, dmlc::BeginPtr(*out_preds), gid, base_margin_);
this->Pred(inst, dmlc::BeginPtr(*out_preds), gid,
learner_model_param_->base_score);
}
}
@@ -138,8 +144,8 @@ class GBLinear : public GradientBooster {
CHECK_EQ(ntree_limit, 0U)
<< "GBLinear::PredictContribution: ntrees is only valid for gbtree predictor";
const auto& base_margin = p_fmat->Info().base_margin_.ConstHostVector();
const int ngroup = model_.param.num_output_group;
const size_t ncolumns = model_.param.num_feature + 1;
const int ngroup = model_.learner_model_param_->num_output_group;
const size_t ncolumns = model_.learner_model_param_->num_feature + 1;
// allocate space for (#features + bias) times #groups times #rows
std::vector<bst_float>& contribs = *out_contribs;
contribs.resize(p_fmat->Info().num_row_ * ncolumns * ngroup);
@@ -149,35 +155,38 @@ class GBLinear : public GradientBooster {
for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
// parallel over local batch
const auto nsize = static_cast<bst_omp_uint>(batch.Size());
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < nsize; ++i) {
auto inst = batch[i];
auto inst = batch[i];
auto row_idx = static_cast<size_t>(batch.base_rowid + i);
// loop over output groups
for (int gid = 0; gid < ngroup; ++gid) {
bst_float *p_contribs = &contribs[(row_idx * ngroup + gid) * ncolumns];
// calculate linear terms' contributions
for (auto& ins : inst) {
if (ins.index >= model_.param.num_feature) continue;
if (ins.index >= model_.learner_model_param_->num_feature) continue;
p_contribs[ins.index] = ins.fvalue * model_[ins.index][gid];
}
// add base margin to BIAS
p_contribs[ncolumns - 1] = model_.bias()[gid] +
((base_margin.size() != 0) ? base_margin[row_idx * ngroup + gid] : base_margin_);
((base_margin.size() != 0) ? base_margin[row_idx * ngroup + gid] :
learner_model_param_->base_score);
}
}
}
}
void PredictInteractionContributions(DMatrix* p_fmat,
std::vector<bst_float>* out_contribs,
unsigned ntree_limit, bool approximate) override {
std::vector<bst_float>& contribs = *out_contribs;
std::vector<bst_float>* out_contribs,
unsigned ntree_limit, bool approximate) override {
std::vector<bst_float>& contribs = *out_contribs;
// linear models have no interaction effects
const size_t nelements = model_.param.num_feature*model_.param.num_feature;
contribs.resize(p_fmat->Info().num_row_ * nelements * model_.param.num_output_group);
std::fill(contribs.begin(), contribs.end(), 0);
// linear models have no interaction effects
const size_t nelements = model_.learner_model_param_->num_feature *
model_.learner_model_param_->num_feature;
contribs.resize(p_fmat->Info().num_row_ * nelements *
model_.learner_model_param_->num_output_group);
std::fill(contribs.begin(), contribs.end(), 0);
}
std::vector<std::string> DumpModel(const FeatureMap& fmap,
@@ -196,26 +205,26 @@ class GBLinear : public GradientBooster {
protected:
void PredictBatchInternal(DMatrix *p_fmat,
std::vector<bst_float> *out_preds) {
std::vector<bst_float> *out_preds) {
monitor_.Start("PredictBatchInternal");
model_.LazyInitModel();
std::vector<bst_float> &preds = *out_preds;
const auto& base_margin = p_fmat->Info().base_margin_.ConstHostVector();
// start collecting the prediction
const int ngroup = model_.param.num_output_group;
const int ngroup = model_.learner_model_param_->num_output_group;
preds.resize(p_fmat->Info().num_row_ * ngroup);
for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
// output convention: nrow * k, where nrow is number of rows
// k is number of group
// parallel over local batch
const auto nsize = static_cast<omp_ulong>(batch.Size());
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for (omp_ulong i = 0; i < nsize; ++i) {
const size_t ridx = batch.base_rowid + i;
// loop over output groups
for (int gid = 0; gid < ngroup; ++gid) {
bst_float margin = (base_margin.size() != 0) ?
base_margin[ridx * ngroup + gid] : base_margin_;
base_margin[ridx * ngroup + gid] : learner_model_param_->base_score;
this->Pred(batch[i], &preds[ridx * ngroup], gid, margin);
}
}
@@ -227,7 +236,7 @@ class GBLinear : public GradientBooster {
for (auto &kv : cache_) {
PredictionCacheEntry &e = kv.second;
if (e.predictions.size() == 0) {
size_t n = model_.param.num_output_group * e.data->Info().num_row_;
size_t n = model_.learner_model_param_->num_output_group * e.data->Info().num_row_;
e.predictions.resize(n);
}
this->PredictBatchInternal(e.data.get(), &e.predictions);
@@ -262,18 +271,18 @@ class GBLinear : public GradientBooster {
}
}
inline void Pred(const SparsePage::Inst &inst, bst_float *preds, int gid,
bst_float base) {
void Pred(const SparsePage::Inst &inst, bst_float *preds, int gid,
bst_float base) {
bst_float psum = model_.bias()[gid] + base;
for (const auto& ins : inst) {
if (ins.index >= model_.param.num_feature) continue;
if (ins.index >= model_.learner_model_param_->num_feature) continue;
psum += ins.fvalue * model_[ins.index][gid];
}
preds[gid] = psum;
}
// biase margin score
bst_float base_margin_;
LearnerModelParam const* learner_model_param_;
// model field
GBLinearModel model_;
GBLinearModel previous_model_;
@@ -302,14 +311,13 @@ class GBLinear : public GradientBooster {
};
// register the objective functions
DMLC_REGISTER_PARAMETER(GBLinearModelParam);
DMLC_REGISTER_PARAMETER(GBLinearTrainParam);
XGBOOST_REGISTER_GBM(GBLinear, "gblinear")
.describe("Linear booster, implement generalized linear model.")
.set_body([](const std::vector<std::shared_ptr<DMatrix> > &cache,
bst_float base_margin) {
return new GBLinear(cache, base_margin);
LearnerModelParam const* booster_config) {
return new GBLinear(cache, booster_config);
});
} // namespace gbm
} // namespace xgboost

View File

@@ -1,51 +1,65 @@
/*!
* Copyright by Contributors 2018
* Copyright 2018-2019 by Contributors
*/
#pragma once
#include <dmlc/io.h>
#include <dmlc/parameter.h>
#include <xgboost/base.h>
#include <xgboost/feature_map.h>
#include <xgboost/model.h>
#include <xgboost/learner.h>
#include <vector>
#include <string>
#include <cstring>
#include "xgboost/base.h"
#include "xgboost/feature_map.h"
#include "xgboost/model.h"
#include "xgboost/json.h"
#include "xgboost/parameter.h"
namespace xgboost {
class Json;
namespace gbm {
// model parameter
struct GBLinearModelParam : public dmlc::Parameter<GBLinearModelParam> {
// Deprecated in 1.0.0. model parameter. Only staying here for compatible binary model IO.
struct DeprecatedGBLinearModelParam : public dmlc::Parameter<DeprecatedGBLinearModelParam> {
// number of feature dimension
unsigned num_feature;
// number of output group
int num_output_group;
uint32_t deprecated_num_feature;
// deprecated. use learner_model_param_->num_output_group.
int32_t deprecated_num_output_group;
// reserved field
int reserved[32];
int32_t reserved[32];
// constructor
GBLinearModelParam() { std::memset(this, 0, sizeof(GBLinearModelParam)); }
DMLC_DECLARE_PARAMETER(GBLinearModelParam) {
DMLC_DECLARE_FIELD(num_feature)
.set_lower_bound(0)
.describe("Number of features used in classification.");
DMLC_DECLARE_FIELD(num_output_group)
.set_lower_bound(1)
.set_default(1)
.describe("Number of output groups in the setting.");
// Zero-initialise the whole struct.
// The static_assert pins the object size to 34 * sizeof(int32_t), so any
// change to the struct's size is rejected at compile time (see the message
// below).
DeprecatedGBLinearModelParam() {
static_assert(sizeof(*this) == sizeof(int32_t) * 34,
"Model parameter size can not be changed.");
// Clear every byte of the object.
std::memset(this, 0, sizeof(DeprecatedGBLinearModelParam));
}
DMLC_DECLARE_PARAMETER(DeprecatedGBLinearModelParam) {}
};
// model for linear booster
class GBLinearModel : public Model {
private:
// Deprecated in 1.0.0
DeprecatedGBLinearModelParam param;
public:
// parameter
GBLinearModelParam param;
LearnerModelParam const* learner_model_param_;
public:
explicit GBLinearModel(LearnerModelParam const* learner_model_param) :
learner_model_param_ {learner_model_param} {}
void Configure(Args const &cfg) { }
// weight for each of feature, bias is the last one
std::vector<bst_float> weight;
// initialize the model parameter
inline void LazyInitModel() {
if (!weight.empty()) return;
if (!weight.empty())
return;
// bias is the last weight
weight.resize((param.num_feature + 1) * param.num_output_group);
weight.resize((learner_model_param_->num_feature + 1) *
learner_model_param_->num_output_group);
std::fill(weight.begin(), weight.end(), 0.0f);
}
// save the model to file
@@ -70,33 +84,37 @@ class GBLinearModel : public Model {
}
// model bias
inline bst_float* bias() {
return &weight[param.num_feature * param.num_output_group];
inline bst_float *bias() {
return &weight[learner_model_param_->num_feature *
learner_model_param_->num_output_group];
}
inline const bst_float* bias() const {
return &weight[param.num_feature * param.num_output_group];
inline const bst_float *bias() const {
return &weight[learner_model_param_->num_feature *
learner_model_param_->num_output_group];
}
// get i-th weight
inline bst_float* operator[](size_t i) {
return &weight[i * param.num_output_group];
inline bst_float *operator[](size_t i) {
return &weight[i * learner_model_param_->num_output_group];
}
inline const bst_float* operator[](size_t i) const {
return &weight[i * param.num_output_group];
inline const bst_float *operator[](size_t i) const {
return &weight[i * learner_model_param_->num_output_group];
}
std::vector<std::string> DumpModel(const FeatureMap& fmap, bool with_stats,
std::vector<std::string> DumpModel(const FeatureMap &fmap, bool with_stats,
std::string format) const {
const int ngroup = param.num_output_group;
const unsigned nfeature = param.num_feature;
const int ngroup = learner_model_param_->num_output_group;
const unsigned nfeature = learner_model_param_->num_feature;
std::stringstream fo("");
if (format == "json") {
fo << " { \"bias\": [" << std::endl;
for (int gid = 0; gid < ngroup; ++gid) {
if (gid != 0) fo << "," << std::endl;
if (gid != 0)
fo << "," << std::endl;
fo << " " << this->bias()[gid];
}
fo << std::endl << " ]," << std::endl
fo << std::endl
<< " ]," << std::endl
<< " \"weight\": [" << std::endl;
for (unsigned i = 0; i < nfeature; ++i) {
for (int gid = 0; gid < ngroup; ++gid) {

View File

@@ -3,8 +3,14 @@
* \file gbm.cc
* \brief Registry of gradient boosters.
*/
#include <xgboost/gbm.h>
#include <dmlc/registry.h>
#include <string>
#include <vector>
#include <memory>
#include "xgboost/gbm.h"
#include "xgboost/learner.h"
#include "xgboost/generic_parameters.h"
namespace dmlc {
DMLC_REGISTRY_ENABLE(::xgboost::GradientBoosterReg);
@@ -14,17 +20,16 @@ namespace xgboost {
GradientBooster* GradientBooster::Create(
const std::string& name,
GenericParameter const* generic_param,
const std::vector<std::shared_ptr<DMatrix> >& cache_mats,
bst_float base_margin) {
LearnerModelParam const* learner_model_param,
const std::vector<std::shared_ptr<DMatrix> >& cache_mats) {
auto *e = ::dmlc::Registry< ::xgboost::GradientBoosterReg>::Get()->Find(name);
if (e == nullptr) {
LOG(FATAL) << "Unknown gbm type " << name;
}
auto p_bst = (e->body)(cache_mats, base_margin);
auto p_bst = (e->body)(cache_mats, learner_model_param);
p_bst->generic_param_ = generic_param;
return p_bst;
}
} // namespace xgboost
namespace xgboost {

View File

@@ -14,8 +14,9 @@
#include <limits>
#include <algorithm>
#include "xgboost/logging.h"
#include "xgboost/gbm.h"
#include "xgboost/logging.h"
#include "xgboost/json.h"
#include "xgboost/predictor.h"
#include "xgboost/tree_updater.h"
#include "xgboost/host_device_vector.h"
@@ -26,7 +27,6 @@
#include "../common/random.h"
#include "../common/timer.h"
namespace xgboost {
namespace gbm {
@@ -161,10 +161,11 @@ void GBTree::ConfigureUpdaters() {
"single updater grow_quantile_histmaker.";
tparam_.updater_seq = "grow_quantile_histmaker";
break;
case TreeMethod::kGPUHist:
case TreeMethod::kGPUHist: {
this->AssertGPUSupport();
tparam_.updater_seq = "grow_gpu_hist";
break;
}
default:
LOG(FATAL) << "Unknown tree_method ("
<< static_cast<int>(tparam_.tree_method) << ") detected";
@@ -175,9 +176,10 @@ void GBTree::DoBoost(DMatrix* p_fmat,
HostDeviceVector<GradientPair>* in_gpair,
ObjFunction* obj) {
std::vector<std::vector<std::unique_ptr<RegTree> > > new_trees;
const int ngroup = model_.param.num_output_group;
const int ngroup = model_.learner_model_param_->num_output_group;
ConfigureWithKnownData(this->cfg_, p_fmat);
monitor_.Start("BoostNewTrees");
CHECK_NE(ngroup, 0);
if (ngroup == 1) {
std::vector<std::unique_ptr<RegTree> > ret;
BoostNewTrees(in_gpair, p_fmat, 0, &ret);
@@ -234,9 +236,11 @@ void GBTree::InitUpdater(Args const& cfg) {
LOG(FATAL) << ss.str();
}
}
// Do not push new updater in.
return;
}
// create new updaters
for (const std::string& pstr : ups) {
std::unique_ptr<TreeUpdater> up(TreeUpdater::Create(pstr.c_str(), generic_param_));
up->Configure(cfg);
@@ -255,7 +259,7 @@ void GBTree::BoostNewTrees(HostDeviceVector<GradientPair>* gpair,
if (tparam_.process_type == TreeProcessType::kDefault) {
// create new tree
std::unique_ptr<RegTree> ptr(new RegTree());
ptr->param.InitAllowUnknown(this->cfg_);
ptr->param.UpdateAllowUnknown(this->cfg_);
new_trees.push_back(ptr.get());
ret->push_back(std::move(ptr));
} else if (tparam_.process_type == TreeProcessType::kUpdate) {
@@ -276,7 +280,7 @@ void GBTree::BoostNewTrees(HostDeviceVector<GradientPair>* gpair,
void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees) {
monitor_.Start("CommitModel");
int num_new_trees = 0;
for (int gid = 0; gid < model_.param.num_output_group; ++gid) {
for (uint32_t gid = 0; gid < model_.learner_model_param_->num_output_group; ++gid) {
num_new_trees += new_trees[gid].size();
model_.CommitModel(std::move(new_trees[gid]), gid);
}
@@ -289,7 +293,8 @@ void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& ne
// dart
class Dart : public GBTree {
public:
explicit Dart(bst_float base_margin) : GBTree(base_margin) {}
explicit Dart(LearnerModelParam const* booster_config) :
GBTree(booster_config) {}
void Configure(const Args& cfg) override {
GBTree::Configure(cfg);
@@ -305,7 +310,6 @@ class Dart : public GBTree {
fi->Read(&weight_drop_);
}
}
void Save(dmlc::Stream* fo) const override {
GBTree::Save(fo);
if (weight_drop_.size() != 0) {
@@ -326,18 +330,18 @@ class Dart : public GBTree {
DropTrees(1);
if (thread_temp_.size() == 0) {
thread_temp_.resize(1, RegTree::FVec());
thread_temp_[0].Init(model_.param.num_feature);
thread_temp_[0].Init(model_.learner_model_param_->num_feature);
}
out_preds->resize(model_.param.num_output_group);
ntree_limit *= model_.param.num_output_group;
out_preds->resize(model_.learner_model_param_->num_output_group);
ntree_limit *= model_.learner_model_param_->num_output_group;
if (ntree_limit == 0 || ntree_limit > model_.trees.size()) {
ntree_limit = static_cast<unsigned>(model_.trees.size());
}
// loop over output groups
for (int gid = 0; gid < model_.param.num_output_group; ++gid) {
for (uint32_t gid = 0; gid < model_.learner_model_param_->num_output_group; ++gid) {
(*out_preds)[gid] =
PredValue(inst, gid, &thread_temp_[0], 0, ntree_limit) +
model_.base_margin;
model_.learner_model_param_->base_score;
}
}
@@ -362,6 +366,7 @@ class Dart : public GBTree {
ntree_limit, &weight_drop_, approximate);
}
protected:
friend class GBTree;
// internal prediction loop
@@ -373,7 +378,7 @@ class Dart : public GBTree {
unsigned tree_begin,
unsigned ntree_limit,
bool init_out_preds) {
int num_group = model_.param.num_output_group;
int num_group = model_.learner_model_param_->num_output_group;
ntree_limit *= num_group;
if (ntree_limit == 0 || ntree_limit > model_.trees.size()) {
ntree_limit = static_cast<unsigned>(model_.trees.size());
@@ -388,17 +393,12 @@ class Dart : public GBTree {
CHECK_EQ(out_preds->size(), n);
std::copy(base_margin.begin(), base_margin.end(), out_preds->begin());
} else {
std::fill(out_preds->begin(), out_preds->end(), model_.base_margin);
std::fill(out_preds->begin(), out_preds->end(),
model_.learner_model_param_->base_score);
}
}
if (num_group == 1) {
PredLoopSpecalize<Derived>(p_fmat, out_preds, 1,
tree_begin, ntree_limit);
} else {
PredLoopSpecalize<Derived>(p_fmat, out_preds, num_group,
tree_begin, ntree_limit);
}
PredLoopSpecalize<Derived>(p_fmat, out_preds, num_group, tree_begin,
ntree_limit);
}
template<typename Derived>
@@ -409,7 +409,7 @@ class Dart : public GBTree {
unsigned tree_begin,
unsigned tree_end) {
const int nthread = omp_get_max_threads();
CHECK_EQ(num_group, model_.param.num_output_group);
CHECK_EQ(num_group, model_.learner_model_param_->num_output_group);
InitThreadTemp(nthread);
std::vector<bst_float>& preds = *out_preds;
CHECK_EQ(model_.param.size_leaf_vector, 0)
@@ -443,6 +443,7 @@ class Dart : public GBTree {
}
}
}
for (bst_omp_uint i = nsize - rest; i < nsize; ++i) {
RegTree::FVec& feats = thread_temp_[0];
const auto ridx = static_cast<int64_t>(batch.base_rowid + i);
@@ -461,7 +462,7 @@ class Dart : public GBTree {
void
CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees) override {
int num_new_trees = 0;
for (int gid = 0; gid < model_.param.num_output_group; ++gid) {
for (uint32_t gid = 0; gid < model_.learner_model_param_->num_output_group; ++gid) {
num_new_trees += new_trees[gid].size();
model_.CommitModel(std::move(new_trees[gid]), gid);
}
@@ -480,7 +481,7 @@ class Dart : public GBTree {
p_feats->Fill(inst);
for (size_t i = tree_begin; i < tree_end; ++i) {
if (model_.tree_info[i] == bst_group) {
bool drop = (std::binary_search(idx_drop_.begin(), idx_drop_.end(), i));
bool drop = std::binary_search(idx_drop_.begin(), idx_drop_.end(), i);
if (!drop) {
int tid = model_.trees[i]->GetLeafIndex(*p_feats);
psum += weight_drop_[i] * (*model_.trees[i])[tid].LeafValue();
@@ -577,7 +578,7 @@ class Dart : public GBTree {
if (prev_thread_temp_size < nthread) {
thread_temp_.resize(nthread, RegTree::FVec());
for (int i = prev_thread_temp_size; i < nthread; ++i) {
thread_temp_[i].Init(model_.param.num_feature);
thread_temp_[i].Init(model_.learner_model_param_->num_feature);
}
}
}
@@ -600,15 +601,17 @@ DMLC_REGISTER_PARAMETER(DartTrainParam);
XGBOOST_REGISTER_GBM(GBTree, "gbtree")
.describe("Tree booster, gradient boosted trees.")
.set_body([](const std::vector<std::shared_ptr<DMatrix> >& cached_mats, bst_float base_margin) {
auto* p = new GBTree(base_margin);
.set_body([](const std::vector<std::shared_ptr<DMatrix> >& cached_mats,
LearnerModelParam const* booster_config) {
auto* p = new GBTree(booster_config);
p->InitCache(cached_mats);
return p;
});
XGBOOST_REGISTER_GBM(Dart, "dart")
.describe("Tree booster, dart.")
.set_body([](const std::vector<std::shared_ptr<DMatrix> >& cached_mats, bst_float base_margin) {
GBTree* p = new Dart(base_margin);
.set_body([](const std::vector<std::shared_ptr<DMatrix> >& cached_mats,
LearnerModelParam const* booster_config) {
GBTree* p = new Dart(booster_config);
return p;
});
} // namespace gbm

View File

@@ -8,7 +8,6 @@
#define XGBOOST_GBM_GBTREE_H_
#include <dmlc/omp.h>
#include <dmlc/parameter.h>
#include <vector>
#include <map>
@@ -86,7 +85,6 @@ struct GBTreeTrainParam : public XGBoostParameter<GBTreeTrainParam> {
.add_enum("update", TreeProcessType::kUpdate)
.describe("Whether to run the normal boosting process that creates new trees,"\
" or to update the trees in an existing model.");
// add alias
DMLC_DECLARE_ALIAS(updater_seq, updater);
DMLC_DECLARE_FIELD(predictor)
.set_default(PredictorType::kAuto)
@@ -153,7 +151,7 @@ struct DartTrainParam : public XGBoostParameter<DartTrainParam> {
// gradient boosted trees
class GBTree : public GradientBooster {
public:
explicit GBTree(bst_float base_margin) : model_(base_margin) {}
explicit GBTree(LearnerModelParam const* booster_config) : model_(booster_config) {}
void InitCache(const std::vector<std::shared_ptr<DMatrix> > &cache) {
cache_ = std::make_shared<std::unordered_map<DMatrix*, PredictionCacheEntry>>();
@@ -181,24 +179,21 @@ class GBTree : public GradientBooster {
tparam_.tree_method == TreeMethod::kGPUHist;
}
void Load(dmlc::Stream* fi) override {
model_.Load(fi);
this->cfg_.clear();
this->cfg_.emplace_back(std::string("num_feature"),
common::ToString(model_.param.num_feature));
}
GBTreeTrainParam const& GetTrainParam() const {
return tparam_;
}
void Load(dmlc::Stream* fi) override {
model_.Load(fi);
this->cfg_.clear();
}
void Save(dmlc::Stream* fo) const override {
model_.Save(fo);
}
bool AllowLazyCheckPoint() const override {
return model_.param.num_output_group == 1 ||
return model_.learner_model_param_->num_output_group == 1 ||
tparam_.updater_seq.find("distcol") != std::string::npos;
}

View File

@@ -1,11 +1,16 @@
/*!
* Copyright by Contributors 2017
* Copyright 2017-2019 by Contributors
* \file gbtree_model.h
*/
#pragma once
#ifndef XGBOOST_GBM_GBTREE_MODEL_H_
#define XGBOOST_GBM_GBTREE_MODEL_H_
#include <dmlc/parameter.h>
#include <dmlc/io.h>
#include <xgboost/model.h>
#include <xgboost/tree_model.h>
#include <xgboost/parameter.h>
#include <xgboost/learner.h>
#include <memory>
#include <utility>
@@ -13,45 +18,42 @@
#include <vector>
namespace xgboost {
class Json;
namespace gbm {
/*! \brief model parameters */
struct GBTreeModelParam : public dmlc::Parameter<GBTreeModelParam> {
public:
/*! \brief number of trees */
int num_trees;
int32_t num_trees;
/*! \brief (Deprecated) number of roots */
int deprecated_num_roots;
int32_t deprecated_num_roots;
/*! \brief number of features to be used by trees */
int num_feature;
int32_t deprecated_num_feature;
/*! \brief pad this space, for backward compatibility reason.*/
int pad_32bit;
int32_t pad_32bit;
/*! \brief deprecated padding space. */
int64_t num_pbuffer_deprecated;
/*!
* \brief how many output group a single instance can produce
* this affects the behavior of number of output we have:
* suppose we have n instance and k group, output will be k * n
*/
int num_output_group;
int64_t deprecated_num_pbuffer;
// deprecated. use learner_model_param_->num_output_group.
int32_t deprecated_num_output_group;
/*! \brief size of leaf vector needed in tree */
int size_leaf_vector;
int32_t size_leaf_vector;
/*! \brief reserved parameters */
int reserved[32];
int32_t reserved[32];
/*! \brief constructor */
GBTreeModelParam() {
std::memset(this, 0, sizeof(GBTreeModelParam));
static_assert(sizeof(GBTreeModelParam) == (4 + 2 + 2 + 32) * sizeof(int),
std::memset(this, 0, sizeof(GBTreeModelParam)); // FIXME(trivialfis): Why?
static_assert(sizeof(GBTreeModelParam) == (4 + 2 + 2 + 32) * sizeof(int32_t),
"64/32 bit compatibility issue");
}
// declare parameters, only declare those that need to be set.
DMLC_DECLARE_PARAMETER(GBTreeModelParam) {
DMLC_DECLARE_FIELD(num_output_group)
.set_lower_bound(1)
.set_default(1)
.describe(
"Number of output groups to be predicted,"
" used for multi-class classification.");
DMLC_DECLARE_FIELD(num_feature)
DMLC_DECLARE_FIELD(num_trees)
.set_lower_bound(0)
.set_default(0)
.describe("Number of features used for training and prediction.");
DMLC_DECLARE_FIELD(size_leaf_vector)
.set_lower_bound(0)
@@ -61,11 +63,13 @@ struct GBTreeModelParam : public dmlc::Parameter<GBTreeModelParam> {
};
struct GBTreeModel : public Model {
explicit GBTreeModel(bst_float base_margin) : base_margin(base_margin) {}
public:
explicit GBTreeModel(LearnerModelParam const* learner_model_param) :
learner_model_param_{learner_model_param} {}
void Configure(const Args& cfg) {
// initialize model parameters if not yet been initialized.
if (trees.size() == 0) {
param.InitAllowUnknown(cfg);
param.UpdateAllowUnknown(cfg);
}
}
@@ -136,7 +140,7 @@ struct GBTreeModel : public Model {
}
// base margin
bst_float base_margin;
LearnerModelParam const* learner_model_param_;
// model parameter
GBTreeModelParam param;
/*! \brief vector of trees stored in the model */
@@ -148,3 +152,5 @@ struct GBTreeModel : public Model {
};
} // namespace gbm
} // namespace xgboost
#endif // XGBOOST_GBM_GBTREE_MODEL_H_

View File

@@ -16,17 +16,23 @@
#include <utility>
#include <vector>
#include "xgboost/feature_map.h"
#include "xgboost/learner.h"
#include "xgboost/base.h"
#include "xgboost/parameter.h"
#include "xgboost/logging.h"
#include "xgboost/feature_map.h"
#include "xgboost/gbm.h"
#include "xgboost/generic_parameters.h"
#include "xgboost/host_device_vector.h"
#include "xgboost/json.h"
#include "xgboost/learner.h"
#include "xgboost/logging.h"
#include "xgboost/metric.h"
#include "xgboost/objective.h"
#include "xgboost/parameter.h"
#include "common/common.h"
#include "common/io.h"
#include "common/random.h"
#include "common/timer.h"
#include "common/version.h"
namespace {
@@ -69,8 +75,15 @@ bool Learner::AllowLazyCheckPoint() const {
return gbm_->AllowLazyCheckPoint();
}
/*! \brief training parameter for regression */
struct LearnerModelParam : public dmlc::Parameter<LearnerModelParam> {
Learner::~Learner() = default;
/*! \brief training parameter for regression
*
* Should be deprecated, but still used for being compatible with binary IO.
* Once it's gone, `LearnerModelParam` should handle transforming `base_margin`
* with objective by itself.
*/
struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy> {
/* \brief global bias */
bst_float base_score;
/* \brief number of features */
@@ -84,12 +97,28 @@ struct LearnerModelParam : public dmlc::Parameter<LearnerModelParam> {
/*! \brief reserved field */
int reserved[29];
/*! \brief constructor */
LearnerModelParam() {
std::memset(this, 0, sizeof(LearnerModelParam));
LearnerModelParamLegacy() {
std::memset(this, 0, sizeof(LearnerModelParamLegacy));
base_score = 0.5f;
}
// Skip other legacy fields.
/*!
 * \brief Serialise this parameter set into a JSON object.
 *
 * Only `base_score`, `num_feature` and `num_class` are written; each value is
 * stored as the string produced by `std::to_string`.
 *
 * \return A Json value wrapping the populated object.
 */
Json ToJson() const {
Object obj;
// NOTE(review): if base_score is a floating-point type, std::to_string formats
// it like "%f" (six fractional digits), so the stored text may not round-trip
// to the exact same value -- confirm this is acceptable.
obj["base_score"] = std::to_string(base_score);
obj["num_feature"] = std::to_string(num_feature);
obj["num_class"] = std::to_string(num_class);
return Json(std::move(obj));
}
/*!
 * \brief Restore this parameter set from a JSON object.
 *
 * Reads the string values stored under the keys `base_score`, `num_feature`
 * and `num_class`, collects them into a string-to-string map and passes that
 * map to `Init`.
 *
 * \param obj JSON value that is accessed as an object holding the three keys.
 */
void FromJson(Json const& obj) {
auto const& j_param = get<Object const>(obj);
std::map<std::string, std::string> m;
// All three keys are looked up with at(); each entry is read as a String.
m["base_score"] = get<String const>(j_param.at("base_score"));
m["num_feature"] = get<String const>(j_param.at("num_feature"));
m["num_class"] = get<String const>(j_param.at("num_class"));
// Hand the collected key/value pairs to Init.
this->Init(m);
}
// declare parameters
DMLC_DECLARE_PARAMETER(LearnerModelParam) {
DMLC_DECLARE_PARAMETER(LearnerModelParamLegacy) {
DMLC_DECLARE_FIELD(base_score)
.set_default(0.5f)
.describe("Global bias of the model.");
@@ -104,12 +133,20 @@ struct LearnerModelParam : public dmlc::Parameter<LearnerModelParam> {
}
};
/*!
 * \brief Build a LearnerModelParam from the legacy parameter struct.
 *
 * \param user_param  Legacy parameters; `num_feature` and `num_class` are read.
 * \param base_margin Value stored as `base_score`.  Note that
 *                    `user_param.base_score` itself is not read here.
 *
 * `num_output_group` is set to 1 when `user_param.num_class` is 0, otherwise
 * to `num_class` converted to `uint32_t`.
 */
LearnerModelParam::LearnerModelParam(
LearnerModelParamLegacy const &user_param, float base_margin)
: base_score{base_margin}, num_feature{user_param.num_feature},
num_output_group{user_param.num_class == 0
? 1
: static_cast<uint32_t>(user_param.num_class)} {}
struct LearnerTrainParam : public XGBoostParameter<LearnerTrainParam> {
// data split mode, can be row, col, or none.
DataSplitMode dsplit;
// flag to disable default metric
int disable_default_eval_metric;
// FIXME(trivialfis): The following parameters belong to model itself, but can be
// specified by users. Move them to model parameter once we can get rid of binary IO.
std::string booster;
std::string objective;
@@ -134,7 +171,7 @@ struct LearnerTrainParam : public XGBoostParameter<LearnerTrainParam> {
};
DMLC_REGISTER_PARAMETER(LearnerModelParam);
DMLC_REGISTER_PARAMETER(LearnerModelParamLegacy);
DMLC_REGISTER_PARAMETER(LearnerTrainParam);
DMLC_REGISTER_PARAMETER(GenericParameter);
@@ -142,14 +179,7 @@ int constexpr GenericParameter::kCpuId;
void GenericParameter::ConfigureGpuId(bool require_gpu) {
#if defined(XGBOOST_USE_CUDA)
int32_t n_visible = common::AllVisibleGPUs();
if (n_visible == 0) {
// Running XGBoost compiled with CUDA on CPU only machine.
this->UpdateAllowUnknown(Args{{"gpu_id", std::to_string(kCpuId)}});
return;
}
if (this->gpu_id == kCpuId) { // 0. User didn't specify the `gpu_id'
if (gpu_id == kCpuId) { // 0. User didn't specify the `gpu_id'
if (require_gpu) { // 1. `tree_method' or `predictor' or both are using
// GPU.
// 2. Use device 0 as default.
@@ -159,7 +189,10 @@ void GenericParameter::ConfigureGpuId(bool require_gpu) {
// 3. When booster is loaded from a memory image (Python pickle or R
// raw model), number of available GPUs could be different. Wrap around it.
if (this->gpu_id != kCpuId && this->gpu_id >= n_visible) {
int32_t n_gpus = common::AllVisibleGPUs();
if (n_gpus == 0) {
this->UpdateAllowUnknown(Args{{"gpu_id", std::to_string(kCpuId)}});
} else if (gpu_id != kCpuId && gpu_id >= n_gpus) {
this->UpdateAllowUnknown(Args{{"gpu_id", std::to_string(gpu_id % n_gpus)}});
}
#else
@@ -175,25 +208,25 @@ void GenericParameter::ConfigureGpuId(bool require_gpu) {
class LearnerImpl : public Learner {
public:
explicit LearnerImpl(std::vector<std::shared_ptr<DMatrix> > cache)
: configured_{false}, cache_(std::move(cache)) {
: need_configuration_{true}, cache_(std::move(cache)) {
monitor_.Init("Learner");
}
// Configuration before data is known.
void Configure() override {
if (configured_) { return; }
if (!this->need_configuration_) { return; }
monitor_.Start("Configure");
auto old_tparam = tparam_;
Args args = {cfg_.cbegin(), cfg_.cend()};
tparam_.UpdateAllowUnknown(args);
generic_param_.UpdateAllowUnknown(args);
generic_param_.CheckDeprecated();
mparam_.UpdateAllowUnknown(args);
generic_parameters_.UpdateAllowUnknown(args);
generic_parameters_.CheckDeprecated();
ConsoleLogger::Configure(args);
if (generic_param_.nthread != 0) {
omp_set_num_threads(generic_param_.nthread);
if (generic_parameters_.nthread != 0) {
omp_set_num_threads(generic_parameters_.nthread);
}
// add additional parameters
@@ -202,9 +235,9 @@ class LearnerImpl : public Learner {
tparam_.dsplit = DataSplitMode::kRow;
}
mparam_.InitAllowUnknown(args);
// set seed only before the model is initialized
common::GlobalRandom().seed(generic_param_.seed);
common::GlobalRandom().seed(generic_parameters_.seed);
// must precede configure gbm since num_features is required for gbm
this->ConfigureNumFeatures();
args = {cfg_.cbegin(), cfg_.cend()}; // renew
@@ -212,9 +245,12 @@ class LearnerImpl : public Learner {
this->ConfigureGBM(old_tparam, args);
this->ConfigureMetrics(args);
generic_param_.ConfigureGpuId(this->gbm_->UseGPU());
generic_parameters_.ConfigureGpuId(this->gbm_->UseGPU());
this->configured_ = true;
learner_model_param_ = LearnerModelParam(mparam_,
obj_->ProbToMargin(mparam_.base_score));
this->need_configuration_ = false;
monitor_.Stop("Configure");
}
@@ -241,7 +277,7 @@ class LearnerImpl : public Learner {
}
void Load(dmlc::Stream* fi) override {
generic_param_.UpdateAllowUnknown(Args{});
generic_parameters_.UpdateAllowUnknown(Args{});
tparam_.Init(std::vector<std::pair<std::string, std::string>>{});
// TODO(tqchen) mark deprecation of old format.
common::PeekableInStream fp(fi);
@@ -279,9 +315,9 @@ class LearnerImpl : public Learner {
}
CHECK(fi->Read(&tparam_.booster)) << "BoostLearner: wrong model format";
// duplicated code with LazyInitModel
obj_.reset(ObjFunction::Create(tparam_.objective, &generic_param_));
gbm_.reset(GradientBooster::Create(tparam_.booster, &generic_param_,
cache_, mparam_.base_score));
obj_.reset(ObjFunction::Create(tparam_.objective, &generic_parameters_));
gbm_.reset(GradientBooster::Create(tparam_.booster, &generic_parameters_,
&learner_model_param_, cache_));
gbm_->Load(fi);
if (mparam_.contain_extra_attrs != 0) {
std::vector<std::pair<std::string, std::string> > attr;
@@ -340,7 +376,7 @@ class LearnerImpl : public Learner {
std::vector<std::string> metr;
fi->Read(&metr);
for (auto name : metr) {
metrics_.emplace_back(Metric::Create(name, &generic_param_));
metrics_.emplace_back(Metric::Create(name, &generic_parameters_));
}
}
@@ -351,7 +387,7 @@ class LearnerImpl : public Learner {
cfg_.insert(n.cbegin(), n.cend());
Args args = {cfg_.cbegin(), cfg_.cend()};
generic_param_.UpdateAllowUnknown(args);
generic_parameters_.UpdateAllowUnknown(args);
gbm_->Configure(args);
obj_->Configure({cfg_.begin(), cfg_.end()});
@@ -364,13 +400,14 @@ class LearnerImpl : public Learner {
tparam_.dsplit = DataSplitMode::kRow;
}
this->generic_param_.ConfigureGpuId(gbm_->UseGPU());
this->configured_ = true;
// There's no logic for state machine for binary IO, as it has a mix of everything and
// half loaded model.
this->Configure();
}
// rabit save model to rabit checkpoint
void Save(dmlc::Stream* fo) const override {
if (!this->configured_) {
if (this->need_configuration_) {
// Save empty model. Calling Configure in a dummy LearnerImpl avoids violating
// constness.
LearnerImpl empty(std::move(this->cache_));
@@ -383,7 +420,7 @@ class LearnerImpl : public Learner {
return;
}
LearnerModelParam mparam = mparam_; // make a copy to potentially modify
LearnerModelParamLegacy mparam = mparam_; // make a copy to potentially modify
std::vector<std::pair<std::string, std::string> > extra_attr;
// extra attributes to be added just before saving
if (tparam_.objective == "count:poisson") {
@@ -419,11 +456,12 @@ class LearnerImpl : public Learner {
return it.first == "SAVED_PARAM_gpu_id";
})) {
mparam.contain_extra_attrs = 1;
extra_attr.emplace_back("SAVED_PARAM_gpu_id", std::to_string(generic_param_.gpu_id));
extra_attr.emplace_back("SAVED_PARAM_gpu_id",
std::to_string(generic_parameters_.gpu_id));
}
}
#endif // defined(XGBOOST_USE_CUDA)
fo->Write(&mparam, sizeof(LearnerModelParam));
fo->Write(&mparam, sizeof(LearnerModelParamLegacy));
fo->Write(tparam_.objective);
fo->Write(tparam_.booster);
gbm_->Save(fo);
@@ -459,14 +497,16 @@ class LearnerImpl : public Learner {
/*!
 * \brief Dump the model by delegating to the gradient booster.
 *
 * Fails the check below when the learner is still flagged as needing
 * configuration.
 *
 * \param fmap feature map forwarded to the booster
 * \param with_stats forwarded to the booster's DumpModel
 * \param format forwarded to the booster's DumpModel
 * \return whatever the booster's DumpModel returns
 */
std::vector<std::string> DumpModel(const FeatureMap& fmap, bool with_stats,
                                   std::string format) const override {
  // The condition is kept verbatim: the CHECK macro embeds it in its message.
  CHECK(!this->need_configuration_)
      << "The model hasn't been built yet. Are you using raw Booster interface?";
  auto dumped = gbm_->DumpModel(fmap, with_stats, format);
  return dumped;
}
void UpdateOneIter(int iter, DMatrix* train) override {
monitor_.Start("UpdateOneIter");
this->Configure();
if (generic_param_.seed_per_iteration || rabit::IsDistributed()) {
common::GlobalRandom().seed(generic_param_.seed * kRandSeedMagic + iter);
if (generic_parameters_.seed_per_iteration || rabit::IsDistributed()) {
common::GlobalRandom().seed(generic_parameters_.seed * kRandSeedMagic + iter);
}
this->CheckDataSplitMode();
this->ValidateDMatrix(train);
@@ -485,8 +525,8 @@ class LearnerImpl : public Learner {
HostDeviceVector<GradientPair>* in_gpair) override {
monitor_.Start("BoostOneIter");
this->Configure();
if (generic_param_.seed_per_iteration || rabit::IsDistributed()) {
common::GlobalRandom().seed(generic_param_.seed * kRandSeedMagic + iter);
if (generic_parameters_.seed_per_iteration || rabit::IsDistributed()) {
common::GlobalRandom().seed(generic_parameters_.seed * kRandSeedMagic + iter);
}
this->CheckDataSplitMode();
this->ValidateDMatrix(train);
@@ -503,7 +543,7 @@ class LearnerImpl : public Learner {
std::ostringstream os;
os << '[' << iter << ']' << std::setiosflags(std::ios::fixed);
if (metrics_.size() == 0 && tparam_.disable_default_eval_metric <= 0) {
metrics_.emplace_back(Metric::Create(obj_->DefaultEvalMetric(), &generic_param_));
metrics_.emplace_back(Metric::Create(obj_->DefaultEvalMetric(), &generic_parameters_));
metrics_.back()->Configure({cfg_.begin(), cfg_.end()});
}
for (size_t i = 0; i < data_sets.size(); ++i) {
@@ -523,7 +563,7 @@ class LearnerImpl : public Learner {
}
void SetParam(const std::string& key, const std::string& value) override {
configured_ = false;
this->need_configuration_ = true;
if (key == kEvalMetric) {
if (std::find(metric_names_.cbegin(), metric_names_.cend(),
value) == metric_names_.cend()) {
@@ -535,7 +575,6 @@ class LearnerImpl : public Learner {
}
// Short hand for setting multiple parameters
void SetParams(std::vector<std::pair<std::string, std::string>> const& args) override {
configured_ = false;
for (auto const& kv : args) {
this->SetParam(kv.first, kv.second);
}
@@ -569,7 +608,7 @@ class LearnerImpl : public Learner {
}
/*! \brief Read-only access to the generic parameters held by this learner. */
GenericParameter const& GetGenericParameter() const override {
  return generic_parameters_;
}
void Predict(DMatrix* data, bool output_margin,
@@ -617,6 +656,7 @@ class LearnerImpl : public Learner {
}
void ConfigureObjective(LearnerTrainParam const& old, Args* p_args) {
// Once binary IO is gone, NONE of these config is useful.
if (cfg_.find("num_class") != cfg_.cend() && cfg_.at("num_class") != "0") {
cfg_["num_output_group"] = cfg_["num_class"];
if (atoi(cfg_["num_class"].c_str()) > 1 && cfg_.count("objective") == 0) {
@@ -627,13 +667,13 @@ class LearnerImpl : public Learner {
if (cfg_.find("max_delta_step") == cfg_.cend() &&
cfg_.find("objective") != cfg_.cend() &&
tparam_.objective == "count:poisson") {
// max_delta_step is a duplicated parameter in Poisson regression and tree param.
// Rename one of them once binary IO is gone.
cfg_["max_delta_step"] = kMaxDeltaStepDefaultValue;
}
if (obj_ == nullptr || tparam_.objective != old.objective) {
obj_.reset(ObjFunction::Create(tparam_.objective, &generic_param_));
obj_.reset(ObjFunction::Create(tparam_.objective, &generic_parameters_));
}
// reset the base score
mparam_.base_score = obj_->ProbToMargin(mparam_.base_score);
auto& args = *p_args;
args = {cfg_.cbegin(), cfg_.cend()}; // renew
obj_->Configure(args);
@@ -645,7 +685,7 @@ class LearnerImpl : public Learner {
return m->Name() != name;
};
if (std::all_of(metrics_.begin(), metrics_.end(), DupCheck)) {
metrics_.emplace_back(std::unique_ptr<Metric>(Metric::Create(name, &generic_param_)));
metrics_.emplace_back(std::unique_ptr<Metric>(Metric::Create(name, &generic_parameters_)));
mparam_.contain_eval_metrics = 1;
}
}
@@ -656,8 +696,8 @@ class LearnerImpl : public Learner {
/*!
 * \brief Create the gradient booster when required, then configure it.
 *
 * A booster is (re)created only when none exists yet or when the requested
 * booster name differs from the one in `old`.  It receives pointers to the
 * learner's generic parameters and model parameters.
 *
 * \param old training parameters captured before the current update
 * \param args arguments forwarded to the booster's Configure
 */
void ConfigureGBM(LearnerTrainParam const& old, Args const& args) {
  if (gbm_ == nullptr || old.booster != tparam_.booster) {
    gbm_.reset(GradientBooster::Create(tparam_.booster, &generic_parameters_,
                                       &learner_model_param_, cache_));
  }
  gbm_->Configure(args);
}
@@ -682,7 +722,8 @@ class LearnerImpl : public Learner {
}
CHECK_NE(mparam_.num_feature, 0)
<< "0 feature is supplied. Are you using raw Booster interface?";
// setup
learner_model_param_.num_feature = mparam_.num_feature;
// Remove these once binary IO is gone.
cfg_["num_feature"] = common::ToString(mparam_.num_feature);
cfg_["num_class"] = common::ToString(mparam_.num_class);
}
@@ -701,7 +742,8 @@ class LearnerImpl : public Learner {
}
// model parameter
LearnerModelParam mparam_;
LearnerModelParamLegacy mparam_;
LearnerModelParam learner_model_param_;
LearnerTrainParam tparam_;
// configurations
std::map<std::string, std::string> cfg_;
@@ -713,8 +755,7 @@ class LearnerImpl : public Learner {
std::map<DMatrix*, HostDeviceVector<bst_float>> preds_;
// gradient pairs
HostDeviceVector<GradientPair> gpair_;
bool configured_;
bool need_configuration_;
private:
/*! \brief random number transformation seed. */

View File

@@ -252,7 +252,7 @@ class CyclicFeatureSelector : public FeatureSelector {
/*!
 * \brief Pick the next feature by cycling through them in order.
 * \return `iteration` modulo the number of features in the learner model
 *         parameters; the remaining arguments are not used.
 */
int NextFeature(int iteration, const gbm::GBLinearModel &model,
                int group_idx, const std::vector<GradientPair> &gpair,
                DMatrix *p_fmat, float alpha, float lambda) override {
  return iteration % model.learner_model_param_->num_feature;
}
};
@@ -266,7 +266,7 @@ class ShuffleFeatureSelector : public FeatureSelector {
const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, float alpha, float lambda, int param) override {
if (feat_index_.size() == 0) {
feat_index_.resize(model.param.num_feature);
feat_index_.resize(model.learner_model_param_->num_feature);
std::iota(feat_index_.begin(), feat_index_.end(), 0);
}
std::shuffle(feat_index_.begin(), feat_index_.end(), common::GlobalRandom());
@@ -275,7 +275,7 @@ class ShuffleFeatureSelector : public FeatureSelector {
/*!
 * \brief Pick the next feature from the `feat_index_` table.
 * \return the entry of `feat_index_` at `iteration` modulo the number of
 *         features in the learner model parameters.
 */
int NextFeature(int iteration, const gbm::GBLinearModel &model,
                int group_idx, const std::vector<GradientPair> &gpair,
                DMatrix *p_fmat, float alpha, float lambda) override {
  return feat_index_[iteration % model.learner_model_param_->num_feature];
}
protected:
@@ -291,7 +291,7 @@ class RandomFeatureSelector : public FeatureSelector {
/*!
 * \brief Pick the next feature at random.
 * \return a draw from the global random engine, reduced modulo the number of
 *         features in the learner model parameters.
 */
int NextFeature(int iteration, const gbm::GBLinearModel &model,
                int group_idx, const std::vector<GradientPair> &gpair,
                DMatrix *p_fmat, float alpha, float lambda) override {
  return common::GlobalRandom()() % model.learner_model_param_->num_feature;
}
};
@@ -310,11 +310,11 @@ class GreedyFeatureSelector : public FeatureSelector {
const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, float alpha, float lambda, int param) override {
top_k_ = static_cast<bst_uint>(param);
const bst_uint ngroup = model.param.num_output_group;
const bst_uint ngroup = model.learner_model_param_->num_output_group;
if (param <= 0) top_k_ = std::numeric_limits<bst_uint>::max();
if (counter_.size() == 0) {
counter_.resize(ngroup);
gpair_sums_.resize(model.param.num_feature * ngroup);
gpair_sums_.resize(model.learner_model_param_->num_feature * ngroup);
}
for (bst_uint gid = 0u; gid < ngroup; ++gid) {
counter_[gid] = 0u;
@@ -327,10 +327,10 @@ class GreedyFeatureSelector : public FeatureSelector {
// k-th selected feature for a group
auto k = counter_[group_idx]++;
// stop after either reaching top-K or going through all the features in a group
if (k >= top_k_ || counter_[group_idx] == model.param.num_feature) return -1;
if (k >= top_k_ || counter_[group_idx] == model.learner_model_param_->num_feature) return -1;
const int ngroup = model.param.num_output_group;
const bst_omp_uint nfeat = model.param.num_feature;
const int ngroup = model.learner_model_param_->num_output_group;
const bst_omp_uint nfeat = model.learner_model_param_->num_feature;
// Calculate univariate gradient sums
std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.));
for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
@@ -387,8 +387,8 @@ class ThriftyFeatureSelector : public FeatureSelector {
DMatrix *p_fmat, float alpha, float lambda, int param) override {
top_k_ = static_cast<bst_uint>(param);
if (param <= 0) top_k_ = std::numeric_limits<bst_uint>::max();
const bst_uint ngroup = model.param.num_output_group;
const bst_omp_uint nfeat = model.param.num_feature;
const bst_uint ngroup = model.learner_model_param_->num_output_group;
const bst_omp_uint nfeat = model.learner_model_param_->num_feature;
if (deltaw_.size() == 0) {
deltaw_.resize(nfeat * ngroup);
@@ -444,9 +444,9 @@ class ThriftyFeatureSelector : public FeatureSelector {
// k-th selected feature for a group
auto k = counter_[group_idx]++;
// stop after either reaching top-N or going through all the features in a group
if (k >= top_k_ || counter_[group_idx] == model.param.num_feature) return -1;
if (k >= top_k_ || counter_[group_idx] == model.learner_model_param_->num_feature) return -1;
// note that sorted_idx stores the "long" indices
const size_t grp_offset = group_idx * model.param.num_feature;
const size_t grp_offset = group_idx * model.learner_model_param_->num_feature;
return static_cast<int>(sorted_idx_[grp_offset + k] - grp_offset);
}

View File

@@ -35,7 +35,7 @@ class CoordinateUpdater : public LinearUpdater {
void Update(HostDeviceVector<GradientPair> *in_gpair, DMatrix *p_fmat,
gbm::GBLinearModel *model, double sum_instance_weight) override {
tparam_.DenormalizePenalties(sum_instance_weight);
const int ngroup = model->param.num_output_group;
const int ngroup = model->learner_model_param_->num_output_group;
// update bias
for (int group_idx = 0; group_idx < ngroup; ++group_idx) {
auto grad = GetBiasGradientParallel(group_idx, ngroup,
@@ -52,7 +52,7 @@ class CoordinateUpdater : public LinearUpdater {
tparam_.reg_lambda_denorm, cparam_.top_k);
// update weights
for (int group_idx = 0; group_idx < ngroup; ++group_idx) {
for (unsigned i = 0U; i < model->param.num_feature; i++) {
for (unsigned i = 0U; i < model->learner_model_param_->num_feature; i++) {
int fidx = selector_->NextFeature
(i, *model, group_idx, in_gpair->ConstHostVector(), p_fmat,
tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm);
@@ -65,7 +65,7 @@ class CoordinateUpdater : public LinearUpdater {
inline void UpdateFeature(int fidx, int group_idx, std::vector<GradientPair> *in_gpair,
DMatrix *p_fmat, gbm::GBLinearModel *model) {
const int ngroup = model->param.num_output_group;
const int ngroup = model->learner_model_param_->num_output_group;
bst_float &w = (*model)[fidx][group_idx];
auto gradient =
GetGradientParallel(group_idx, ngroup, fidx, *in_gpair, p_fmat);

View File

@@ -41,7 +41,7 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
monitor_.Init("GPUCoordinateUpdater");
}
void LazyInitDevice(DMatrix *p_fmat, const gbm::GBLinearModelParam &model_param) {
void LazyInitDevice(DMatrix *p_fmat, const LearnerModelParam &model_param) {
if (learner_param_->gpu_id < 0) return;
num_row_ = static_cast<size_t>(p_fmat->Info().num_row_);
@@ -88,14 +88,14 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
gbm::GBLinearModel *model, double sum_instance_weight) override {
tparam_.DenormalizePenalties(sum_instance_weight);
monitor_.Start("LazyInitDevice");
this->LazyInitDevice(p_fmat, model->param);
this->LazyInitDevice(p_fmat, *(model->learner_model_param_));
monitor_.Stop("LazyInitDevice");
monitor_.Start("UpdateGpair");
auto &in_gpair_host = in_gpair->ConstHostVector();
// Update gpair
if (learner_param_->gpu_id >= 0) {
this->UpdateGpair(in_gpair_host, model->param);
this->UpdateGpair(in_gpair_host);
}
monitor_.Stop("UpdateGpair");
@@ -107,8 +107,9 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm,
coord_param_.top_k);
monitor_.Start("UpdateFeature");
for (auto group_idx = 0; group_idx < model->param.num_output_group; ++group_idx) {
for (auto i = 0U; i < model->param.num_feature; i++) {
for (auto group_idx = 0; group_idx < model->learner_model_param_->num_output_group;
++group_idx) {
for (auto i = 0U; i < model->learner_model_param_->num_feature; i++) {
auto fidx = selector_->NextFeature(
i, *model, group_idx, in_gpair->ConstHostVector(), p_fmat,
tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm);
@@ -120,11 +121,12 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
}
void UpdateBias(DMatrix *p_fmat, gbm::GBLinearModel *model) {
for (int group_idx = 0; group_idx < model->param.num_output_group; ++group_idx) {
for (int group_idx = 0; group_idx < model->learner_model_param_->num_output_group;
++group_idx) {
// Get gradient
auto grad = GradientPair(0, 0);
if (learner_param_->gpu_id >= 0) {
grad = GetBiasGradient(group_idx, model->param.num_output_group);
grad = GetBiasGradient(group_idx, model->learner_model_param_->num_output_group);
}
auto dbias = static_cast<float>(
tparam_.learning_rate *
@@ -133,7 +135,7 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
// Update residual
if (learner_param_->gpu_id >= 0) {
UpdateBiasResidual(dbias, group_idx, model->param.num_output_group);
UpdateBiasResidual(dbias, group_idx, model->learner_model_param_->num_output_group);
}
}
}
@@ -145,7 +147,7 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
// Get gradient
auto grad = GradientPair(0, 0);
if (learner_param_->gpu_id >= 0) {
grad = GetGradient(group_idx, model->param.num_output_group, fidx);
grad = GetGradient(group_idx, model->learner_model_param_->num_output_group, fidx);
}
auto dw = static_cast<float>(tparam_.learning_rate *
CoordinateDelta(grad.GetGrad(), grad.GetHess(),
@@ -154,7 +156,7 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
w += dw;
if (learner_param_->gpu_id >= 0) {
UpdateResidual(dw, group_idx, model->param.num_output_group, fidx);
UpdateResidual(dw, group_idx, model->learner_model_param_->num_output_group, fidx);
}
}
@@ -217,8 +219,7 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
return num_row_ == 0;
}
void UpdateGpair(const std::vector<GradientPair> &host_gpair,
const gbm::GBLinearModelParam &model_param) {
void UpdateGpair(const std::vector<GradientPair> &host_gpair) {
dh::safe_cuda(cudaMemcpyAsync(
gpair_.data(),
host_gpair.data(),

View File

@@ -27,7 +27,7 @@ class ShotgunUpdater : public LinearUpdater {
gbm::GBLinearModel *model, double sum_instance_weight) override {
auto &gpair = in_gpair->HostVector();
param_.DenormalizePenalties(sum_instance_weight);
const int ngroup = model->param.num_output_group;
const int ngroup = model->learner_model_param_->num_output_group;
// update bias
for (int gid = 0; gid < ngroup; ++gid) {

View File

@@ -49,9 +49,9 @@ class CPUPredictor : public Predictor {
void PredLoopInternal(DMatrix* p_fmat, std::vector<bst_float>* out_preds,
gbm::GBTreeModel const& model, int32_t tree_begin,
int32_t tree_end) {
int32_t const num_group = model.param.num_output_group;
int32_t const num_group = model.learner_model_param_->num_output_group;
const int nthread = omp_get_max_threads();
InitThreadTemp(nthread, model.param.num_feature);
InitThreadTemp(nthread, model.learner_model_param_->num_feature);
std::vector<bst_float>& preds = *out_preds;
CHECK_EQ(model.param.size_leaf_vector, 0)
<< "size_leaf_vector is enforced to 0 so far";
@@ -108,7 +108,7 @@ class CPUPredictor : public Predictor {
unsigned ntree_limit) const {
CHECK(cache_);
if (ntree_limit == 0 ||
ntree_limit * model.param.num_output_group >= model.trees.size()) {
ntree_limit * model.learner_model_param_->num_output_group >= model.trees.size()) {
auto it = cache_->find(dmat);
if (it != cache_->end()) {
const HostDeviceVector<bst_float>& y = it->second.predictions;
@@ -126,8 +126,8 @@ class CPUPredictor : public Predictor {
void InitOutPredictions(const MetaInfo& info,
HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model) const {
CHECK_NE(model.param.num_output_group, 0);
size_t n = model.param.num_output_group * info.num_row_;
CHECK_NE(model.learner_model_param_->num_output_group, 0);
size_t n = model.learner_model_param_->num_output_group * info.num_row_;
const auto& base_margin = info.base_margin_.HostVector();
out_preds->Resize(n);
std::vector<bst_float>& out_preds_h = out_preds->HostVector();
@@ -139,19 +139,19 @@ class CPUPredictor : public Predictor {
std::ostringstream oss;
oss << "Ignoring the base margin, since it has incorrect length. "
<< "The base margin must be an array of length ";
if (model.param.num_output_group > 1) {
if (model.learner_model_param_->num_output_group > 1) {
oss << "[num_class] * [number of data points], i.e. "
<< model.param.num_output_group << " * " << info.num_row_
<< model.learner_model_param_->num_output_group << " * " << info.num_row_
<< " = " << n << ". ";
} else {
oss << "[number of data points], i.e. " << info.num_row_ << ". ";
}
oss << "Instead, all data points will use "
<< "base_margin = " << model.base_margin;
<< "base_score = " << model.learner_model_param_->base_score;
LOG(WARNING) << oss.str();
}
std::fill(out_preds_h.begin(), out_preds_h.end(),
model.base_margin);
model.learner_model_param_->base_score);
}
}
@@ -167,7 +167,7 @@ class CPUPredictor : public Predictor {
}
this->InitOutPredictions(dmat->Info(), out_preds, model);
ntree_limit *= model.param.num_output_group;
ntree_limit *= model.learner_model_param_->num_output_group;
if (ntree_limit == 0 || ntree_limit > model.trees.size()) {
ntree_limit = static_cast<unsigned>(model.trees.size());
}
@@ -198,7 +198,7 @@ class CPUPredictor : public Predictor {
InitOutPredictions(e.data->Info(), &(e.predictions), model);
PredLoopInternal(e.data.get(), &(e.predictions.HostVector()), model, 0,
model.trees.size());
} else if (model.param.num_output_group == 1 && updaters->size() > 0 &&
} else if (model.learner_model_param_->num_output_group == 1 && updaters->size() > 0 &&
num_new_trees == 1 &&
updaters->back()->UpdatePredictionCache(e.data.get(),
&(e.predictions))) {
@@ -215,29 +215,29 @@ class CPUPredictor : public Predictor {
const gbm::GBTreeModel& model, unsigned ntree_limit) override {
if (thread_temp.size() == 0) {
thread_temp.resize(1, RegTree::FVec());
thread_temp[0].Init(model.param.num_feature);
thread_temp[0].Init(model.learner_model_param_->num_feature);
}
ntree_limit *= model.param.num_output_group;
ntree_limit *= model.learner_model_param_->num_output_group;
if (ntree_limit == 0 || ntree_limit > model.trees.size()) {
ntree_limit = static_cast<unsigned>(model.trees.size());
}
out_preds->resize(model.param.num_output_group *
out_preds->resize(model.learner_model_param_->num_output_group *
(model.param.size_leaf_vector + 1));
// loop over output groups
for (uint32_t gid = 0; gid < model.param.num_output_group; ++gid) {
for (uint32_t gid = 0; gid < model.learner_model_param_->num_output_group; ++gid) {
(*out_preds)[gid] =
PredValue(inst, model.trees, model.tree_info, gid,
&thread_temp[0], 0, ntree_limit) +
model.base_margin;
model.learner_model_param_->base_score;
}
}
void PredictLeaf(DMatrix* p_fmat, std::vector<bst_float>* out_preds,
const gbm::GBTreeModel& model, unsigned ntree_limit) override {
const int nthread = omp_get_max_threads();
InitThreadTemp(nthread, model.param.num_feature);
InitThreadTemp(nthread, model.learner_model_param_->num_feature);
const MetaInfo& info = p_fmat->Info();
// number of valid trees
ntree_limit *= model.param.num_output_group;
ntree_limit *= model.learner_model_param_->num_output_group;
if (ntree_limit == 0 || ntree_limit > model.trees.size()) {
ntree_limit = static_cast<unsigned>(model.trees.size());
}
@@ -268,20 +268,20 @@ class CPUPredictor : public Predictor {
bool approximate, int condition,
unsigned condition_feature) override {
const int nthread = omp_get_max_threads();
InitThreadTemp(nthread, model.param.num_feature);
InitThreadTemp(nthread, model.learner_model_param_->num_feature);
const MetaInfo& info = p_fmat->Info();
// number of valid trees
ntree_limit *= model.param.num_output_group;
ntree_limit *= model.learner_model_param_->num_output_group;
if (ntree_limit == 0 || ntree_limit > model.trees.size()) {
ntree_limit = static_cast<unsigned>(model.trees.size());
}
const int ngroup = model.param.num_output_group;
const int ngroup = model.learner_model_param_->num_output_group;
CHECK_NE(ngroup, 0);
size_t const ncolumns = model.param.num_feature + 1;
size_t const ncolumns = model.learner_model_param_->num_feature + 1;
CHECK_NE(ncolumns, 0);
// allocate space for (number of features + bias) times the number of rows
std::vector<bst_float>& contribs = *out_contribs;
contribs.resize(info.num_row_ * ncolumns * model.param.num_output_group);
contribs.resize(info.num_row_ * ncolumns * model.learner_model_param_->num_output_group);
// make sure contributions is zeroed, we could be reusing a previously
// allocated one
std::fill(contribs.begin(), contribs.end(), 0);
@@ -298,8 +298,8 @@ class CPUPredictor : public Predictor {
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < nsize; ++i) {
auto row_idx = static_cast<size_t>(batch.base_rowid + i);
std::vector<bst_float> this_tree_contribs(ncolumns);
RegTree::FVec& feats = thread_temp[omp_get_thread_num()];
std::vector<bst_float> this_tree_contribs(ncolumns);
// loop over all classes
for (int gid = 0; gid < ngroup; ++gid) {
bst_float* p_contribs = &contribs[(row_idx * ngroup + gid) * ncolumns];
@@ -326,7 +326,7 @@ class CPUPredictor : public Predictor {
if (base_margin.size() != 0) {
p_contribs[ncolumns - 1] += base_margin[row_idx * ngroup + gid];
} else {
p_contribs[ncolumns - 1] += model.base_margin;
p_contribs[ncolumns - 1] += model.learner_model_param_->base_score;
}
}
}
@@ -338,8 +338,8 @@ class CPUPredictor : public Predictor {
std::vector<bst_float>* tree_weights,
bool approximate) override {
const MetaInfo& info = p_fmat->Info();
const int ngroup = model.param.num_output_group;
size_t const ncolumns = model.param.num_feature;
const int ngroup = model.learner_model_param_->num_output_group;
size_t const ncolumns = model.learner_model_param_->num_feature;
const unsigned row_chunk = ngroup * (ncolumns + 1) * (ncolumns + 1);
const unsigned mrow_chunk = (ncolumns + 1) * (ncolumns + 1);
const unsigned crow_chunk = ngroup * (ncolumns + 1);

View File

@@ -217,7 +217,7 @@ class GPUPredictor : public xgboost::Predictor {
cudaMemcpyHostToDevice));
this->tree_begin_ = tree_begin;
this->tree_end_ = tree_end;
this->num_group_ = model.param.num_output_group;
this->num_group_ = model.learner_model_param_->num_output_group;
}
void PredictInternal(const SparsePage& batch,
@@ -286,9 +286,9 @@ class GPUPredictor : public xgboost::Predictor {
for (auto &batch : dmat->GetBatches<SparsePage>()) {
batch.offset.SetDevice(generic_param_->gpu_id);
batch.data.SetDevice(generic_param_->gpu_id);
PredictInternal(batch, model.param.num_feature,
PredictInternal(batch, model.learner_model_param_->num_feature,
out_preds, batch_offset);
batch_offset += batch.Size() * model.param.num_output_group;
batch_offset += batch.Size() * model.learner_model_param_->num_output_group;
}
monitor_.StopCuda("DevicePredictInternal");
@@ -317,7 +317,7 @@ class GPUPredictor : public xgboost::Predictor {
}
this->InitOutPredictions(dmat->Info(), out_preds, model);
int32_t tree_end = ntree_limit * model.param.num_output_group;
int32_t tree_end = ntree_limit * model.learner_model_param_->num_output_group;
if (ntree_limit == 0 || ntree_limit > model.trees.size()) {
tree_end = static_cast<unsigned>(model.trees.size());
@@ -347,7 +347,7 @@ class GPUPredictor : public xgboost::Predictor {
void InitOutPredictions(const MetaInfo& info,
HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model) const {
size_t n_classes = model.param.num_output_group;
size_t n_classes = model.learner_model_param_->num_output_group;
size_t n = n_classes * info.num_row_;
const HostDeviceVector<bst_float>& base_margin = info.base_margin_;
out_preds->SetDevice(generic_param_->gpu_id);
@@ -356,14 +356,14 @@ class GPUPredictor : public xgboost::Predictor {
CHECK_EQ(base_margin.Size(), n);
out_preds->Copy(base_margin);
} else {
out_preds->Fill(model.base_margin);
out_preds->Fill(model.learner_model_param_->base_score);
}
}
bool PredictFromCache(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model, unsigned ntree_limit) {
if (ntree_limit == 0 ||
ntree_limit * model.param.num_output_group >= model.trees.size()) {
ntree_limit * model.learner_model_param_->num_output_group >= model.trees.size()) {
auto it = (*cache_).find(dmat);
if (it != cache_->cend()) {
const HostDeviceVector<bst_float>& y = it->second.predictions;
@@ -395,7 +395,7 @@ class GPUPredictor : public xgboost::Predictor {
this->InitOutPredictions(dmat->Info(), &predictions, model);
}
if (model.param.num_output_group == 1 && updaters->size() > 0 &&
if (model.learner_model_param_->num_output_group == 1 && updaters->size() > 0 &&
num_new_trees == 1 &&
updaters->back()->UpdatePredictionCache(e.data.get(), &predictions)) {
// do nothing

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2014 by Contributors
* Copyright 2014-2019 by Contributors
* \file param.h
* \brief training parameters, statistics used to support tree construction.
* \author Tianqi Chen

View File

@@ -39,7 +39,7 @@ struct GPUTrainingParam {
using NodeIdT = int32_t;
/** used to assign default id to a Node */
static const int kUnusedNode = -1;
static const bst_node_t kUnusedNode = -1;
/**
* @enum DefaultDirection node.cuh