Clang-tidy static analysis (#3222)

* Clang-tidy static analysis

* Modernise checks

* Google coding standard checks

* Identifier renaming according to Google style
Author: Rory Mitchell
Date: 2018-04-19 18:57:13 +12:00
Committed by: GitHub
Parent: 3242b0a378
Commit: ccf80703ef
97 changed files with 3407 additions and 3354 deletions
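Most of the diff below is one mechanical change: private data members gain a trailing underscore, as the Google C++ style guide requires and as clang-tidy's readability-identifier-naming check enforces. A minimal sketch of the rule (the class and members here are illustrative, not taken from XGBoost):

    // Before: members read like locals inside long methods.
    class Booster {
     private:
      double sum_instance_weight;  // flagged by readability-identifier-naming
      bool is_converged;
    };

    // After: the trailing underscore marks class data members (Google style).
    class Booster {
     private:
      double sum_instance_weight_;
      bool is_converged_;
    };

The same commit renames accessors such as info() to Info() and data_h() to HostVector(), and the bst_gpair type to GradientPair, matching the Google convention of CamelCase function and type names.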

View File

@@ -52,9 +52,9 @@ class GBLinear : public GradientBooster {
explicit GBLinear(const std::vector<std::shared_ptr<DMatrix> > &cache,
bst_float base_margin)
: base_margin_(base_margin),
- sum_instance_weight(0),
- sum_weight_complete(false),
- is_converged(false) {
+ sum_instance_weight_(0),
+ sum_weight_complete_(false),
+ is_converged_(false) {
// Add matrices to the prediction cache
for (auto &d : cache) {
PredictionCacheEntry e;
@@ -63,46 +63,46 @@ class GBLinear : public GradientBooster {
}
}
void Configure(const std::vector<std::pair<std::string, std::string> >& cfg) override {
- if (model.weight.size() == 0) {
- model.param.InitAllowUnknown(cfg);
+ if (model_.weight.size() == 0) {
+ model_.param.InitAllowUnknown(cfg);
}
- param.InitAllowUnknown(cfg);
- updater.reset(LinearUpdater::Create(param.updater));
- updater->Init(cfg);
- monitor.Init("GBLinear ", param.debug_verbose);
+ param_.InitAllowUnknown(cfg);
+ updater_.reset(LinearUpdater::Create(param_.updater));
+ updater_->Init(cfg);
+ monitor_.Init("GBLinear ", param_.debug_verbose);
}
void Load(dmlc::Stream* fi) override {
- model.Load(fi);
+ model_.Load(fi);
}
void Save(dmlc::Stream* fo) const override {
- model.Save(fo);
+ model_.Save(fo);
}
void DoBoost(DMatrix *p_fmat,
- HostDeviceVector<bst_gpair> *in_gpair,
+ HostDeviceVector<GradientPair> *in_gpair,
ObjFunction* obj) override {
- monitor.Start("DoBoost");
+ monitor_.Start("DoBoost");
if (!p_fmat->HaveColAccess(false)) {
- std::vector<bool> enabled(p_fmat->info().num_col, true);
- p_fmat->InitColAccess(enabled, 1.0f, param.max_row_perbatch, false);
+ std::vector<bool> enabled(p_fmat->Info().num_col_, true);
+ p_fmat->InitColAccess(enabled, 1.0f, param_.max_row_perbatch, false);
}
- model.LazyInitModel();
+ model_.LazyInitModel();
this->LazySumWeights(p_fmat);
if (!this->CheckConvergence()) {
- updater->Update(&in_gpair->data_h(), p_fmat, &model, sum_instance_weight);
+ updater_->Update(&in_gpair->HostVector(), p_fmat, &model_, sum_instance_weight_);
}
this->UpdatePredictionCache();
- monitor.Stop("DoBoost");
+ monitor_.Stop("DoBoost");
}
void PredictBatch(DMatrix *p_fmat,
HostDeviceVector<bst_float> *out_preds,
unsigned ntree_limit) override {
monitor.Start("PredictBatch");
monitor_.Start("PredictBatch");
CHECK_EQ(ntree_limit, 0U)
<< "GBLinear::Predict ntrees is only valid for gbtree predictor";
@@ -110,19 +110,19 @@ class GBLinear : public GradientBooster {
auto it = cache_.find(p_fmat);
if (it != cache_.end() && it->second.predictions.size() != 0) {
std::vector<bst_float> &y = it->second.predictions;
- out_preds->resize(y.size());
- std::copy(y.begin(), y.end(), out_preds->data_h().begin());
+ out_preds->Resize(y.size());
+ std::copy(y.begin(), y.end(), out_preds->HostVector().begin());
} else {
- this->PredictBatchInternal(p_fmat, &out_preds->data_h());
+ this->PredictBatchInternal(p_fmat, &out_preds->HostVector());
}
- monitor.Stop("PredictBatch");
+ monitor_.Stop("PredictBatch");
}
// add base margin
void PredictInstance(const SparseBatch::Inst &inst,
std::vector<bst_float> *out_preds,
unsigned ntree_limit,
unsigned root_index) override {
- const int ngroup = model.param.num_output_group;
+ const int ngroup = model_.param.num_output_group;
for (int gid = 0; gid < ngroup; ++gid) {
this->Pred(inst, dmlc::BeginPtr(*out_preds), gid, base_margin_);
}
@@ -138,15 +138,15 @@ class GBLinear : public GradientBooster {
std::vector<bst_float>* out_contribs,
unsigned ntree_limit, bool approximate, int condition = 0,
unsigned condition_feature = 0) override {
- model.LazyInitModel();
+ model_.LazyInitModel();
CHECK_EQ(ntree_limit, 0U)
<< "GBLinear::PredictContribution: ntrees is only valid for gbtree predictor";
- const std::vector<bst_float>& base_margin = p_fmat->info().base_margin;
- const int ngroup = model.param.num_output_group;
- const size_t ncolumns = model.param.num_feature + 1;
+ const std::vector<bst_float>& base_margin = p_fmat->Info().base_margin_;
+ const int ngroup = model_.param.num_output_group;
+ const size_t ncolumns = model_.param.num_feature + 1;
// allocate space for (#features + bias) times #groups times #rows
std::vector<bst_float>& contribs = *out_contribs;
- contribs.resize(p_fmat->info().num_row * ncolumns * ngroup);
+ contribs.resize(p_fmat->Info().num_row_ * ncolumns * ngroup);
// make sure contributions is zeroed, we could be reusing a previously allocated one
std::fill(contribs.begin(), contribs.end(), 0);
// start collecting the contributions
@@ -155,21 +155,21 @@ class GBLinear : public GradientBooster {
while (iter->Next()) {
const RowBatch& batch = iter->Value();
// parallel over local batch
- const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size);
+ const auto nsize = static_cast<bst_omp_uint>(batch.size);
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < nsize; ++i) {
const RowBatch::Inst &inst = batch[i];
- size_t row_idx = static_cast<size_t>(batch.base_rowid + i);
+ auto row_idx = static_cast<size_t>(batch.base_rowid + i);
// loop over output groups
for (int gid = 0; gid < ngroup; ++gid) {
bst_float *p_contribs = &contribs[(row_idx * ngroup + gid) * ncolumns];
// calculate linear terms' contributions
for (bst_uint c = 0; c < inst.length; ++c) {
- if (inst[c].index >= model.param.num_feature) continue;
- p_contribs[inst[c].index] = inst[c].fvalue * model[inst[c].index][gid];
+ if (inst[c].index >= model_.param.num_feature) continue;
+ p_contribs[inst[c].index] = inst[c].fvalue * model_[inst[c].index][gid];
}
// add base margin to BIAS
- p_contribs[ncolumns - 1] = model.bias()[gid] +
+ p_contribs[ncolumns - 1] = model_.bias()[gid] +
((base_margin.size() != 0) ? base_margin[row_idx * ngroup + gid] : base_margin_);
}
}
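The static_cast initializers above switch to auto under clang-tidy's modernize-use-auto: when the initializer is a cast, the type is already spelled on the right-hand side, so repeating it on the left is redundant. A self-contained sketch (the names and types are illustrative):

    #include <cstddef>
    #include <cstdint>

    void Example(std::size_t batch_size, std::int64_t base_rowid, int i) {
      // Flagged: the target type appears twice on the same line.
      const unsigned nsize_old = static_cast<unsigned>(batch_size);
      // After the fix: auto deduces exactly the type of the cast.
      const auto nsize = static_cast<unsigned>(batch_size);
      const auto row_idx = static_cast<std::size_t>(base_rowid + i);
      (void)nsize_old; (void)nsize; (void)row_idx;  // suppress unused warnings
    }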
@@ -182,34 +182,34 @@ class GBLinear : public GradientBooster {
std::vector<bst_float>& contribs = *out_contribs;
// linear models have no interaction effects
- const size_t nelements = model.param.num_feature*model.param.num_feature;
- contribs.resize(p_fmat->info().num_row * nelements * model.param.num_output_group);
+ const size_t nelements = model_.param.num_feature*model_.param.num_feature;
+ contribs.resize(p_fmat->Info().num_row_ * nelements * model_.param.num_output_group);
std::fill(contribs.begin(), contribs.end(), 0);
}
std::vector<std::string> DumpModel(const FeatureMap& fmap,
bool with_stats,
std::string format) const override {
- return model.DumpModel(fmap, with_stats, format);
+ return model_.DumpModel(fmap, with_stats, format);
}
protected:
void PredictBatchInternal(DMatrix *p_fmat,
std::vector<bst_float> *out_preds) {
monitor.Start("PredictBatchInternal");
model.LazyInitModel();
monitor_.Start("PredictBatchInternal");
model_.LazyInitModel();
std::vector<bst_float> &preds = *out_preds;
const std::vector<bst_float>& base_margin = p_fmat->info().base_margin;
const std::vector<bst_float>& base_margin = p_fmat->Info().base_margin_;
// start collecting the prediction
dmlc::DataIter<RowBatch> *iter = p_fmat->RowIterator();
- const int ngroup = model.param.num_output_group;
- preds.resize(p_fmat->info().num_row * ngroup);
+ const int ngroup = model_.param.num_output_group;
+ preds.resize(p_fmat->Info().num_row_ * ngroup);
while (iter->Next()) {
const RowBatch &batch = iter->Value();
// output convention: nrow * k, where nrow is number of rows
// k is number of group
// parallel over local batch
- const omp_ulong nsize = static_cast<omp_ulong>(batch.size);
+ const auto nsize = static_cast<omp_ulong>(batch.size);
#pragma omp parallel for schedule(static)
for (omp_ulong i = 0; i < nsize; ++i) {
const size_t ridx = batch.base_rowid + i;
@@ -221,14 +221,14 @@ class GBLinear : public GradientBooster {
}
}
}
monitor.Stop("PredictBatchInternal");
monitor_.Stop("PredictBatchInternal");
}
void UpdatePredictionCache() {
// update cache entry
for (auto &kv : cache_) {
PredictionCacheEntry &e = kv.second;
if (e.predictions.size() == 0) {
- size_t n = model.param.num_output_group * e.data->info().num_row;
+ size_t n = model_.param.num_output_group * e.data->Info().num_row_;
e.predictions.resize(n);
}
this->PredictBatchInternal(e.data.get(), &e.predictions);
@@ -236,53 +236,53 @@ class GBLinear : public GradientBooster {
}
bool CheckConvergence() {
- if (param.tolerance == 0.0f) return false;
- if (is_converged) return true;
- if (previous_model.weight.size() != model.weight.size()) {
- previous_model = model;
+ if (param_.tolerance == 0.0f) return false;
+ if (is_converged_) return true;
+ if (previous_model_.weight.size() != model_.weight.size()) {
+ previous_model_ = model_;
return false;
}
float largest_dw = 0.0;
- for (size_t i = 0; i < model.weight.size(); i++) {
+ for (size_t i = 0; i < model_.weight.size(); i++) {
largest_dw = std::max(
- largest_dw, std::abs(model.weight[i] - previous_model.weight[i]));
+ largest_dw, std::abs(model_.weight[i] - previous_model_.weight[i]));
}
- previous_model = model;
+ previous_model_ = model_;
- is_converged = largest_dw <= param.tolerance;
- return is_converged;
+ is_converged_ = largest_dw <= param_.tolerance;
+ return is_converged_;
}
void LazySumWeights(DMatrix *p_fmat) {
- if (!sum_weight_complete) {
- auto &info = p_fmat->info();
- for (size_t i = 0; i < info.num_row; i++) {
- sum_instance_weight += info.GetWeight(i);
+ if (!sum_weight_complete_) {
+ auto &info = p_fmat->Info();
+ for (size_t i = 0; i < info.num_row_; i++) {
+ sum_instance_weight_ += info.GetWeight(i);
}
- sum_weight_complete = true;
+ sum_weight_complete_ = true;
}
}
inline void Pred(const RowBatch::Inst &inst, bst_float *preds, int gid,
bst_float base) {
- bst_float psum = model.bias()[gid] + base;
+ bst_float psum = model_.bias()[gid] + base;
for (bst_uint i = 0; i < inst.length; ++i) {
- if (inst[i].index >= model.param.num_feature) continue;
- psum += inst[i].fvalue * model[inst[i].index][gid];
+ if (inst[i].index >= model_.param.num_feature) continue;
+ psum += inst[i].fvalue * model_[inst[i].index][gid];
}
preds[gid] = psum;
}
// bias margin score
bst_float base_margin_;
// model field
- GBLinearModel model;
- GBLinearModel previous_model;
- GBLinearTrainParam param;
- std::unique_ptr<LinearUpdater> updater;
- double sum_instance_weight;
- bool sum_weight_complete;
- common::Monitor monitor;
- bool is_converged;
+ GBLinearModel model_;
+ GBLinearModel previous_model_;
+ GBLinearTrainParam param_;
+ std::unique_ptr<LinearUpdater> updater_;
+ double sum_instance_weight_;
+ bool sum_weight_complete_;
+ common::Monitor monitor_;
+ bool is_converged_;
/**
* \struct PredictionCacheEntry

View File

@@ -40,7 +40,7 @@ class GBLinearModel {
// weight for each of feature, bias is the last one
std::vector<bst_float> weight;
// initialize the model parameter
- inline void LazyInitModel(void) {
+ inline void LazyInitModel() {
if (!weight.empty()) return;
// bias is the last weight
weight.resize((param.num_feature + 1) * param.num_output_group);
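Dropping the void parameter list here is clang-tidy's modernize-redundant-void-arg: in C++, unlike C, f(void) and f() declare exactly the same signature, so the explicit void is noise. For example:

    struct Model {
      void LazyInitOld(void);  // C idiom, flagged by modernize-redundant-void-arg
      void LazyInit();         // equivalent, idiomatic C++
    };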

View File

@@ -143,32 +143,32 @@ class GBTree : public GradientBooster {
}
void Configure(const std::vector<std::pair<std::string, std::string> >& cfg) override {
- this->cfg = cfg;
+ this->cfg_ = cfg;
model_.Configure(cfg);
// initialize the updaters only when needed.
- std::string updater_seq = tparam.updater_seq;
- tparam.InitAllowUnknown(cfg);
- if (updater_seq != tparam.updater_seq) updaters.clear();
- for (const auto& up : updaters) {
+ std::string updater_seq = tparam_.updater_seq;
+ tparam_.InitAllowUnknown(cfg);
+ if (updater_seq != tparam_.updater_seq) updaters_.clear();
+ for (const auto& up : updaters_) {
up->Init(cfg);
}
// for the 'update' process_type, move trees into trees_to_update
- if (tparam.process_type == kUpdate) {
+ if (tparam_.process_type == kUpdate) {
model_.InitTreesToUpdate();
}
// configure predictor
- predictor = std::unique_ptr<Predictor>(Predictor::Create(tparam.predictor));
- predictor->Init(cfg, cache_);
- monitor.Init("GBTree", tparam.debug_verbose);
+ predictor_ = std::unique_ptr<Predictor>(Predictor::Create(tparam_.predictor));
+ predictor_->Init(cfg, cache_);
+ monitor_.Init("GBTree", tparam_.debug_verbose);
}
void Load(dmlc::Stream* fi) override {
model_.Load(fi);
- this->cfg.clear();
- this->cfg.push_back(std::make_pair(std::string("num_feature"),
- common::ToString(model_.param.num_feature)));
+ this->cfg_.clear();
+ this->cfg_.emplace_back(std::string("num_feature"),
+ common::ToString(model_.param.num_feature));
}
void Save(dmlc::Stream* fo) const override {
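The push_back(std::make_pair(...)) call in Load above becomes emplace_back(...) via clang-tidy's modernize-use-emplace: emplace_back forwards its arguments straight to the element's constructor, so the pair is built in place rather than built as a temporary and moved in. A standalone sketch:

    #include <string>
    #include <utility>
    #include <vector>

    int main() {
      std::vector<std::pair<std::string, std::string>> cfg;
      // Flagged: constructs a temporary pair, then moves it into the vector.
      cfg.push_back(std::make_pair(std::string("num_feature"), std::string("10")));
      // After the fix: the pair is constructed directly inside the vector.
      cfg.emplace_back("num_feature", "10");
      return 0;
    }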
@@ -177,29 +177,29 @@ class GBTree : public GradientBooster {
bool AllowLazyCheckPoint() const override {
return model_.param.num_output_group == 1 ||
- tparam.updater_seq.find("distcol") != std::string::npos;
+ tparam_.updater_seq.find("distcol") != std::string::npos;
}
void DoBoost(DMatrix* p_fmat,
- HostDeviceVector<bst_gpair>* in_gpair,
+ HostDeviceVector<GradientPair>* in_gpair,
ObjFunction* obj) override {
std::vector<std::vector<std::unique_ptr<RegTree> > > new_trees;
const int ngroup = model_.param.num_output_group;
monitor.Start("BoostNewTrees");
monitor_.Start("BoostNewTrees");
if (ngroup == 1) {
std::vector<std::unique_ptr<RegTree> > ret;
BoostNewTrees(in_gpair, p_fmat, 0, &ret);
new_trees.push_back(std::move(ret));
} else {
- CHECK_EQ(in_gpair->size() % ngroup, 0U)
+ CHECK_EQ(in_gpair->Size() % ngroup, 0U)
<< "must have exactly ngroup*nrow gpairs";
// TODO(canonizer): perform this on GPU if HostDeviceVector has device set.
- HostDeviceVector<bst_gpair> tmp(in_gpair->size() / ngroup,
- bst_gpair(), in_gpair->device());
- std::vector<bst_gpair>& gpair_h = in_gpair->data_h();
- bst_omp_uint nsize = static_cast<bst_omp_uint>(tmp.size());
+ HostDeviceVector<GradientPair> tmp(in_gpair->Size() / ngroup,
+ GradientPair(), in_gpair->DeviceIdx());
+ std::vector<GradientPair>& gpair_h = in_gpair->HostVector();
+ auto nsize = static_cast<bst_omp_uint>(tmp.Size());
for (int gid = 0; gid < ngroup; ++gid) {
- std::vector<bst_gpair>& tmp_h = tmp.data_h();
+ std::vector<GradientPair>& tmp_h = tmp.HostVector();
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < nsize; ++i) {
tmp_h[i] = gpair_h[i * ngroup + gid];
@@ -209,43 +209,43 @@ class GBTree : public GradientBooster {
new_trees.push_back(std::move(ret));
}
}
monitor.Stop("BoostNewTrees");
monitor.Start("CommitModel");
monitor_.Stop("BoostNewTrees");
monitor_.Start("CommitModel");
this->CommitModel(std::move(new_trees));
monitor.Stop("CommitModel");
monitor_.Stop("CommitModel");
}
void PredictBatch(DMatrix* p_fmat,
HostDeviceVector<bst_float>* out_preds,
unsigned ntree_limit) override {
- predictor->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit);
+ predictor_->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit);
}
void PredictInstance(const SparseBatch::Inst& inst,
std::vector<bst_float>* out_preds,
unsigned ntree_limit,
unsigned root_index) override {
- predictor->PredictInstance(inst, out_preds, model_,
+ predictor_->PredictInstance(inst, out_preds, model_,
ntree_limit, root_index);
}
void PredictLeaf(DMatrix* p_fmat,
std::vector<bst_float>* out_preds,
unsigned ntree_limit) override {
- predictor->PredictLeaf(p_fmat, out_preds, model_, ntree_limit);
+ predictor_->PredictLeaf(p_fmat, out_preds, model_, ntree_limit);
}
void PredictContribution(DMatrix* p_fmat,
std::vector<bst_float>* out_contribs,
unsigned ntree_limit, bool approximate, int condition,
unsigned condition_feature) override {
- predictor->PredictContribution(p_fmat, out_contribs, model_, ntree_limit, approximate);
+ predictor_->PredictContribution(p_fmat, out_contribs, model_, ntree_limit, approximate);
}
void PredictInteractionContributions(DMatrix* p_fmat,
std::vector<bst_float>* out_contribs,
unsigned ntree_limit, bool approximate) override {
- predictor->PredictInteractionContributions(p_fmat, out_contribs, model_,
+ predictor_->PredictInteractionContributions(p_fmat, out_contribs, model_,
ntree_limit, approximate);
}
@@ -258,18 +258,18 @@ class GBTree : public GradientBooster {
protected:
// initialize updater before using them
inline void InitUpdater() {
- if (updaters.size() != 0) return;
- std::string tval = tparam.updater_seq;
+ if (updaters_.size() != 0) return;
+ std::string tval = tparam_.updater_seq;
std::vector<std::string> ups = common::Split(tval, ',');
for (const std::string& pstr : ups) {
std::unique_ptr<TreeUpdater> up(TreeUpdater::Create(pstr.c_str()));
- up->Init(this->cfg);
- updaters.push_back(std::move(up));
+ up->Init(this->cfg_);
+ updaters_.push_back(std::move(up));
}
}
// do group specific boosting
- inline void BoostNewTrees(HostDeviceVector<bst_gpair>* gpair,
+ inline void BoostNewTrees(HostDeviceVector<GradientPair>* gpair,
DMatrix *p_fmat,
int bst_group,
std::vector<std::unique_ptr<RegTree> >* ret) {
@@ -277,26 +277,27 @@ class GBTree : public GradientBooster {
std::vector<RegTree*> new_trees;
ret->clear();
// create the trees
- for (int i = 0; i < tparam.num_parallel_tree; ++i) {
- if (tparam.process_type == kDefault) {
+ for (int i = 0; i < tparam_.num_parallel_tree; ++i) {
+ if (tparam_.process_type == kDefault) {
// create new tree
std::unique_ptr<RegTree> ptr(new RegTree());
- ptr->param.InitAllowUnknown(this->cfg);
+ ptr->param.InitAllowUnknown(this->cfg_);
ptr->InitModel();
new_trees.push_back(ptr.get());
ret->push_back(std::move(ptr));
- } else if (tparam.process_type == kUpdate) {
+ } else if (tparam_.process_type == kUpdate) {
CHECK_LT(model_.trees.size(), model_.trees_to_update.size());
// move an existing tree from trees_to_update
auto t = std::move(model_.trees_to_update[model_.trees.size() +
- bst_group * tparam.num_parallel_tree + i]);
+ bst_group * tparam_.num_parallel_tree + i]);
new_trees.push_back(t.get());
ret->push_back(std::move(t));
}
}
// update the trees
- for (auto& up : updaters)
+ for (auto& up : updaters_) {
up->Update(gpair, p_fmat, new_trees);
+ }
}
// commit new trees all at once
@@ -307,22 +308,22 @@ class GBTree : public GradientBooster {
num_new_trees += new_trees[gid].size();
model_.CommitModel(std::move(new_trees[gid]), gid);
}
- predictor->UpdatePredictionCache(model_, &updaters, num_new_trees);
+ predictor_->UpdatePredictionCache(model_, &updaters_, num_new_trees);
}
// --- data structure ---
GBTreeModel model_;
// training parameter
- GBTreeTrainParam tparam;
+ GBTreeTrainParam tparam_;
// ----training fields----
// configurations for tree
- std::vector<std::pair<std::string, std::string> > cfg;
+ std::vector<std::pair<std::string, std::string> > cfg_;
// the updaters that can be applied to each tree
- std::vector<std::unique_ptr<TreeUpdater>> updaters;
+ std::vector<std::unique_ptr<TreeUpdater>> updaters_;
// Cached matrices
std::vector<std::shared_ptr<DMatrix>> cache_;
- std::unique_ptr<Predictor> predictor;
- common::Monitor monitor;
+ std::unique_ptr<Predictor> predictor_;
+ common::Monitor monitor_;
};
// dart
@@ -333,22 +334,22 @@ class Dart : public GBTree {
void Configure(const std::vector<std::pair<std::string, std::string> >& cfg) override {
GBTree::Configure(cfg);
if (model_.trees.size() == 0) {
- dparam.InitAllowUnknown(cfg);
+ dparam_.InitAllowUnknown(cfg);
}
}
void Load(dmlc::Stream* fi) override {
GBTree::Load(fi);
- weight_drop.resize(model_.param.num_trees);
+ weight_drop_.resize(model_.param.num_trees);
if (model_.param.num_trees != 0) {
- fi->Read(&weight_drop);
+ fi->Read(&weight_drop_);
}
}
void Save(dmlc::Stream* fo) const override {
GBTree::Save(fo);
- if (weight_drop.size() != 0) {
- fo->Write(weight_drop);
+ if (weight_drop_.size() != 0) {
+ fo->Write(weight_drop_);
}
}
@@ -357,7 +358,7 @@ class Dart : public GBTree {
HostDeviceVector<bst_float>* out_preds,
unsigned ntree_limit) override {
DropTrees(ntree_limit);
- PredLoopInternal<Dart>(p_fmat, &out_preds->data_h(), 0, ntree_limit, true);
+ PredLoopInternal<Dart>(p_fmat, &out_preds->HostVector(), 0, ntree_limit, true);
}
void PredictInstance(const SparseBatch::Inst& inst,
@@ -365,9 +366,9 @@ class Dart : public GBTree {
unsigned ntree_limit,
unsigned root_index) override {
DropTrees(1);
- if (thread_temp.size() == 0) {
- thread_temp.resize(1, RegTree::FVec());
- thread_temp[0].Init(model_.param.num_feature);
+ if (thread_temp_.size() == 0) {
+ thread_temp_.resize(1, RegTree::FVec());
+ thread_temp_[0].Init(model_.param.num_feature);
}
out_preds->resize(model_.param.num_output_group);
ntree_limit *= model_.param.num_output_group;
@@ -378,7 +379,7 @@ class Dart : public GBTree {
for (int gid = 0; gid < model_.param.num_output_group; ++gid) {
(*out_preds)[gid]
= PredValue(inst, gid, root_index,
- &thread_temp[0], 0, ntree_limit) + model_.base_margin;
+ &thread_temp_[0], 0, ntree_limit) + model_.base_margin;
}
}
@@ -400,8 +401,8 @@ class Dart : public GBTree {
}
if (init_out_preds) {
- size_t n = num_group * p_fmat->info().num_row;
- const std::vector<bst_float>& base_margin = p_fmat->info().base_margin;
+ size_t n = num_group * p_fmat->Info().num_row_;
+ const std::vector<bst_float>& base_margin = p_fmat->Info().base_margin_;
out_preds->resize(n);
if (base_margin.size() != 0) {
CHECK_EQ(out_preds->size(), n);
@@ -427,37 +428,37 @@ class Dart : public GBTree {
int num_group,
unsigned tree_begin,
unsigned tree_end) {
- const MetaInfo& info = p_fmat->info();
+ const MetaInfo& info = p_fmat->Info();
const int nthread = omp_get_max_threads();
CHECK_EQ(num_group, model_.param.num_output_group);
InitThreadTemp(nthread);
std::vector<bst_float>& preds = *out_preds;
CHECK_EQ(model_.param.size_leaf_vector, 0)
<< "size_leaf_vector is enforced to 0 so far";
- CHECK_EQ(preds.size(), p_fmat->info().num_row * num_group);
+ CHECK_EQ(preds.size(), p_fmat->Info().num_row_ * num_group);
// start collecting the prediction
dmlc::DataIter<RowBatch>* iter = p_fmat->RowIterator();
- Derived* self = static_cast<Derived*>(this);
+ auto* self = static_cast<Derived*>(this);
iter->BeforeFirst();
while (iter->Next()) {
const RowBatch &batch = iter->Value();
// parallel over local batch
- const int K = 8;
- const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size);
- const bst_omp_uint rest = nsize % K;
+ constexpr int kUnroll = 8;
+ const auto nsize = static_cast<bst_omp_uint>(batch.size);
+ const bst_omp_uint rest = nsize % kUnroll;
#pragma omp parallel for schedule(static)
- for (bst_omp_uint i = 0; i < nsize - rest; i += K) {
+ for (bst_omp_uint i = 0; i < nsize - rest; i += kUnroll) {
const int tid = omp_get_thread_num();
- RegTree::FVec& feats = thread_temp[tid];
- int64_t ridx[K];
- RowBatch::Inst inst[K];
- for (int k = 0; k < K; ++k) {
+ RegTree::FVec& feats = thread_temp_[tid];
+ int64_t ridx[kUnroll];
+ RowBatch::Inst inst[kUnroll];
+ for (int k = 0; k < kUnroll; ++k) {
ridx[k] = static_cast<int64_t>(batch.base_rowid + i + k);
}
- for (int k = 0; k < K; ++k) {
+ for (int k = 0; k < kUnroll; ++k) {
inst[k] = batch[i + k];
}
- for (int k = 0; k < K; ++k) {
+ for (int k = 0; k < kUnroll; ++k) {
for (int gid = 0; gid < num_group; ++gid) {
const size_t offset = ridx[k] * num_group + gid;
preds[offset] +=
@@ -467,8 +468,8 @@ class Dart : public GBTree {
}
}
for (bst_omp_uint i = nsize - rest; i < nsize; ++i) {
- RegTree::FVec& feats = thread_temp[0];
- const int64_t ridx = static_cast<int64_t>(batch.base_rowid + i);
+ RegTree::FVec& feats = thread_temp_[0];
+ const auto ridx = static_cast<int64_t>(batch.base_rowid + i);
const RowBatch::Inst inst = batch[i];
for (int gid = 0; gid < num_group; ++gid) {
const size_t offset = ridx * num_group + gid;
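Two Google-style rules meet in the unrolled prediction loop above: named constants take a leading k plus CamelCase (K becomes kUnroll), and a true compile-time constant is declared constexpr rather than const. A sketch of the same unroll-plus-tail idiom in isolation (the summation is illustrative, not XGBoost code):

    #include <cstddef>
    #include <vector>

    double SumUnrolled(const std::vector<double>& v) {
      constexpr int kUnroll = 8;  // Google style: kCamelCase constant
      const auto nsize = v.size();
      const auto rest = nsize % kUnroll;
      double sum = 0.0;
      // Main loop: kUnroll elements per iteration, as in the batch loop above.
      for (std::size_t i = 0; i < nsize - rest; i += kUnroll) {
        for (int k = 0; k < kUnroll; ++k) sum += v[i + k];
      }
      // Tail loop: handles the nsize % kUnroll leftover elements.
      for (std::size_t i = nsize - rest; i < nsize; ++i) sum += v[i];
      return sum;
    }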
@@ -489,9 +490,9 @@ class Dart : public GBTree {
model_.CommitModel(std::move(new_trees[gid]), gid);
}
size_t num_drop = NormalizeTrees(num_new_trees);
- if (dparam.silent != 1) {
+ if (dparam_.silent != 1) {
LOG(INFO) << "drop " << num_drop << " trees, "
- << "weight = " << weight_drop.back();
+ << "weight = " << weight_drop_.back();
}
}
@@ -506,10 +507,10 @@ class Dart : public GBTree {
p_feats->Fill(inst);
for (size_t i = tree_begin; i < tree_end; ++i) {
if (model_.tree_info[i] == bst_group) {
- bool drop = (std::binary_search(idx_drop.begin(), idx_drop.end(), i));
+ bool drop = (std::binary_search(idx_drop_.begin(), idx_drop_.end(), i));
if (!drop) {
int tid = model_.trees[i]->GetLeafIndex(*p_feats, root_index);
- psum += weight_drop[i] * (*model_.trees[i])[tid].leaf_value();
+ psum += weight_drop_[i] * (*model_.trees[i])[tid].LeafValue();
}
}
}
@@ -519,45 +520,45 @@ class Dart : public GBTree {
// select which trees to drop
inline void DropTrees(unsigned ntree_limit_drop) {
- idx_drop.clear();
+ idx_drop_.clear();
if (ntree_limit_drop > 0) return;
std::uniform_real_distribution<> runif(0.0, 1.0);
auto& rnd = common::GlobalRandom();
bool skip = false;
- if (dparam.skip_drop > 0.0) skip = (runif(rnd) < dparam.skip_drop);
+ if (dparam_.skip_drop > 0.0) skip = (runif(rnd) < dparam_.skip_drop);
// sample some trees to drop
if (!skip) {
- if (dparam.sample_type == 1) {
+ if (dparam_.sample_type == 1) {
bst_float sum_weight = 0.0;
- for (size_t i = 0; i < weight_drop.size(); ++i) {
- sum_weight += weight_drop[i];
+ for (auto elem : weight_drop_) {
+ sum_weight += elem;
}
- for (size_t i = 0; i < weight_drop.size(); ++i) {
- if (runif(rnd) < dparam.rate_drop * weight_drop.size() * weight_drop[i] / sum_weight) {
- idx_drop.push_back(i);
+ for (size_t i = 0; i < weight_drop_.size(); ++i) {
+ if (runif(rnd) < dparam_.rate_drop * weight_drop_.size() * weight_drop_[i] / sum_weight) {
+ idx_drop_.push_back(i);
}
}
- if (dparam.one_drop && idx_drop.empty() && !weight_drop.empty()) {
+ if (dparam_.one_drop && idx_drop_.empty() && !weight_drop_.empty()) {
// the expression below is an ugly but MSVC2013-friendly equivalent of
// size_t i = std::discrete_distribution<size_t>(weight_drop.begin(),
// weight_drop.end())(rnd);
size_t i = std::discrete_distribution<size_t>(
- weight_drop.size(), 0., static_cast<double>(weight_drop.size()),
+ weight_drop_.size(), 0., static_cast<double>(weight_drop_.size()),
[this](double x) -> double {
- return weight_drop[static_cast<size_t>(x)];
+ return weight_drop_[static_cast<size_t>(x)];
})(rnd);
- idx_drop.push_back(i);
+ idx_drop_.push_back(i);
}
} else {
- for (size_t i = 0; i < weight_drop.size(); ++i) {
- if (runif(rnd) < dparam.rate_drop) {
- idx_drop.push_back(i);
+ for (size_t i = 0; i < weight_drop_.size(); ++i) {
+ if (runif(rnd) < dparam_.rate_drop) {
+ idx_drop_.push_back(i);
}
}
- if (dparam.one_drop && idx_drop.empty() && !weight_drop.empty()) {
- size_t i = std::uniform_int_distribution<size_t>(0, weight_drop.size() - 1)(rnd);
- idx_drop.push_back(i);
+ if (dparam_.one_drop && idx_drop_.empty() && !weight_drop_.empty()) {
+ size_t i = std::uniform_int_distribution<size_t>(0, weight_drop_.size() - 1)(rnd);
+ idx_drop_.push_back(i);
}
}
}
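The one_drop branch above keeps a workaround its comment documents: instead of the iterator-pair constructor std::discrete_distribution<size_t>(weight_drop_.begin(), weight_drop_.end()), it uses the (count, xmin, xmax, unary-op) constructor that MSVC 2013 could handle. With count = n, xmin = 0, and xmax = n, the functor is sampled at the midpoints 0.5, 1.5, ..., n - 0.5, so the cast back to size_t recovers the indices 0..n-1 and both forms draw an index with probability proportional to its weight. A sketch of the equivalence (the weights are illustrative):

    #include <cstddef>
    #include <random>
    #include <vector>

    int main() {
      std::vector<float> weight_drop = {0.1f, 0.7f, 0.2f};
      std::mt19937 rnd(42);

      // Plain form: weights come straight from an iterator range.
      std::size_t a = std::discrete_distribution<std::size_t>(
          weight_drop.begin(), weight_drop.end())(rnd);

      // MSVC2013-friendly form, as in DropTrees above: the functor is
      // evaluated at 0.5, 1.5, 2.5, which truncate to the same indices.
      std::size_t b = std::discrete_distribution<std::size_t>(
          weight_drop.size(), 0.0, static_cast<double>(weight_drop.size()),
          [&](double x) -> double {
            return weight_drop[static_cast<std::size_t>(x)];
          })(rnd);

      (void)a; (void)b;
      return 0;
    }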
@@ -565,58 +566,58 @@ class Dart : public GBTree {
// set normalization factors
inline size_t NormalizeTrees(size_t size_new_trees) {
- float lr = 1.0 * dparam.learning_rate / size_new_trees;
- size_t num_drop = idx_drop.size();
+ float lr = 1.0 * dparam_.learning_rate / size_new_trees;
+ size_t num_drop = idx_drop_.size();
if (num_drop == 0) {
for (size_t i = 0; i < size_new_trees; ++i) {
- weight_drop.push_back(1.0);
+ weight_drop_.push_back(1.0);
}
} else {
- if (dparam.normalize_type == 1) {
+ if (dparam_.normalize_type == 1) {
// normalize_type 1
float factor = 1.0 / (1.0 + lr);
- for (size_t i = 0; i < idx_drop.size(); ++i) {
- weight_drop[idx_drop[i]] *= factor;
+ for (auto i : idx_drop_) {
+ weight_drop_[i] *= factor;
}
for (size_t i = 0; i < size_new_trees; ++i) {
- weight_drop.push_back(factor);
+ weight_drop_.push_back(factor);
}
} else {
// normalize_type 0
float factor = 1.0 * num_drop / (num_drop + lr);
- for (size_t i = 0; i < idx_drop.size(); ++i) {
- weight_drop[idx_drop[i]] *= factor;
+ for (auto i : idx_drop_) {
+ weight_drop_[i] *= factor;
}
for (size_t i = 0; i < size_new_trees; ++i) {
- weight_drop.push_back(1.0 / (num_drop + lr));
+ weight_drop_.push_back(1.0 / (num_drop + lr));
}
}
}
// reset
- idx_drop.clear();
+ idx_drop_.clear();
return num_drop;
}
// init thread buffers
inline void InitThreadTemp(int nthread) {
- int prev_thread_temp_size = thread_temp.size();
+ int prev_thread_temp_size = thread_temp_.size();
if (prev_thread_temp_size < nthread) {
- thread_temp.resize(nthread, RegTree::FVec());
+ thread_temp_.resize(nthread, RegTree::FVec());
for (int i = prev_thread_temp_size; i < nthread; ++i) {
- thread_temp[i].Init(model_.param.num_feature);
+ thread_temp_[i].Init(model_.param.num_feature);
}
}
}
// --- data structure ---
// training parameter
- DartTrainParam dparam;
+ DartTrainParam dparam_;
/*! \brief prediction buffer */
- std::vector<bst_float> weight_drop;
+ std::vector<bst_float> weight_drop_;
// indexes of dropped trees
- std::vector<size_t> idx_drop;
+ std::vector<size_t> idx_drop_;
// temporary storage per thread
- std::vector<RegTree::FVec> thread_temp;
+ std::vector<RegTree::FVec> thread_temp_;
};
// register the objective functions
@@ -627,7 +628,7 @@ DMLC_REGISTER_PARAMETER(DartTrainParam);
XGBOOST_REGISTER_GBM(GBTree, "gbtree")
.describe("Tree booster, gradient boosted trees.")
.set_body([](const std::vector<std::shared_ptr<DMatrix> >& cached_mats, bst_float base_margin) {
- GBTree* p = new GBTree(base_margin);
+ auto* p = new GBTree(base_margin);
p->InitCache(cached_mats);
return p;
});

View File

@@ -70,8 +70,8 @@ struct GBTreeModel {
void InitTreesToUpdate() {
if (trees_to_update.size() == 0u) {
- for (size_t i = 0; i < trees.size(); ++i) {
- trees_to_update.push_back(std::move(trees[i]));
+ for (auto & tree : trees) {
+ trees_to_update.push_back(std::move(tree));
}
trees.clear();
param.num_trees = 0;
@@ -100,8 +100,8 @@ struct GBTreeModel {
void Save(dmlc::Stream* fo) const {
CHECK_EQ(param.num_trees, static_cast<int>(trees.size()));
fo->Write(&param, sizeof(param));
- for (size_t i = 0; i < trees.size(); ++i) {
- trees[i]->Save(fo);
+ for (const auto & tree : trees) {
+ tree->Save(fo);
}
if (tree_info.size() != 0) {
fo->Write(dmlc::BeginPtr(tree_info), sizeof(int) * tree_info.size());
@@ -111,15 +111,15 @@ struct GBTreeModel {
std::vector<std::string> DumpModel(const FeatureMap& fmap, bool with_stats,
std::string format) const {
std::vector<std::string> dump;
- for (size_t i = 0; i < trees.size(); i++) {
- dump.push_back(trees[i]->DumpModel(fmap, with_stats, format));
+ for (const auto & tree : trees) {
+ dump.push_back(tree->DumpModel(fmap, with_stats, format));
}
return dump;
}
void CommitModel(std::vector<std::unique_ptr<RegTree> >&& new_trees,
int bst_group) {
- for (size_t i = 0; i < new_trees.size(); ++i) {
- trees.push_back(std::move(new_trees[i]));
+ for (auto & new_tree : new_trees) {
+ trees.push_back(std::move(new_tree));
tree_info.push_back(bst_group);
}
param.num_trees += static_cast<int>(new_trees.size());
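The index loops rewritten in this file follow clang-tidy's modernize-loop-convert: a loop that only walks a container front to back reads better as a range-based for. The one subtlety, visible in InitTreesToUpdate and CommitModel above, is that moving elements out of the container requires a mutable reference, while read-only passes such as Save and DumpModel take const auto &. A sketch with move-only elements (the types are illustrative):

    #include <memory>
    #include <utility>
    #include <vector>

    int main() {
      std::vector<std::unique_ptr<int>> trees;
      trees.push_back(std::make_unique<int>(1));
      trees.push_back(std::make_unique<int>(2));

      std::vector<std::unique_ptr<int>> trees_to_update;
      // Index form flagged by modernize-loop-convert:
      //   for (size_t i = 0; i < trees.size(); ++i)
      //     trees_to_update.push_back(std::move(trees[i]));
      // Range form: auto& (not const auto&) so each element can be moved from.
      for (auto& tree : trees) {
        trees_to_update.push_back(std::move(tree));
      }
      trees.clear();  // moved-from pointers are null; discard them
      return 0;
    }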