Improve multi-threaded performance (#2104)
* Add UpdatePredictionCache() option to updaters. Some updaters (e.g. fast_hist) have enough information to quickly compute the prediction cache for the training data. Each updater may override the UpdatePredictionCache() method to update the prediction cache. Note: this trick does not apply to validation data. * Respond to code review * Disable some debug messages by default * Document UpdatePredictionCache() interface * Remove base_margin logic from UpdatePredictionCache() implementation * Do not take pointer to cfg, as reference may get stale * Improve multi-threaded performance * Use columnwise accessor to accelerate ApplySplit() step, with support for a compressed representation * Parallel sort for evaluation step * Inline BuildHist() function * Cache gradient pairs when building histograms in BuildHist() * Add missing #if macro * Respond to code review * Use wrapper to enable parallel sort on Linux * Fix C++ compatibility issues * MSVC doesn't support unsigned in OpenMP loops * gcc 4.6 doesn't support using keyword * Fix lint issues * Respond to code review * Fix bug in ApplySplitSparseData() * Attempting to read beyond the end of a sparse column * Mishandling the case where an entire range of rows have missing values * Fix training continuation bug: disable UpdatePredictionCache() in the first iteration. This way, we can accommodate the scenario where we build off of an existing (nonempty) ensemble. * Add regression test for fast_hist * Respond to code review * Add back old version of ApplySplitSparseData
This commit is contained in:
@@ -44,6 +44,8 @@ struct GBTreeTrainParam : public dmlc::Parameter<GBTreeTrainParam> {
|
||||
std::string updater_seq;
|
||||
/*! \brief type of boosting process to run */
|
||||
int process_type;
|
||||
// flag to print out detailed breakdown of runtime
|
||||
int debug_verbose;
|
||||
// declare parameters
|
||||
DMLC_DECLARE_PARAMETER(GBTreeTrainParam) {
|
||||
DMLC_DECLARE_FIELD(num_parallel_tree)
|
||||
@@ -60,6 +62,10 @@ struct GBTreeTrainParam : public dmlc::Parameter<GBTreeTrainParam> {
|
||||
.add_enum("update", kUpdate)
|
||||
.describe("Whether to run the normal boosting process that creates new trees,"\
|
||||
" or to update the trees in an existing model.");
|
||||
DMLC_DECLARE_FIELD(debug_verbose)
|
||||
.set_lower_bound(0)
|
||||
.set_default(0)
|
||||
.describe("flag to print out detailed breakdown of runtime");
|
||||
// add alias
|
||||
DMLC_DECLARE_ALIAS(updater_seq, updater);
|
||||
}
|
||||
@@ -260,9 +266,13 @@ class GBTree : public GradientBooster {
|
||||
new_trees.push_back(std::move(ret));
|
||||
}
|
||||
}
|
||||
double tstart = dmlc::GetTime();
|
||||
for (int gid = 0; gid < mparam.num_output_group; ++gid) {
|
||||
this->CommitModel(std::move(new_trees[gid]), gid);
|
||||
}
|
||||
if (tparam.debug_verbose > 0) {
|
||||
LOG(INFO) << "CommitModel(): " << dmlc::GetTime() - tstart << " sec";
|
||||
}
|
||||
}
|
||||
|
||||
void Predict(DMatrix* p_fmat,
|
||||
@@ -474,14 +484,20 @@ class GBTree : public GradientBooster {
|
||||
// update cache entry
|
||||
for (auto &kv : cache_) {
|
||||
CacheEntry& e = kv.second;
|
||||
|
||||
if (e.predictions.size() == 0) {
|
||||
PredLoopInternal<GBTree>(
|
||||
e.data.get(), &(e.predictions),
|
||||
0, trees.size(), true);
|
||||
} else {
|
||||
PredLoopInternal<GBTree>(
|
||||
e.data.get(), &(e.predictions),
|
||||
old_ntree, trees.size(), false);
|
||||
if (mparam.num_output_group == 1 && updaters.size() > 0 && new_trees.size() == 1
|
||||
&& updaters.back()->UpdatePredictionCache(e.data.get(), &(e.predictions)) ) {
|
||||
{} // do nothing
|
||||
} else {
|
||||
PredLoopInternal<GBTree>(
|
||||
e.data.get(), &(e.predictions),
|
||||
old_ntree, trees.size(), false);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user