Improve multi-threaded performance (#2104)
* Add UpdatePredictionCache() option to updaters Some updaters (e.g. fast_hist) have enough information to quickly compute prediction cache for the training data. Each updater may override UpdatePredictionCache() method to update the prediction cache. Note: this trick does not apply to validation data. * Respond to code review * Disable some debug messages by default * Document UpdatePredictionCache() interface * Remove base_margin logic from UpdatePredictionCache() implementation * Do not take pointer to cfg, as reference may get stale * Improve multi-threaded performance * Use columnwise accessor to accelerate ApplySplit() step, with support for a compressed representation * Parallel sort for evaluation step * Inline BuildHist() function * Cache gradient pairs when building histograms in BuildHist() * Add missing #if macro * Respond to code review * Use wrapper to enable parallel sort on Linux * Fix C++ compatibility issues * MSVC doesn't support unsigned in OpenMP loops * gcc 4.6 doesn't support using keyword * Fix lint issues * Respond to code review * Fix bug in ApplySplitSparseData() * Attempting to read beyond the end of a sparse column * Mishandling the case where an entire range of rows have missing values * Fix training continuation bug Disable UpdatePredictionCache() in the first iteration. This way, we can accommodate the scenario where we build off of an existing (nonempty) ensemble. * Add regression test for fast_hist * Respond to code review * Add back old version of ApplySplitSparseData
This commit is contained in:
@@ -6,6 +6,7 @@
|
||||
*/
|
||||
#include <xgboost/logging.h>
|
||||
#include <xgboost/learner.h>
|
||||
#include <dmlc/timer.h>
|
||||
#include <dmlc/io.h>
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
@@ -83,6 +84,8 @@ struct LearnerTrainParam
|
||||
// number of threads to use if OpenMP is enabled
|
||||
// if equals 0, use system default
|
||||
int nthread;
|
||||
// flag to print out detailed breakdown of runtime
|
||||
int debug_verbose;
|
||||
// declare parameters
|
||||
DMLC_DECLARE_PARAMETER(LearnerTrainParam) {
|
||||
DMLC_DECLARE_FIELD(seed).set_default(0)
|
||||
@@ -109,6 +112,10 @@ struct LearnerTrainParam
|
||||
.describe("maximum row per batch.");
|
||||
DMLC_DECLARE_FIELD(nthread).set_default(0)
|
||||
.describe("Number of threads to use.");
|
||||
DMLC_DECLARE_FIELD(debug_verbose)
|
||||
.set_lower_bound(0)
|
||||
.set_default(0)
|
||||
.describe("flag to print out detailed breakdown of runtime");
|
||||
}
|
||||
};
|
||||
|
||||
@@ -170,28 +177,9 @@ class LearnerImpl : public Learner {
|
||||
|
||||
if (tparam.tree_method == 3) {
|
||||
/* histogram-based algorithm */
|
||||
if (cfg_.count("updater") == 0) {
|
||||
LOG(CONSOLE) << "Tree method is selected to be \'hist\', "
|
||||
<< "which uses histogram aggregation for faster training. "
|
||||
<< "Using default sequence of updaters: grow_fast_histmaker,prune";
|
||||
cfg_["updater"] = "grow_fast_histmaker,prune";
|
||||
} else {
|
||||
const std::string first_str = "grow_fast_histmaker";
|
||||
if (first_str.length() <= cfg_["updater"].length()
|
||||
&& std::equal(first_str.begin(), first_str.end(), cfg_["updater"].begin())) {
|
||||
// updater sequence starts with "grow_fast_histmaker"
|
||||
LOG(CONSOLE) << "Tree method is selected to be \'hist\', "
|
||||
<< "which uses histogram aggregation for faster training. "
|
||||
<< "Using custom sequence of updaters: " << cfg_["updater"];
|
||||
} else {
|
||||
// updater sequence does not start with "grow_fast_histmaker"
|
||||
LOG(CONSOLE) << "Tree method is selected to be \'hist\', but the given "
|
||||
<< "sequence of updaters is not compatible; "
|
||||
<< "grow_fast_histmaker must run first. "
|
||||
<< "Using default sequence of updaters: grow_fast_histmaker,prune";
|
||||
cfg_["updater"] = "grow_fast_histmaker,prune";
|
||||
}
|
||||
}
|
||||
LOG(CONSOLE) << "Tree method is selected to be \'hist\', which uses a single updater "
|
||||
<< "grow_fast_histmaker.";
|
||||
cfg_["updater"] = "grow_fast_histmaker";
|
||||
} else if (cfg_.count("updater") == 0) {
|
||||
if (tparam.dsplit == 1) {
|
||||
cfg_["updater"] = "distcol";
|
||||
@@ -333,6 +321,7 @@ class LearnerImpl : public Learner {
|
||||
std::string EvalOneIter(int iter,
|
||||
const std::vector<DMatrix*>& data_sets,
|
||||
const std::vector<std::string>& data_names) override {
|
||||
double tstart = dmlc::GetTime();
|
||||
std::ostringstream os;
|
||||
os << '[' << iter << ']'
|
||||
<< std::setiosflags(std::ios::fixed);
|
||||
@@ -347,6 +336,10 @@ class LearnerImpl : public Learner {
|
||||
<< ev->Eval(preds_, data_sets[i]->info(), tparam.dsplit == 2);
|
||||
}
|
||||
}
|
||||
|
||||
if (tparam.debug_verbose > 0) {
|
||||
LOG(INFO) << "EvalOneIter(): " << dmlc::GetTime() - tstart << " sec";
|
||||
}
|
||||
return os.str();
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user