Improved gpu_hist_experimental algorithm (#2866)

- Implement column sampling and row subsampling for gpu_hist_experimental
- Optimise multi-GPU implementation for gpu_hist_experimental
- Make NCCL optional
- Add Volta architecture flag
- Optimise RegLossObj
- Add timing utilities for debug verbose mode
- Bump required CUDA version to 8.0
Rory Mitchell
2017-11-11 13:58:40 +13:00
committed by GitHub
parent 16c63f30d0
commit 40c6e2f0c8
14 changed files with 855 additions and 473 deletions
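
The diff below threads a `common::Monitor` (from the new `common/timer.h`) through `LearnerImpl`, bracketing each phase with `Start`/`Stop` calls. As a rough sketch of the interface those calls imply — a label-to-elapsed-time map that reports only when `debug_verbose` is positive; the shipped header may differ — something like:

```cpp
#include <chrono>
#include <iostream>
#include <map>
#include <string>

namespace common {

// Hypothetical sketch, not the shipped header: a named group of labelled
// timers. Start()/Stop() accumulate elapsed time per label; results are
// printed only when debug_verbose was positive at Init() time.
class Monitor {
 public:
  using Clock = std::chrono::high_resolution_clock;

  void Init(const std::string& name, int debug_verbose) {
    name_ = name;
    debug_verbose_ = debug_verbose;
  }
  void Start(const std::string& label) { start_[label] = Clock::now(); }
  void Stop(const std::string& label) {
    elapsed_[label] += Clock::now() - start_[label];
  }
  ~Monitor() {
    if (debug_verbose_ <= 0) return;  // silent unless debug verbose mode
    for (const auto& kv : elapsed_) {
      std::cerr << name_ << "." << kv.first << ": "
                << std::chrono::duration<double>(kv.second).count() << "s\n";
    }
  }

 private:
  std::string name_;
  int debug_verbose_ = 0;
  std::map<std::string, Clock::time_point> start_;
  std::map<std::string, Clock::duration> elapsed_;
};

}  // namespace common
```

With that shape, each Start/Stop pair added below costs two clock reads and prints nothing unless debug_verbose > 0, matching the "timing utilities for debug verbose mode" bullet in the commit message.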

src/learner.cc

@@ -18,6 +18,7 @@
 #include "./common/common.h"
 #include "./common/io.h"
 #include "./common/random.h"
+#include "common/timer.h"
 
 namespace xgboost {
 // implementation of base learner.
@@ -202,6 +203,7 @@ class LearnerImpl : public Learner {
       const std::vector<std::pair<std::string, std::string> >& args) override {
     // add to configurations
     tparam.InitAllowUnknown(args);
+    monitor.Init("Learner", tparam.debug_verbose);
     cfg_.clear();
     for (const auto& kv : args) {
       if (kv.first == "eval_metric") {
@@ -359,29 +361,37 @@ class LearnerImpl : public Learner {
   }
 
   void UpdateOneIter(int iter, DMatrix* train) override {
+    monitor.Start("UpdateOneIter");
     CHECK(ModelInitialized())
         << "Always call InitModel or LoadModel before update";
     if (tparam.seed_per_iteration || rabit::IsDistributed()) {
       common::GlobalRandom().seed(tparam.seed * kRandSeedMagic + iter);
     }
     this->LazyInitDMatrix(train);
+    monitor.Start("PredictRaw");
     this->PredictRaw(train, &preds_);
+    monitor.Stop("PredictRaw");
+    monitor.Start("GetGradient");
     obj_->GetGradient(preds_, train->info(), iter, &gpair_);
+    monitor.Stop("GetGradient");
     gbm_->DoBoost(train, &gpair_, obj_.get());
+    monitor.Stop("UpdateOneIter");
   }
 
   void BoostOneIter(int iter, DMatrix* train,
                     std::vector<bst_gpair>* in_gpair) override {
+    monitor.Start("BoostOneIter");
     if (tparam.seed_per_iteration || rabit::IsDistributed()) {
       common::GlobalRandom().seed(tparam.seed * kRandSeedMagic + iter);
     }
     this->LazyInitDMatrix(train);
     gbm_->DoBoost(train, in_gpair);
+    monitor.Stop("BoostOneIter");
   }
 
   std::string EvalOneIter(int iter, const std::vector<DMatrix*>& data_sets,
                           const std::vector<std::string>& data_names) override {
-    double tstart = dmlc::GetTime();
+    monitor.Start("EvalOneIter");
     std::ostringstream os;
     os << '[' << iter << ']' << std::setiosflags(std::ios::fixed);
     if (metrics_.size() == 0) {
@@ -396,9 +406,7 @@ class LearnerImpl : public Learner {
       }
     }
-    if (tparam.debug_verbose > 0) {
-      LOG(INFO) << "EvalOneIter(): " << dmlc::GetTime() - tstart << " sec";
-    }
+    monitor.Stop("EvalOneIter");
     return os.str();
   }
@@ -460,10 +468,11 @@ class LearnerImpl : public Learner {
   // if not, initialize the column access.
   inline void LazyInitDMatrix(DMatrix* p_train) {
     if (tparam.tree_method == 3 || tparam.tree_method == 4 ||
-        tparam.tree_method == 5) {
+        tparam.tree_method == 5 || tparam.tree_method == 6) {
       return;
     }
+    monitor.Start("LazyInitDMatrix");
     if (!p_train->HaveColAccess()) {
       int ncol = static_cast<int>(p_train->info().num_col);
       std::vector<bool> enabled(ncol, true);
@@ -504,6 +513,7 @@ class LearnerImpl : public Learner {
         gbm_->Configure(cfg_.begin(), cfg_.end());
       }
     }
+    monitor.Stop("LazyInitDMatrix");
   }
 
   // return whether model is already initialized.
@@ -568,6 +578,8 @@ class LearnerImpl : public Learner {
   static const int kRandSeedMagic = 127;
   // internal cached dmatrix
   std::vector<std::shared_ptr<DMatrix> > cache_;
+
+  common::Monitor monitor;
 };
 
 Learner* Learner::Create(
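
For reference, the instrumentation added here is driven entirely by the `debug_verbose` learner parameter that `monitor.Init()` reads. A minimal sketch of turning it on through the public C API (the training file name and iteration count are placeholders):

```cpp
#include <xgboost/c_api.h>

int main() {
  DMatrixHandle dtrain;
  XGDMatrixCreateFromFile("train.libsvm", 0, &dtrain);

  BoosterHandle booster;
  XGBoosterCreate(&dtrain, 1, &booster);
  // debug_verbose is the parameter monitor.Init() receives in this diff;
  // setting it > 0 makes the per-phase timings print.
  XGBoosterSetParam(booster, "debug_verbose", "1");

  for (int iter = 0; iter < 10; ++iter) {
    // UpdateOneIter is now timed end to end, with PredictRaw and
    // GetGradient also timed individually by the Monitor in this commit.
    XGBoosterUpdateOneIter(booster, iter, dtrain);
  }

  XGBoosterFree(booster);
  XGDMatrixFree(dtrain);
  return 0;
}
```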