From 9ffe8596f293d337818e4b9fbf2c752e0e598272 Mon Sep 17 00:00:00 2001
From: Vadim Khotilovich
Date: Thu, 15 Feb 2018 18:31:42 -0600
Subject: [PATCH] [core] fix slow predict-caching with many classes (#3109)

* fix prediction caching inefficiency for multiclass

* silence some warnings

* redundant if

* workaround for R v3.4.3 bug; fixes #3081
---
 R-package/configure.win         |  0
 cmake/modules/FindLibR.cmake    |  2 +-
 src/gbm/gbtree.cc               | 44 ++++++++++++++++++---------------
 src/objective/regression_obj.cc |  2 +-
 src/predictor/cpu_predictor.cc  | 15 ++++-------
 5 files changed, 31 insertions(+), 32 deletions(-)
 create mode 100644 R-package/configure.win

diff --git a/R-package/configure.win b/R-package/configure.win
new file mode 100644
index 000000000..e69de29bb
diff --git a/cmake/modules/FindLibR.cmake b/cmake/modules/FindLibR.cmake
index db2b89a10..97e39ba6c 100644
--- a/cmake/modules/FindLibR.cmake
+++ b/cmake/modules/FindLibR.cmake
@@ -117,7 +117,7 @@ else()
   # ask R for R_HOME
   if(LIBR_EXECUTABLE)
     execute_process(
-      COMMAND ${LIBR_EXECUTABLE} "--slave" "--no-save" "-e" "cat(normalizePath(R.home(), winslash='/'))"
+      COMMAND ${LIBR_EXECUTABLE} "--slave" "--no-save" "-e" "cat(normalizePath(R.home(),winslash='/'))"
       OUTPUT_VARIABLE LIBR_HOME)
   endif()
   # if R executable not available, query R_HOME path from registry
diff --git a/src/gbm/gbtree.cc b/src/gbm/gbtree.cc
index 1a509d90d..7bbc57c1b 100644
--- a/src/gbm/gbtree.cc
+++ b/src/gbm/gbtree.cc
@@ -281,9 +281,7 @@ class GBTree : public GradientBooster {
     }
     monitor.Stop("BoostNewTrees");
     monitor.Start("CommitModel");
-    for (int gid = 0; gid < ngroup; ++gid) {
-      this->CommitModel(std::move(new_trees[gid]), gid);
-    }
+    this->CommitModel(std::move(new_trees));
     monitor.Stop("CommitModel");
   }
 
@@ -338,11 +336,13 @@ class GBTree : public GradientBooster {
 
   // commit new trees all at once
   virtual void
-  CommitModel(std::vector<std::unique_ptr<RegTree> >&& new_trees,
-              int bst_group) {
-    model_.CommitModel(std::move(new_trees), bst_group);
-
-    predictor->UpdatePredictionCache(model_, &updaters, new_trees.size());
+  CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees) {
+    int num_new_trees = 0;
+    for (int gid = 0; gid < model_.param.num_output_group; ++gid) {
+      num_new_trees += new_trees[gid].size();
+      model_.CommitModel(std::move(new_trees[gid]), gid);
+    }
+    predictor->UpdatePredictionCache(model_, &updaters, num_new_trees);
   }
 
   // --- data structure ---
@@ -514,20 +514,22 @@ class Dart : public GBTree {
       }
     }
   }
+
   // commit new trees all at once
-  void CommitModel(std::vector<std::unique_ptr<RegTree> >&& new_trees,
-                   int bst_group) override {
-    for (size_t i = 0; i < new_trees.size(); ++i) {
-      model_.trees.push_back(std::move(new_trees[i]));
-      model_.tree_info.push_back(bst_group);
+  void
+  CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees) override {
+    int num_new_trees = 0;
+    for (int gid = 0; gid < model_.param.num_output_group; ++gid) {
+      num_new_trees += new_trees[gid].size();
+      model_.CommitModel(std::move(new_trees[gid]), gid);
     }
-    model_.param.num_trees += static_cast<int>(new_trees.size());
-    size_t num_drop = NormalizeTrees(new_trees.size());
+    size_t num_drop = NormalizeTrees(num_new_trees);
     if (dparam.silent != 1) {
       LOG(INFO) << "drop " << num_drop << " trees, "
                 << "weight = " << weight_drop.back();
     }
   }
+
   // predict the leaf scores without dropped trees
   inline bst_float PredValue(const RowBatch::Inst &inst,
                              int bst_group,
@@ -550,16 +552,17 @@ class Dart : public GBTree {
     return psum;
   }
 
-  // select dropped trees
+  // select which trees to drop
   inline void DropTrees(unsigned ntree_limit_drop) {
+    idx_drop.clear();
+    if (ntree_limit_drop > 0) return;
+
     std::uniform_real_distribution<> runif(0.0, 1.0);
     auto& rnd = common::GlobalRandom();
-    // reset
-    idx_drop.clear();
-    // sample dropped trees
    bool skip = false;
     if (dparam.skip_drop > 0.0) skip = (runif(rnd) < dparam.skip_drop);
-    if (ntree_limit_drop == 0 && !skip) {
+    // sample some trees to drop
+    if (!skip) {
       if (dparam.sample_type == 1) {
         bst_float sum_weight = 0.0;
         for (size_t i = 0; i < weight_drop.size(); ++i) {
@@ -594,6 +597,7 @@ class Dart : public GBTree {
       }
     }
   }
+
   // set normalization factors
   inline size_t NormalizeTrees(size_t size_new_trees) {
     float lr = 1.0 * dparam.learning_rate / size_new_trees;
diff --git a/src/objective/regression_obj.cc b/src/objective/regression_obj.cc
index b3f74c8fb..9fb0cc981 100644
--- a/src/objective/regression_obj.cc
+++ b/src/objective/regression_obj.cc
@@ -56,7 +56,7 @@ class RegLossObj : public ObjFunction {
     int nthread = omp_get_max_threads();
     // Use a maximum of 8 threads
 #pragma omp parallel for schedule(static) num_threads(std::min(8, nthread))
-    for (int i = 0; i < n - remainder; i += 8) {
+    for (omp_ulong i = 0; i < n - remainder; i += 8) {
       avx::Float8 y(&info.labels[i]);
       avx::Float8 p = Loss::PredTransform(avx::Float8(&preds[i]));
       avx::Float8 w = info.weights.empty() ? avx::Float8(1.0f)
diff --git a/src/predictor/cpu_predictor.cc b/src/predictor/cpu_predictor.cc
index 989a847a0..04bfd9f7d 100644
--- a/src/predictor/cpu_predictor.cc
+++ b/src/predictor/cpu_predictor.cc
@@ -100,12 +100,8 @@ class CPUPredictor : public Predictor {
                         const gbm::GBTreeModel& model, int tree_begin,
                         unsigned ntree_limit) {
     // TODO(Rory): Check if this specialisation actually improves performance
-    if (model.param.num_output_group == 1) {
-      PredLoopSpecalize(dmat, out_preds, model, 1, tree_begin, ntree_limit);
-    } else {
-      PredLoopSpecalize(dmat, out_preds, model, model.param.num_output_group,
-                        tree_begin, ntree_limit);
-    }
+    PredLoopSpecalize(dmat, out_preds, model, model.param.num_output_group,
+                      tree_begin, ntree_limit);
   }
 
 public:
@@ -132,10 +128,9 @@ class CPUPredictor : public Predictor {
     this->PredLoopInternal(dmat, out_preds, model, tree_begin, ntree_limit);
   }
 
-  void UpdatePredictionCache(
-      const gbm::GBTreeModel& model,
-      std::vector<std::unique_ptr<TreeUpdater>>* updaters,
-      int num_new_trees) override {
+  void UpdatePredictionCache(const gbm::GBTreeModel& model,
+                             std::vector<std::unique_ptr<TreeUpdater>>* updaters,
+                             int num_new_trees) override {
     int old_ntree = model.trees.size() - num_new_trees;
     // update cache entry
     for (auto& kv : cache_) {