[core] fix slow predict-caching with many classes (#3109)

* fix prediction caching inefficiency for multiclass

* silence some warnings

* remove a redundant if

* workaround for R v3.4.3 bug; fixes #3081
Vadim Khotilovich, 2018-02-15 18:31:42 -06:00, committed by GitHub
parent cf19caa46a
commit 9ffe8596f2
5 changed files with 31 additions and 32 deletions
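
The first bullet in the commit message is the heart of the change. In the old code (first GBTree hunk below), DoBoost called CommitModel once per output group, and every CommitModel call ended with predictor->UpdatePredictionCache, so a K-class model refreshed the prediction cache K times per boosting round. The new CommitModel receives the trees for all groups at once and refreshes the cache a single time with the total number of new trees. The toy sketch below (simplified counters, not the xgboost classes) illustrates the call-pattern difference:

// Toy illustration of the caching change (not xgboost code): committing
// per group refreshes the cache once per class; committing all groups at
// once refreshes it a single time per boosting round.
#include <cstdio>
#include <vector>

struct ToyBooster {
  int trees_committed = 0;
  int cache_updates = 0;

  // Old pattern: commit one group's trees and refresh the cache immediately,
  // so the caller's per-group loop triggers one refresh per class.
  void CommitPerGroup(const std::vector<std::vector<int>>& new_trees) {
    for (const auto& group : new_trees) {
      trees_committed += static_cast<int>(group.size());
      ++cache_updates;  // cache refreshed for every output group
    }
  }

  // New pattern: commit every group's trees, then refresh the cache once
  // with the total number of newly added trees.
  void CommitAllGroups(const std::vector<std::vector<int>>& new_trees) {
    int num_new_trees = 0;
    for (const auto& group : new_trees) {
      num_new_trees += static_cast<int>(group.size());
      trees_committed += static_cast<int>(group.size());
    }
    ++cache_updates;  // single refresh per boosting round
  }
};

int main() {
  // 10 classes, one new tree per class in this boosting round.
  std::vector<std::vector<int>> new_trees(10, std::vector<int>(1, 0));
  ToyBooster old_style, new_style;
  old_style.CommitPerGroup(new_trees);
  new_style.CommitAllGroups(new_trees);
  std::printf("old: %d cache updates, new: %d\n",
              old_style.cache_updates, new_style.cache_updates);
  return 0;
}

Compiled and run, the sketch reports 10 cache updates for the old pattern versus 1 for the new one when boosting 10 classes.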

R-package/configure.win (new file, 0 lines)

@@ -281,9 +281,7 @@ class GBTree : public GradientBooster {
     }
     monitor.Stop("BoostNewTrees");
     monitor.Start("CommitModel");
-    for (int gid = 0; gid < ngroup; ++gid) {
-      this->CommitModel(std::move(new_trees[gid]), gid);
-    }
+    this->CommitModel(std::move(new_trees));
     monitor.Stop("CommitModel");
   }
@@ -338,11 +336,13 @@ class GBTree : public GradientBooster {
   // commit new trees all at once
   virtual void
-  CommitModel(std::vector<std::unique_ptr<RegTree> >&& new_trees,
-              int bst_group) {
-    model_.CommitModel(std::move(new_trees), bst_group);
-    predictor->UpdatePredictionCache(model_, &updaters, new_trees.size());
+  CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees) {
+    int num_new_trees = 0;
+    for (int gid = 0; gid < model_.param.num_output_group; ++gid) {
+      num_new_trees += new_trees[gid].size();
+      model_.CommitModel(std::move(new_trees[gid]), gid);
+    }
+    predictor->UpdatePredictionCache(model_, &updaters, num_new_trees);
   }

   // --- data structure ---
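
The total count matters because of how the predictor consumes it: as the CPUPredictor hunk near the end shows, UpdatePredictionCache computes old_ntree = model.trees.size() - num_new_trees and only has to fold the newly committed trees into the cached predictions. A simplified sketch of that bookkeeping (hypothetical types, ignoring the per-group layout of multiclass predictions):

// Simplified sketch of incremental prediction-cache bookkeeping (hypothetical
// types, not the CPUPredictor internals): given the total number of trees
// just committed, only those trees need to be evaluated and added onto the
// cached predictions.
#include <cstddef>
#include <vector>

struct ToyModel {
  // one additive contribution per (tree, row); stands in for real tree output
  std::vector<std::vector<float>> tree_contrib;  // [tree][row]
  std::size_t num_trees() const { return tree_contrib.size(); }
};

void UpdateCache(const ToyModel& model, int num_new_trees,
                 std::vector<float>* cached_preds) {
  const std::size_t old_ntree =
      model.num_trees() - static_cast<std::size_t>(num_new_trees);
  // add only the contributions of the newly committed trees
  for (std::size_t t = old_ntree; t < model.num_trees(); ++t) {
    for (std::size_t row = 0; row < cached_preds->size(); ++row) {
      (*cached_preds)[row] += model.tree_contrib[t][row];
    }
  }
}

With the single batched call, reporting anything less than the total would make old_ntree overshoot and skip trees, which is why num_new_trees sums over all output groups.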
@@ -514,20 +514,22 @@ class Dart : public GBTree {
       }
     }
   }
   // commit new trees all at once
-  void CommitModel(std::vector<std::unique_ptr<RegTree> >&& new_trees,
-                   int bst_group) override {
-    for (size_t i = 0; i < new_trees.size(); ++i) {
-      model_.trees.push_back(std::move(new_trees[i]));
-      model_.tree_info.push_back(bst_group);
-    }
-    model_.param.num_trees += static_cast<int>(new_trees.size());
-    size_t num_drop = NormalizeTrees(new_trees.size());
+  void
+  CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees) override {
+    int num_new_trees = 0;
+    for (int gid = 0; gid < model_.param.num_output_group; ++gid) {
+      num_new_trees += new_trees[gid].size();
+      model_.CommitModel(std::move(new_trees[gid]), gid);
+    }
+    size_t num_drop = NormalizeTrees(num_new_trees);
     if (dparam.silent != 1) {
       LOG(INFO) << "drop " << num_drop << " trees, "
                 << "weight = " << weight_drop.back();
     }
   }
   // predict the leaf scores without dropped trees
   inline bst_float PredValue(const RowBatch::Inst &inst,
                              int bst_group,
@@ -550,16 +552,17 @@ class Dart : public GBTree {
     return psum;
   }
-  // select dropped trees
+  // select which trees to drop
   inline void DropTrees(unsigned ntree_limit_drop) {
+    idx_drop.clear();
+    if (ntree_limit_drop > 0) return;
     std::uniform_real_distribution<> runif(0.0, 1.0);
     auto& rnd = common::GlobalRandom();
-    // reset
-    idx_drop.clear();
-    // sample dropped trees
     bool skip = false;
     if (dparam.skip_drop > 0.0) skip = (runif(rnd) < dparam.skip_drop);
-    if (ntree_limit_drop == 0 && !skip) {
+    // sample some trees to drop
+    if (!skip) {
       if (dparam.sample_type == 1) {
         bst_float sum_weight = 0.0;
         for (size_t i = 0; i < weight_drop.size(); ++i) {
@@ -594,6 +597,7 @@ class Dart : public GBTree {
       }
     }
   }
   // set normalization factors
   inline size_t NormalizeTrees(size_t size_new_trees) {
     float lr = 1.0 * dparam.learning_rate / size_new_trees;


@@ -56,7 +56,7 @@ class RegLossObj : public ObjFunction {
     int nthread = omp_get_max_threads();
     // Use a maximum of 8 threads
 #pragma omp parallel for schedule(static) num_threads(std::min(8, nthread))
-    for (int i = 0; i < n - remainder; i += 8) {
+    for (omp_ulong i = 0; i < n - remainder; i += 8) {
       avx::Float8 y(&info.labels[i]);
       avx::Float8 p = Loss::PredTransform(avx::Float8(&preds[i]));
       avx::Float8 w = info.weights.empty() ? avx::Float8(1.0f)
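
The omp_ulong change in this loop is the "silence some warnings" bullet: the loop bound presumably derives from a container size and is therefore unsigned, so a plain int index draws a signed/unsigned comparison warning, and switching the index to xgboost's unsigned omp_ulong typedef matches it to the bound. A minimal reproduction of the pattern (assumed standalone example, not xgboost code):

// Minimal reproduction of the warning this change addresses (assumption: the
// bound is unsigned): a signed int index compared against an unsigned bound
// triggers -Wsign-compare; giving the index the bound's type silences it.
#include <cstddef>
#include <vector>

void scale_inplace(std::vector<float>* values, float factor) {
  const std::size_t n = values->size();
  // 'int i' here would warn about comparing integers of different signedness.
  // OpenMP 3.0 and later accept unsigned loop variables, so the index can
  // simply share the bound's type.
#pragma omp parallel for schedule(static)
  for (std::size_t i = 0; i < n; ++i) {
    (*values)[i] *= factor;
  }
}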


@@ -100,13 +100,9 @@ class CPUPredictor : public Predictor {
                         const gbm::GBTreeModel& model, int tree_begin,
                         unsigned ntree_limit) {
     // TODO(Rory): Check if this specialisation actually improves performance
-    if (model.param.num_output_group == 1) {
-      PredLoopSpecalize(dmat, out_preds, model, 1, tree_begin, ntree_limit);
-    } else {
-      PredLoopSpecalize(dmat, out_preds, model, model.param.num_output_group,
-                        tree_begin, ntree_limit);
-    }
+    PredLoopSpecalize(dmat, out_preds, model, model.param.num_output_group,
+                      tree_begin, ntree_limit);
   }

  public:
   void PredictBatch(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
@@ -132,8 +128,7 @@ class CPUPredictor : public Predictor {
     this->PredLoopInternal(dmat, out_preds, model, tree_begin, ntree_limit);
   }

-  void UpdatePredictionCache(
-      const gbm::GBTreeModel& model,
+  void UpdatePredictionCache(const gbm::GBTreeModel& model,
       std::vector<std::unique_ptr<TreeUpdater>>* updaters,
       int num_new_trees) override {
     int old_ntree = model.trees.size() - num_new_trees;