[core] fix slow predict-caching with many classes (#3109)
* fix prediction caching inefficiency for multiclass
* silence some warnings
* remove redundant if
* workaround for R v3.4.3 bug; fixes #3081
This commit is contained in:
parent
cf19caa46a
commit
9ffe8596f2
0
R-package/configure.win
Normal file
0
R-package/configure.win
Normal file
@@ -281,9 +281,7 @@ class GBTree : public GradientBooster {
|
|||||||
}
|
}
|
||||||
monitor.Stop("BoostNewTrees");
|
monitor.Stop("BoostNewTrees");
|
||||||
monitor.Start("CommitModel");
|
monitor.Start("CommitModel");
|
||||||
for (int gid = 0; gid < ngroup; ++gid) {
|
this->CommitModel(std::move(new_trees));
|
||||||
this->CommitModel(std::move(new_trees[gid]), gid);
|
|
||||||
}
|
|
||||||
monitor.Stop("CommitModel");
|
monitor.Stop("CommitModel");
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -338,11 +336,13 @@ class GBTree : public GradientBooster {
|
|||||||
|
|
||||||
// commit new trees all at once
|
// commit new trees all at once
|
||||||
virtual void
|
virtual void
|
||||||
CommitModel(std::vector<std::unique_ptr<RegTree> >&& new_trees,
|
CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees) {
|
||||||
int bst_group) {
|
int num_new_trees = 0;
|
||||||
model_.CommitModel(std::move(new_trees), bst_group);
|
for (int gid = 0; gid < model_.param.num_output_group; ++gid) {
|
||||||
|
num_new_trees += new_trees[gid].size();
|
||||||
predictor->UpdatePredictionCache(model_, &updaters, new_trees.size());
|
model_.CommitModel(std::move(new_trees[gid]), gid);
|
||||||
|
}
|
||||||
|
predictor->UpdatePredictionCache(model_, &updaters, num_new_trees);
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- data structure ---
|
// --- data structure ---
|
||||||
@@ -514,20 +514,22 @@ class Dart : public GBTree {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// commit new trees all at once
|
// commit new trees all at once
|
||||||
void CommitModel(std::vector<std::unique_ptr<RegTree> >&& new_trees,
|
void
|
||||||
int bst_group) override {
|
CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees) override {
|
||||||
for (size_t i = 0; i < new_trees.size(); ++i) {
|
int num_new_trees = 0;
|
||||||
model_.trees.push_back(std::move(new_trees[i]));
|
for (int gid = 0; gid < model_.param.num_output_group; ++gid) {
|
||||||
model_.tree_info.push_back(bst_group);
|
num_new_trees += new_trees[gid].size();
|
||||||
|
model_.CommitModel(std::move(new_trees[gid]), gid);
|
||||||
}
|
}
|
||||||
model_.param.num_trees += static_cast<int>(new_trees.size());
|
size_t num_drop = NormalizeTrees(num_new_trees);
|
||||||
size_t num_drop = NormalizeTrees(new_trees.size());
|
|
||||||
if (dparam.silent != 1) {
|
if (dparam.silent != 1) {
|
||||||
LOG(INFO) << "drop " << num_drop << " trees, "
|
LOG(INFO) << "drop " << num_drop << " trees, "
|
||||||
<< "weight = " << weight_drop.back();
|
<< "weight = " << weight_drop.back();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// predict the leaf scores without dropped trees
|
// predict the leaf scores without dropped trees
|
||||||
inline bst_float PredValue(const RowBatch::Inst &inst,
|
inline bst_float PredValue(const RowBatch::Inst &inst,
|
||||||
int bst_group,
|
int bst_group,
|
||||||
@@ -550,16 +552,17 @@ class Dart : public GBTree {
|
|||||||
return psum;
|
return psum;
|
||||||
}
|
}
|
||||||
|
|
||||||
// select dropped trees
|
// select which trees to drop
|
||||||
inline void DropTrees(unsigned ntree_limit_drop) {
|
inline void DropTrees(unsigned ntree_limit_drop) {
|
||||||
|
idx_drop.clear();
|
||||||
|
if (ntree_limit_drop > 0) return;
|
||||||
|
|
||||||
std::uniform_real_distribution<> runif(0.0, 1.0);
|
std::uniform_real_distribution<> runif(0.0, 1.0);
|
||||||
auto& rnd = common::GlobalRandom();
|
auto& rnd = common::GlobalRandom();
|
||||||
// reset
|
|
||||||
idx_drop.clear();
|
|
||||||
// sample dropped trees
|
|
||||||
bool skip = false;
|
bool skip = false;
|
||||||
if (dparam.skip_drop > 0.0) skip = (runif(rnd) < dparam.skip_drop);
|
if (dparam.skip_drop > 0.0) skip = (runif(rnd) < dparam.skip_drop);
|
||||||
if (ntree_limit_drop == 0 && !skip) {
|
// sample some trees to drop
|
||||||
|
if (!skip) {
|
||||||
if (dparam.sample_type == 1) {
|
if (dparam.sample_type == 1) {
|
||||||
bst_float sum_weight = 0.0;
|
bst_float sum_weight = 0.0;
|
||||||
for (size_t i = 0; i < weight_drop.size(); ++i) {
|
for (size_t i = 0; i < weight_drop.size(); ++i) {
|
||||||
@@ -594,6 +597,7 @@ class Dart : public GBTree {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// set normalization factors
|
// set normalization factors
|
||||||
inline size_t NormalizeTrees(size_t size_new_trees) {
|
inline size_t NormalizeTrees(size_t size_new_trees) {
|
||||||
float lr = 1.0 * dparam.learning_rate / size_new_trees;
|
float lr = 1.0 * dparam.learning_rate / size_new_trees;
|
||||||
|
|||||||
@@ -56,7 +56,7 @@ class RegLossObj : public ObjFunction {
|
|||||||
int nthread = omp_get_max_threads();
|
int nthread = omp_get_max_threads();
|
||||||
// Use a maximum of 8 threads
|
// Use a maximum of 8 threads
|
||||||
#pragma omp parallel for schedule(static) num_threads(std::min(8, nthread))
|
#pragma omp parallel for schedule(static) num_threads(std::min(8, nthread))
|
||||||
for (int i = 0; i < n - remainder; i += 8) {
|
for (omp_ulong i = 0; i < n - remainder; i += 8) {
|
||||||
avx::Float8 y(&info.labels[i]);
|
avx::Float8 y(&info.labels[i]);
|
||||||
avx::Float8 p = Loss::PredTransform(avx::Float8(&preds[i]));
|
avx::Float8 p = Loss::PredTransform(avx::Float8(&preds[i]));
|
||||||
avx::Float8 w = info.weights.empty() ? avx::Float8(1.0f)
|
avx::Float8 w = info.weights.empty() ? avx::Float8(1.0f)
|
||||||
|
|||||||
@@ -100,13 +100,9 @@ class CPUPredictor : public Predictor {
|
|||||||
const gbm::GBTreeModel& model, int tree_begin,
|
const gbm::GBTreeModel& model, int tree_begin,
|
||||||
unsigned ntree_limit) {
|
unsigned ntree_limit) {
|
||||||
// TODO(Rory): Check if this specialisation actually improves performance
|
// TODO(Rory): Check if this specialisation actually improves performance
|
||||||
if (model.param.num_output_group == 1) {
|
|
||||||
PredLoopSpecalize(dmat, out_preds, model, 1, tree_begin, ntree_limit);
|
|
||||||
} else {
|
|
||||||
PredLoopSpecalize(dmat, out_preds, model, model.param.num_output_group,
|
PredLoopSpecalize(dmat, out_preds, model, model.param.num_output_group,
|
||||||
tree_begin, ntree_limit);
|
tree_begin, ntree_limit);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
void PredictBatch(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
|
void PredictBatch(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
|
||||||
@@ -132,8 +128,7 @@ class CPUPredictor : public Predictor {
|
|||||||
this->PredLoopInternal(dmat, out_preds, model, tree_begin, ntree_limit);
|
this->PredLoopInternal(dmat, out_preds, model, tree_begin, ntree_limit);
|
||||||
}
|
}
|
||||||
|
|
||||||
void UpdatePredictionCache(
|
void UpdatePredictionCache(const gbm::GBTreeModel& model,
|
||||||
const gbm::GBTreeModel& model,
|
|
||||||
std::vector<std::unique_ptr<TreeUpdater>>* updaters,
|
std::vector<std::unique_ptr<TreeUpdater>>* updaters,
|
||||||
int num_new_trees) override {
|
int num_new_trees) override {
|
||||||
int old_ntree = model.trees.size() - num_new_trees;
|
int old_ntree = model.trees.size() - num_new_trees;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user