Remove update prediction cache from predictors. (#5312)

Move this function into gbtree and use only the updater to do so. Now that the predictor knows exactly how many trees to predict, there is no need for it to update the prediction cache.
This commit is contained in:
Jiaming Yuan
2020-02-17 11:35:47 +08:00
committed by GitHub
parent e433a379e4
commit 0110754a76
16 changed files with 118 additions and 164 deletions

View File

@@ -199,7 +199,7 @@ class CutsBuilder {
}
void AddCutPoint(WQSketch::SummaryContainer const& summary, int max_bin) {
int required_cuts = std::min(static_cast<int>(summary.size), max_bin);
size_t required_cuts = std::min(summary.size, static_cast<size_t>(max_bin));
for (size_t i = 1; i < required_cuts; ++i) {
bst_float cpt = summary.data[i].value;
if (i == 1 || cpt > p_cuts_->cut_values_.back()) {

View File

@@ -181,8 +181,7 @@ class Transform {
* \param func A callable object, accepting a size_t thread index,
* followed by a set of Span classes.
* \param range Range object specifying parallel threads index range.
* \param devices GPUSet specifying GPUs to use, when compiling for CPU,
* this should be GPUSet::Empty().
* \param device Specify GPU to use.
* \param shard Whether Shard for HostDeviceVector is needed.
*/
template <typename Functor>

View File

@@ -296,8 +296,15 @@ void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& ne
num_new_trees += new_trees[gid].size();
model_.CommitModel(std::move(new_trees[gid]), gid);
}
CHECK(configured_);
GetPredictor()->UpdatePredictionCache(model_, &updaters_, num_new_trees, m, predts);
auto* out = &predts->predictions;
if (model_.learner_model_param_->num_output_group == 1 &&
updaters_.size() > 0 &&
num_new_trees == 1 &&
out->Size() > 0 &&
updaters_.back()->UpdatePredictionCache(m, out)) {
auto delta = num_new_trees / model_.learner_model_param_->num_output_group;
predts->Update(delta);
}
monitor_.Stop("CommitModel");
}
@@ -357,6 +364,76 @@ void GBTree::SaveModel(Json* p_out) const {
model_.SaveModel(&model);
}
/*!
 * \brief Predict a whole DMatrix with the predictor selected by GetPredictor.
 *
 * \param p_fmat      Input data; also forwarded to GetPredictor for the selection.
 * \param out_preds   Prediction cache entry that receives the result; its
 *                    `predictions` vector is forwarded to GetPredictor as well.
 * \param training    Not read by this implementation.
 * \param ntree_limit Forwarded unchanged to the predictor.
 */
void GBTree::PredictBatch(DMatrix* p_fmat,
                          PredictionCacheEntry* out_preds,
                          bool training,
                          unsigned ntree_limit) {
  CHECK(configured_);
  // Pick the predictor first, then delegate; prediction always starts at tree 0.
  auto const& chosen_predictor = GetPredictor(&out_preds->predictions, p_fmat);
  chosen_predictor->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit);
}
/*!
 * \brief Select which of the two owned predictors (cpu_predictor_ or
 *        gpu_predictor_) should serve a prediction request.
 *
 * The decision is made in this order:
 *   1. An explicit (non-kAuto) `tparam_.predictor` setting.
 *   2. Whether the first SparsePage batch of `f_dmat` is readable on device.
 *   3. Empty `out_pred` combined with a non-empty model: use the CPU predictor.
 *   4. `tparam_.tree_method == kGPUHist`: use the GPU predictor.
 *   5. Otherwise the CPU predictor.
 *
 * \param out_pred Optional prediction vector; only its Size() is inspected.
 * \param f_dmat   Optional input data; only its first SparsePage batch is inspected.
 *
 * \return Reference to the selected predictor member.
 */
std::unique_ptr<Predictor> const &
GBTree::GetPredictor(HostDeviceVector<float> const *out_pred,
DMatrix *f_dmat) const {
CHECK(configured_);
// 1. The user asked for a specific predictor: honour it without inspecting the data.
if (tparam_.predictor != PredictorType::kAuto) {
if (tparam_.predictor == PredictorType::kGPUPredictor) {
#if defined(XGBOOST_USE_CUDA)
CHECK(gpu_predictor_);
return gpu_predictor_;
#else
// NOTE(review): presumably AssertGPUSupport() raises a fatal error in non-CUDA
// builds; if it returns, control falls through to the CPU predictor below.
this->AssertGPUSupport();
#endif // defined(XGBOOST_USE_CUDA)
}
CHECK(cpu_predictor_);
return cpu_predictor_;
}
// 2. Automatic selection. `on_device` is true only when a DMatrix was supplied and
// the data of its first SparsePage batch reports DeviceCanRead().
auto on_device =
f_dmat &&
(*(f_dmat->GetBatches<SparsePage>().begin())).data.DeviceCanRead();
// Use GPU Predictor if data is already on device.
if (on_device) {
#if defined(XGBOOST_USE_CUDA)
CHECK(gpu_predictor_);
return gpu_predictor_;
#else
LOG(FATAL) << "Data is on CUDA device, but XGBoost is not compiled with "
"CUDA support.";
return cpu_predictor_;
#endif // defined(XGBOOST_USE_CUDA)
}
// GPU_Hist by default has prediction cache calculated from quantile values,
// so GPU Predictor is not used for training dataset. But when XGBoost
// performs continue training with an existing model, the prediction cache is
// not available and number of trees doesn't equal zero, the whole training
// dataset got copied into GPU for precise prediction. This condition tries
// to avoid such copy by calling CPU Predictor instead.
//
// Note: `on_device` is necessarily false here because the branch above returns
// in both build configurations, so the `!on_device` term is always true.
if ((out_pred && out_pred->Size() == 0) && (model_.param.num_trees != 0) &&
// FIXME(trivialfis): Implement a better method for testing whether data
// is on device after DMatrix refactoring is done.
!on_device) {
CHECK(cpu_predictor_);
return cpu_predictor_;
}
// 4. Training with gpu_hist: use the GPU predictor when CUDA is compiled in.
if (tparam_.tree_method == TreeMethod::kGPUHist) {
#if defined(XGBOOST_USE_CUDA)
CHECK(gpu_predictor_);
return gpu_predictor_;
#else
this->AssertGPUSupport();
return cpu_predictor_;
#endif // defined(XGBOOST_USE_CUDA)
}
// 5. Default: CPU predictor.
CHECK(cpu_predictor_);
return cpu_predictor_;
}
class Dart : public GBTree {
public:
explicit Dart(LearnerModelParam const* booster_config) :

View File

@@ -201,11 +201,7 @@ class GBTree : public GradientBooster {
/*!
 * \brief Predict a whole DMatrix with the predictor selected by GetPredictor.
 *
 * \param p_fmat      Input data; also forwarded to GetPredictor for the selection.
 * \param out_preds   Prediction cache entry that receives the result.
 * \param training    Not read by this implementation.
 * \param ntree_limit Forwarded unchanged to the predictor.
 */
void PredictBatch(DMatrix* p_fmat,
                  PredictionCacheEntry* out_preds,
                  bool training,
                  unsigned ntree_limit) override {
  CHECK(configured_);
  // Pick the predictor first, then delegate; prediction always starts at tree 0.
  auto const& chosen_predictor = GetPredictor(&out_preds->predictions, p_fmat);
  chosen_predictor->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit);
}
unsigned ntree_limit) override;
void PredictInstance(const SparsePage::Inst& inst,
std::vector<bst_float>* out_preds,
@@ -256,62 +252,7 @@ class GBTree : public GradientBooster {
std::vector<std::unique_ptr<RegTree> >* ret);
/*!
 * \brief Select which of the two owned predictors (cpu_predictor_ or
 *        gpu_predictor_) should serve a prediction request.
 *
 * Order of checks: explicit `tparam_.predictor` setting, device residency of the
 * first SparsePage batch, empty `out_pred` with a non-empty model (CPU), gpu_hist
 * tree method (GPU), and finally the CPU predictor as the default.
 *
 * \param out_pred Optional prediction vector; only its Size() is inspected.
 * \param f_dmat   Optional input data; only its first SparsePage batch is inspected.
 *
 * \return Reference to the selected predictor member.
 */
std::unique_ptr<Predictor> const& GetPredictor(HostDeviceVector<float> const* out_pred = nullptr,
DMatrix* f_dmat = nullptr) const {
CHECK(configured_);
// The user asked for a specific predictor: honour it without inspecting the data.
if (tparam_.predictor != PredictorType::kAuto) {
if (tparam_.predictor == PredictorType::kGPUPredictor) {
#if defined(XGBOOST_USE_CUDA)
CHECK(gpu_predictor_);
return gpu_predictor_;
#else
// NOTE(review): presumably AssertGPUSupport() raises a fatal error in non-CUDA
// builds; if it returns, control falls through to the CPU predictor below.
this->AssertGPUSupport();
#endif // defined(XGBOOST_USE_CUDA)
}
CHECK(cpu_predictor_);
return cpu_predictor_;
}
// True only when a DMatrix was supplied and the data of its first SparsePage batch
// reports DeviceCanRead().
auto on_device = f_dmat && (*(f_dmat->GetBatches<SparsePage>().begin())).data.DeviceCanRead();
// Use GPU Predictor if data is already on device.
if (on_device) {
#if defined(XGBOOST_USE_CUDA)
CHECK(gpu_predictor_);
return gpu_predictor_;
#else
LOG(FATAL) << "Data is on CUDA device, but XGBoost is not compiled with CUDA support.";
return cpu_predictor_;
#endif // defined(XGBOOST_USE_CUDA)
}
// GPU_Hist by default has prediction cache calculated from quantile values, so GPU
// Predictor is not used for training dataset. But when XGBoost performs continue
// training with an existing model, the prediction cache is not available and number
// of trees doesn't equal zero, the whole training dataset got copied into GPU for
// precise prediction. This condition tries to avoid such copy by calling CPU
// Predictor instead.
//
// Note: `on_device` is necessarily false here because the branch above returns in
// both build configurations, so the `!on_device` term is always true.
if ((out_pred && out_pred->Size() == 0) &&
(model_.param.num_trees != 0) &&
// FIXME(trivialfis): Implement a better method for testing whether data is on
// device after DMatrix refactoring is done.
!on_device) {
CHECK(cpu_predictor_);
return cpu_predictor_;
}
// Training with gpu_hist: use the GPU predictor when CUDA is compiled in.
if (tparam_.tree_method == TreeMethod::kGPUHist) {
#if defined(XGBOOST_USE_CUDA)
CHECK(gpu_predictor_);
return gpu_predictor_;
#else
this->AssertGPUSupport();
return cpu_predictor_;
#endif // defined(XGBOOST_USE_CUDA)
}
// Default: CPU predictor.
CHECK(cpu_predictor_);
return cpu_predictor_;
}
DMatrix* f_dmat = nullptr) const;
// commit new trees all at once
virtual void CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees,

View File

@@ -146,8 +146,12 @@ class CPUPredictor : public Predictor {
CHECK_EQ(tree_begin, 0);
auto* out_preds = &predts->predictions;
CHECK_GE(predts->version, tree_begin);
if (out_preds->Size() == 0 && dmat->Info().num_row_ != 0) {
CHECK_EQ(predts->version, 0);
}
if (predts->version == 0) {
CHECK_EQ(out_preds->Size(), 0);
// out_preds->Size() can be non-zero here, because the vector is initialized at this
// point, before any tree is built at the 0th iteration.
this->InitOutPredictions(dmat->Info(), out_preds, model);
}
@@ -185,30 +189,6 @@ class CPUPredictor : public Predictor {
out_preds->Size() == dmat->Info().num_row_);
}
/*!
 * \brief Bring a cached prediction vector up to date after new trees were added.
 *
 * Three cases are handled:
 *  - empty cache: initialise it and predict with every tree in the model;
 *  - single output group, exactly one new tree, and the last updater reports that it
 *    refreshed the cache itself: nothing more to do;
 *  - otherwise: add the contribution of the newly appended trees only.
 * In every case the cache entry is then advanced via `predts->Update`.
 *
 * \param model         Tree model that already contains the new trees.
 * \param updaters      Tree updaters; only the last one is consulted.
 * \param num_new_trees Number of trees appended to the model.
 * \param m             Data the cache entry belongs to.
 * \param predts        Cache entry to refresh.
 */
void UpdatePredictionCache(
    const gbm::GBTreeModel& model,
    std::vector<std::unique_ptr<TreeUpdater>>* updaters,
    int num_new_trees,
    DMatrix* m,
    PredictionCacheEntry* predts) override {
  auto* cache = &predts->predictions;

  if (cache->Size() == 0) {
    // Nothing cached yet: start from the initial predictions and run all trees.
    this->InitOutPredictions(m->Info(), cache, model);
    this->PredInternal(m, &cache->HostVector(), model, 0, model.trees.size());
  } else {
    // Short-circuit order matters: the updater is only asked when the cheap
    // preconditions hold.
    bool const refreshed_by_updater =
        model.learner_model_param_->num_output_group == 1 &&
        !updaters->empty() &&
        num_new_trees == 1 &&
        updaters->back()->UpdatePredictionCache(m, cache);
    if (!refreshed_by_updater) {
      // Fall back to predicting with the freshly added trees only.
      int first_new_tree = model.trees.size() - num_new_trees;
      PredInternal(m, &cache->HostVector(), model, first_new_tree, model.trees.size());
    }
  }

  predts->Update(num_new_trees / model.learner_model_param_->num_output_group);
}
void PredictInstance(const SparsePage::Inst& inst,
std::vector<bst_float>* out_preds,
const gbm::GBTreeModel& model, unsigned ntree_limit) override {

View File

@@ -7,7 +7,6 @@
#include <thrust/fill.h>
#include <memory>
#include "xgboost/parameter.h"
#include "xgboost/data.h"
#include "xgboost/predictor.h"
#include "xgboost/tree_model.h"
@@ -316,8 +315,10 @@ class GPUPredictor : public xgboost::Predictor {
CHECK_EQ(tree_begin, 0);
auto* out_preds = &predts->predictions;
CHECK_GE(predts->version, tree_begin);
if (out_preds->Size() == 0 && dmat->Info().num_row_ != 0) {
CHECK_EQ(predts->version, 0);
}
if (predts->version == 0) {
CHECK_EQ(out_preds->Size(), 0);
this->InitOutPredictions(dmat->Info(), out_preds, model);
}
@@ -370,32 +371,6 @@ class GPUPredictor : public xgboost::Predictor {
}
}
/*!
 * \brief Bring a cached prediction vector up to date after new trees were added,
 *        using the device configured by `generic_param_->gpu_id`.
 *
 * Three cases are handled:
 *  - empty cache: initialise it and predict with every tree in the model;
 *  - single output group, exactly one new tree, and the last updater reports that it
 *    refreshed the cache itself: nothing more to do;
 *  - otherwise: add the contribution of the newly appended trees only.
 * In every case the cache entry is then advanced via `predts->Update`.
 *
 * \param model         Tree model that already contains the new trees.
 * \param updaters      Tree updaters; only the last one is consulted.
 * \param num_new_trees Number of trees appended to the model.
 * \param m             Data the cache entry belongs to.
 * \param predts        Cache entry to refresh.
 */
void UpdatePredictionCache(
    const gbm::GBTreeModel& model,
    std::vector<std::unique_ptr<TreeUpdater>>* updaters,
    int num_new_trees,
    DMatrix* m,
    PredictionCacheEntry* predts) override {
  int gpu_ordinal = generic_param_->gpu_id;
  ConfigureDevice(gpu_ordinal);

  auto* cache = &predts->predictions;

  if (cache->Size() == 0) {
    // Nothing cached yet: start from the initial predictions and run all trees.
    InitOutPredictions(m->Info(), cache, model);
    DevicePredictInternal(m, cache, model, 0, model.trees.size());
  } else {
    // Short-circuit order matters: the updater is only asked when the cheap
    // preconditions hold.
    bool const refreshed_by_updater =
        model.learner_model_param_->num_output_group == 1 &&
        !updaters->empty() &&
        num_new_trees == 1 &&
        updaters->back()->UpdatePredictionCache(m, cache);
    if (!refreshed_by_updater) {
      // Fall back to predicting with the freshly added trees only.
      auto first_new_tree = model.trees.size() - num_new_trees;
      DevicePredictInternal(m, cache, model, first_new_tree, model.trees.size());
    }
  }

  predts->Update(num_new_trees / model.learner_model_param_->num_output_group);
}
void PredictInstance(const SparsePage::Inst& inst,
std::vector<bst_float>* out_preds,
const gbm::GBTreeModel& model, unsigned ntree_limit) override {