Remove update prediction cache from predictors. (#5312)
Move this function into gbtree, which now uses only the updater to do so. Since the predictor now knows exactly how many trees to predict, there is no need for it to update the prediction cache.
This commit is contained in:
@@ -199,7 +199,7 @@ class CutsBuilder {
|
||||
}
|
||||
|
||||
void AddCutPoint(WQSketch::SummaryContainer const& summary, int max_bin) {
|
||||
int required_cuts = std::min(static_cast<int>(summary.size), max_bin);
|
||||
size_t required_cuts = std::min(summary.size, static_cast<size_t>(max_bin));
|
||||
for (size_t i = 1; i < required_cuts; ++i) {
|
||||
bst_float cpt = summary.data[i].value;
|
||||
if (i == 1 || cpt > p_cuts_->cut_values_.back()) {
|
||||
|
||||
@@ -181,8 +181,7 @@ class Transform {
|
||||
* \param func A callable object, accepting a size_t thread index,
|
||||
* followed by a set of Span classes.
|
||||
* \param range Range object specifying parallel threads index range.
|
||||
* \param devices GPUSet specifying GPUs to use, when compiling for CPU,
|
||||
* this should be GPUSet::Empty().
|
||||
* \param device Specify GPU to use.
|
||||
* \param shard Whether Shard for HostDeviceVector is needed.
|
||||
*/
|
||||
template <typename Functor>
|
||||
|
||||
@@ -296,8 +296,15 @@ void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& ne
|
||||
num_new_trees += new_trees[gid].size();
|
||||
model_.CommitModel(std::move(new_trees[gid]), gid);
|
||||
}
|
||||
CHECK(configured_);
|
||||
GetPredictor()->UpdatePredictionCache(model_, &updaters_, num_new_trees, m, predts);
|
||||
auto* out = &predts->predictions;
|
||||
if (model_.learner_model_param_->num_output_group == 1 &&
|
||||
updaters_.size() > 0 &&
|
||||
num_new_trees == 1 &&
|
||||
out->Size() > 0 &&
|
||||
updaters_.back()->UpdatePredictionCache(m, out)) {
|
||||
auto delta = num_new_trees / model_.learner_model_param_->num_output_group;
|
||||
predts->Update(delta);
|
||||
}
|
||||
monitor_.Stop("CommitModel");
|
||||
}
|
||||
|
||||
@@ -357,6 +364,76 @@ void GBTree::SaveModel(Json* p_out) const {
|
||||
model_.SaveModel(&model);
|
||||
}
|
||||
|
||||
/*!
 * \brief Predict for a whole DMatrix using the predictor selected for it.
 *
 * \param p_fmat      Data to predict on; also passed to the predictor selection.
 * \param out_preds   Cache entry; its `predictions` vector is passed to the
 *                    predictor selection and the entry itself to the predictor.
 * \param training    Not read by this function.
 * \param ntree_limit Forwarded to the predictor unchanged.
 */
void GBTree::PredictBatch(DMatrix* p_fmat,
                          PredictionCacheEntry* out_preds,
                          bool training,
                          unsigned ntree_limit) {
  CHECK(configured_);
  auto const& predictor = GetPredictor(&out_preds->predictions, p_fmat);
  // The fourth argument is always 0 here; only ntree_limit varies per call.
  predictor->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit);
}
|
||||
|
||||
std::unique_ptr<Predictor> const &
|
||||
GBTree::GetPredictor(HostDeviceVector<float> const *out_pred,
|
||||
DMatrix *f_dmat) const {
|
||||
CHECK(configured_);
|
||||
if (tparam_.predictor != PredictorType::kAuto) {
|
||||
if (tparam_.predictor == PredictorType::kGPUPredictor) {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
CHECK(gpu_predictor_);
|
||||
return gpu_predictor_;
|
||||
#else
|
||||
this->AssertGPUSupport();
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
}
|
||||
CHECK(cpu_predictor_);
|
||||
return cpu_predictor_;
|
||||
}
|
||||
|
||||
auto on_device =
|
||||
f_dmat &&
|
||||
(*(f_dmat->GetBatches<SparsePage>().begin())).data.DeviceCanRead();
|
||||
|
||||
// Use GPU Predictor if data is already on device.
|
||||
if (on_device) {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
CHECK(gpu_predictor_);
|
||||
return gpu_predictor_;
|
||||
#else
|
||||
LOG(FATAL) << "Data is on CUDA device, but XGBoost is not compiled with "
|
||||
"CUDA support.";
|
||||
return cpu_predictor_;
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
}
|
||||
|
||||
// GPU_Hist by default has prediction cache calculated from quantile values,
|
||||
// so GPU Predictor is not used for training dataset. But when XGBoost
|
||||
// performs continue training with an existing model, the prediction cache is
|
||||
// not availbale and number of trees doesn't equal zero, the whole training
|
||||
// dataset got copied into GPU for precise prediction. This condition tries
|
||||
// to avoid such copy by calling CPU Predictor instead.
|
||||
if ((out_pred && out_pred->Size() == 0) && (model_.param.num_trees != 0) &&
|
||||
// FIXME(trivialfis): Implement a better method for testing whether data
|
||||
// is on device after DMatrix refactoring is done.
|
||||
!on_device) {
|
||||
CHECK(cpu_predictor_);
|
||||
return cpu_predictor_;
|
||||
}
|
||||
|
||||
if (tparam_.tree_method == TreeMethod::kGPUHist) {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
CHECK(gpu_predictor_);
|
||||
return gpu_predictor_;
|
||||
#else
|
||||
this->AssertGPUSupport();
|
||||
return cpu_predictor_;
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
}
|
||||
|
||||
CHECK(cpu_predictor_);
|
||||
return cpu_predictor_;
|
||||
}
|
||||
|
||||
class Dart : public GBTree {
|
||||
public:
|
||||
explicit Dart(LearnerModelParam const* booster_config) :
|
||||
|
||||
@@ -201,11 +201,7 @@ class GBTree : public GradientBooster {
|
||||
/*!
 * \brief Predict for a whole DMatrix using the predictor selected for it.
 *
 * \param p_fmat      Data to predict on; also passed to the predictor selection.
 * \param out_preds   Cache entry; its `predictions` vector is passed to the
 *                    predictor selection and the entry itself to the predictor.
 * \param training    Not read by this function.
 * \param ntree_limit Forwarded to the predictor unchanged.
 */
void PredictBatch(DMatrix* p_fmat,
                  PredictionCacheEntry* out_preds,
                  bool training,
                  unsigned ntree_limit) override {
  CHECK(configured_);
  auto const& predictor = GetPredictor(&out_preds->predictions, p_fmat);
  // The fourth argument is always 0 here; only ntree_limit varies per call.
  predictor->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit);
}
|
||||
unsigned ntree_limit) override;
|
||||
|
||||
void PredictInstance(const SparsePage::Inst& inst,
|
||||
std::vector<bst_float>* out_preds,
|
||||
@@ -256,62 +252,7 @@ class GBTree : public GradientBooster {
|
||||
std::vector<std::unique_ptr<RegTree> >* ret);
|
||||
|
||||
/*!
 * \brief Select the predictor (CPU or GPU) to use for a prediction request.
 *
 * An explicitly configured predictor takes precedence.  In auto mode the GPU
 * predictor is used when the first SparsePage of `f_dmat` is readable on
 * device; the CPU predictor is used when `out_pred` is given but empty while
 * the model already has trees; otherwise the GPU predictor is used for the
 * kGPUHist tree method and the CPU predictor for everything else.
 *
 * \param out_pred Prediction vector of the request, may be null.
 * \param f_dmat   Data of the request, may be null.
 * \return Reference to the owning pointer of the selected predictor.
 */
std::unique_ptr<Predictor> const& GetPredictor(HostDeviceVector<float> const* out_pred = nullptr,
                                               DMatrix* f_dmat = nullptr) const {
  CHECK(configured_);

  // Explicit GPU predictor request.
  if (tparam_.predictor == PredictorType::kGPUPredictor) {
#if defined(XGBOOST_USE_CUDA)
    CHECK(gpu_predictor_);
    return gpu_predictor_;
#else
    this->AssertGPUSupport();
    CHECK(cpu_predictor_);
    return cpu_predictor_;
#endif  // defined(XGBOOST_USE_CUDA)
  }
  // Any other explicit (non-auto) choice resolves to the CPU predictor.
  if (tparam_.predictor != PredictorType::kAuto) {
    CHECK(cpu_predictor_);
    return cpu_predictor_;
  }

  bool on_device = false;
  if (f_dmat) {
    on_device = (*(f_dmat->GetBatches<SparsePage>().begin())).data.DeviceCanRead();
  }

  // Data already readable on device: predict there.
  if (on_device) {
#if defined(XGBOOST_USE_CUDA)
    CHECK(gpu_predictor_);
    return gpu_predictor_;
#else
    LOG(FATAL) << "Data is on CUDA device, but XGBoost is not compiled with CUDA support.";
    return cpu_predictor_;
#endif  // defined(XGBOOST_USE_CUDA)
  }

  // GPU_Hist normally fills the prediction cache from quantile values, so the
  // GPU predictor is not needed for the training dataset.  When training is
  // continued from an existing model the cache is not available while the
  // tree count is non-zero, and precise prediction would copy the whole
  // training dataset to the GPU.  Fall back to the CPU predictor to avoid
  // that copy.
  // FIXME(trivialfis): Implement a better method for testing whether data is on
  // device after DMatrix refactoring is done.
  bool const cache_is_empty = out_pred && out_pred->Size() == 0;
  bool const model_has_trees = model_.param.num_trees != 0;
  if (cache_is_empty && model_has_trees && !on_device) {
    CHECK(cpu_predictor_);
    return cpu_predictor_;
  }

  if (tparam_.tree_method == TreeMethod::kGPUHist) {
#if defined(XGBOOST_USE_CUDA)
    CHECK(gpu_predictor_);
    return gpu_predictor_;
#else
    this->AssertGPUSupport();
    return cpu_predictor_;
#endif  // defined(XGBOOST_USE_CUDA)
  }

  CHECK(cpu_predictor_);
  return cpu_predictor_;
}
|
||||
DMatrix* f_dmat = nullptr) const;
|
||||
|
||||
// commit new trees all at once
|
||||
virtual void CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees,
|
||||
|
||||
@@ -146,8 +146,12 @@ class CPUPredictor : public Predictor {
|
||||
CHECK_EQ(tree_begin, 0);
|
||||
auto* out_preds = &predts->predictions;
|
||||
CHECK_GE(predts->version, tree_begin);
|
||||
if (out_preds->Size() == 0 && dmat->Info().num_row_ != 0) {
|
||||
CHECK_EQ(predts->version, 0);
|
||||
}
|
||||
if (predts->version == 0) {
|
||||
CHECK_EQ(out_preds->Size(), 0);
|
||||
// out_preds->Size() can be non-zero as it's initialized here before any tree is
|
||||
// built at the 0^th iteration.
|
||||
this->InitOutPredictions(dmat->Info(), out_preds, model);
|
||||
}
|
||||
|
||||
@@ -185,30 +189,6 @@ class CPUPredictor : public Predictor {
|
||||
out_preds->Size() == dmat->Info().num_row_);
|
||||
}
|
||||
|
||||
void UpdatePredictionCache(
|
||||
const gbm::GBTreeModel& model,
|
||||
std::vector<std::unique_ptr<TreeUpdater>>* updaters,
|
||||
int num_new_trees,
|
||||
DMatrix* m,
|
||||
PredictionCacheEntry* predts) override {
|
||||
int old_ntree = model.trees.size() - num_new_trees;
|
||||
// update cache entry
|
||||
auto* out = &predts->predictions;
|
||||
if (predts->predictions.Size() == 0) {
|
||||
this->InitOutPredictions(m->Info(), out, model);
|
||||
this->PredInternal(m, &out->HostVector(), model, 0, model.trees.size());
|
||||
} else if (model.learner_model_param_->num_output_group == 1 &&
|
||||
updaters->size() > 0 &&
|
||||
num_new_trees == 1 &&
|
||||
updaters->back()->UpdatePredictionCache(m, out)) {
|
||||
{}
|
||||
} else {
|
||||
PredInternal(m, &out->HostVector(), model, old_ntree, model.trees.size());
|
||||
}
|
||||
auto delta = num_new_trees / model.learner_model_param_->num_output_group;
|
||||
predts->Update(delta);
|
||||
}
|
||||
|
||||
void PredictInstance(const SparsePage::Inst& inst,
|
||||
std::vector<bst_float>* out_preds,
|
||||
const gbm::GBTreeModel& model, unsigned ntree_limit) override {
|
||||
|
||||
@@ -7,7 +7,6 @@
|
||||
#include <thrust/fill.h>
|
||||
#include <memory>
|
||||
|
||||
#include "xgboost/parameter.h"
|
||||
#include "xgboost/data.h"
|
||||
#include "xgboost/predictor.h"
|
||||
#include "xgboost/tree_model.h"
|
||||
@@ -316,8 +315,10 @@ class GPUPredictor : public xgboost::Predictor {
|
||||
CHECK_EQ(tree_begin, 0);
|
||||
auto* out_preds = &predts->predictions;
|
||||
CHECK_GE(predts->version, tree_begin);
|
||||
if (out_preds->Size() == 0 && dmat->Info().num_row_ != 0) {
|
||||
CHECK_EQ(predts->version, 0);
|
||||
}
|
||||
if (predts->version == 0) {
|
||||
CHECK_EQ(out_preds->Size(), 0);
|
||||
this->InitOutPredictions(dmat->Info(), out_preds, model);
|
||||
}
|
||||
|
||||
@@ -370,32 +371,6 @@ class GPUPredictor : public xgboost::Predictor {
|
||||
}
|
||||
}
|
||||
|
||||
void UpdatePredictionCache(
|
||||
const gbm::GBTreeModel& model,
|
||||
std::vector<std::unique_ptr<TreeUpdater>>* updaters,
|
||||
int num_new_trees,
|
||||
DMatrix* m,
|
||||
PredictionCacheEntry* predts) override {
|
||||
int device = generic_param_->gpu_id;
|
||||
ConfigureDevice(device);
|
||||
auto old_ntree = model.trees.size() - num_new_trees;
|
||||
// update cache entry
|
||||
auto* out = &predts->predictions;
|
||||
if (predts->predictions.Size() == 0) {
|
||||
InitOutPredictions(m->Info(), out, model);
|
||||
DevicePredictInternal(m, out, model, 0, model.trees.size());
|
||||
} else if (model.learner_model_param_->num_output_group == 1 &&
|
||||
updaters->size() > 0 &&
|
||||
num_new_trees == 1 &&
|
||||
updaters->back()->UpdatePredictionCache(m, out)) {
|
||||
{}
|
||||
} else {
|
||||
DevicePredictInternal(m, out, model, old_ntree, model.trees.size());
|
||||
}
|
||||
auto delta = num_new_trees / model.learner_model_param_->num_output_group;
|
||||
predts->Update(delta);
|
||||
}
|
||||
|
||||
void PredictInstance(const SparsePage::Inst& inst,
|
||||
std::vector<bst_float>* out_preds,
|
||||
const gbm::GBTreeModel& model, unsigned ntree_limit) override {
|
||||
|
||||
Reference in New Issue
Block a user