Move prediction cache to Learner. (#5220)

* Move prediction cache into Learner.

* Clean-ups

- Remove duplicated cache in Learner and GBM.
- Remove ad-hoc fix of invalid cache.
- Remove `PredictFromCache` in predictors.
- Remove prediction cache for linear altogether, as it only moves the
  prediction into the training process and doesn't provide any actual overall
  speed gain.
- The cache is now unique to Learner, which means its ownership is no longer
  shared with any other component.

* Changes

- Add version to prediction cache.
- Use a weak pointer to check for an expired DMatrix.
- Pass shared pointer instead of raw pointer.
This commit is contained in:
Jiaming Yuan
2020-02-14 13:04:23 +08:00
committed by GitHub
parent 24ad9dec0b
commit c35cdecddd
19 changed files with 457 additions and 372 deletions

View File

@@ -15,6 +15,7 @@
#include "xgboost/gbm.h"
#include "xgboost/json.h"
#include "xgboost/predictor.h"
#include "xgboost/linear_updater.h"
#include "xgboost/logging.h"
#include "xgboost/learner.h"
@@ -50,21 +51,14 @@ struct GBLinearTrainParam : public XGBoostParameter<GBLinearTrainParam> {
*/
class GBLinear : public GradientBooster {
public:
explicit GBLinear(const std::vector<std::shared_ptr<DMatrix> > &cache,
LearnerModelParam const* learner_model_param)
explicit GBLinear(LearnerModelParam const* learner_model_param)
: learner_model_param_{learner_model_param},
model_{learner_model_param_},
previous_model_{learner_model_param_},
sum_instance_weight_(0),
sum_weight_complete_(false),
is_converged_(false) {
// Add matrices to the prediction cache
for (auto &d : cache) {
PredictionCacheEntry e;
e.data = d;
cache_[d.get()] = std::move(e);
}
}
is_converged_(false) {}
void Configure(const Args& cfg) override {
if (model_.weight.size() == 0) {
model_.Configure(cfg);
@@ -118,7 +112,7 @@ class GBLinear : public GradientBooster {
void DoBoost(DMatrix *p_fmat,
HostDeviceVector<GradientPair> *in_gpair,
ObjFunction* obj) override {
PredictionCacheEntry* predt) override {
monitor_.Start("DoBoost");
model_.LazyInitModel();
@@ -127,28 +121,19 @@ class GBLinear : public GradientBooster {
if (!this->CheckConvergence()) {
updater_->Update(in_gpair, p_fmat, &model_, sum_instance_weight_);
}
this->UpdatePredictionCache();
monitor_.Stop("DoBoost");
}
void PredictBatch(DMatrix *p_fmat,
HostDeviceVector<bst_float> *out_preds,
PredictionCacheEntry *predts,
bool training,
unsigned ntree_limit) override {
monitor_.Start("PredictBatch");
auto* out_preds = &predts->predictions;
CHECK_EQ(ntree_limit, 0U)
<< "GBLinear::Predict ntrees is only valid for gbtree predictor";
// Try to predict from cache
auto it = cache_.find(p_fmat);
if (it != cache_.end() && it->second.predictions.size() != 0) {
std::vector<bst_float> &y = it->second.predictions;
out_preds->Resize(y.size());
std::copy(y.begin(), y.end(), out_preds->HostVector().begin());
} else {
this->PredictBatchInternal(p_fmat, &out_preds->HostVector());
}
this->PredictBatchInternal(p_fmat, &out_preds->HostVector());
monitor_.Stop("PredictBatch");
}
// add base margin
@@ -258,7 +243,8 @@ class GBLinear : public GradientBooster {
const size_t ridx = batch.base_rowid + i;
// loop over output groups
for (int gid = 0; gid < ngroup; ++gid) {
bst_float margin = (base_margin.size() != 0) ?
bst_float margin =
(base_margin.size() != 0) ?
base_margin[ridx * ngroup + gid] : learner_model_param_->base_score;
this->Pred(batch[i], &preds[ridx * ngroup], gid, margin);
}
@@ -266,17 +252,6 @@ class GBLinear : public GradientBooster {
}
monitor_.Stop("PredictBatchInternal");
}
void UpdatePredictionCache() {
// update cache entry
for (auto &kv : cache_) {
PredictionCacheEntry &e = kv.second;
if (e.predictions.size() == 0) {
size_t n = model_.learner_model_param_->num_output_group * e.data->Info().num_row_;
e.predictions.resize(n);
}
this->PredictBatchInternal(e.data.get(), &e.predictions);
}
}
bool CheckConvergence() {
if (param_.tolerance == 0.0f) return false;
@@ -327,22 +302,6 @@ class GBLinear : public GradientBooster {
bool sum_weight_complete_;
common::Monitor monitor_;
bool is_converged_;
/**
* \struct PredictionCacheEntry
*
* \brief Contains pointer to input matrix and associated cached predictions.
*/
struct PredictionCacheEntry {
std::shared_ptr<DMatrix> data;
std::vector<bst_float> predictions;
};
/**
* \brief Map of matrices and associated cached predictions to facilitate
* storing and looking up predictions.
*/
std::unordered_map<DMatrix*, PredictionCacheEntry> cache_;
};
// register the objective functions
@@ -350,9 +309,8 @@ DMLC_REGISTER_PARAMETER(GBLinearTrainParam);
XGBOOST_REGISTER_GBM(GBLinear, "gblinear")
.describe("Linear booster, implement generalized linear model.")
.set_body([](const std::vector<std::shared_ptr<DMatrix> > &cache,
LearnerModelParam const* booster_config) {
return new GBLinear(cache, booster_config);
.set_body([](LearnerModelParam const* booster_config) {
return new GBLinear(booster_config);
});
} // namespace gbm
} // namespace xgboost

View File

@@ -55,8 +55,9 @@ class GBLinearModel : public Model {
std::vector<bst_float> weight;
// initialize the model parameter
inline void LazyInitModel() {
if (!weight.empty())
if (!weight.empty()) {
return;
}
// bias is the last weight
weight.resize((learner_model_param_->num_feature + 1) *
learner_model_param_->num_output_group);

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2015 by Contributors
* Copyright 2015-2020 by Contributors
* \file gbm.cc
* \brief Registry of gradient boosters.
*/
@@ -20,13 +20,12 @@ namespace xgboost {
GradientBooster* GradientBooster::Create(
const std::string& name,
GenericParameter const* generic_param,
LearnerModelParam const* learner_model_param,
const std::vector<std::shared_ptr<DMatrix> >& cache_mats) {
LearnerModelParam const* learner_model_param) {
auto *e = ::dmlc::Registry< ::xgboost::GradientBoosterReg>::Get()->Find(name);
if (e == nullptr) {
LOG(FATAL) << "Unknown gbm type " << name;
}
auto p_bst = (e->body)(cache_mats, learner_model_param);
auto p_bst = (e->body)(learner_model_param);
p_bst->generic_param_ = generic_param;
return p_bst;
}

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2014-2019 by Contributors
* Copyright 2014-2020 by Contributors
* \file gbtree.cc
* \brief gradient boosted tree implementation.
* \author Tianqi Chen
@@ -14,6 +14,7 @@
#include <limits>
#include <algorithm>
#include "xgboost/data.h"
#include "xgboost/gbm.h"
#include "xgboost/logging.h"
#include "xgboost/json.h"
@@ -47,14 +48,14 @@ void GBTree::Configure(const Args& cfg) {
// configure predictors
if (!cpu_predictor_) {
cpu_predictor_ = std::unique_ptr<Predictor>(
Predictor::Create("cpu_predictor", this->generic_param_, cache_));
Predictor::Create("cpu_predictor", this->generic_param_));
}
cpu_predictor_->Configure(cfg);
#if defined(XGBOOST_USE_CUDA)
auto n_gpus = common::AllVisibleGPUs();
if (!gpu_predictor_ && n_gpus != 0) {
gpu_predictor_ = std::unique_ptr<Predictor>(
Predictor::Create("gpu_predictor", this->generic_param_, cache_));
Predictor::Create("gpu_predictor", this->generic_param_));
}
if (n_gpus != 0) {
gpu_predictor_->Configure(cfg);
@@ -183,7 +184,7 @@ void GBTree::ConfigureUpdaters() {
void GBTree::DoBoost(DMatrix* p_fmat,
HostDeviceVector<GradientPair>* in_gpair,
ObjFunction* obj) {
PredictionCacheEntry* predt) {
std::vector<std::vector<std::unique_ptr<RegTree> > > new_trees;
const int ngroup = model_.learner_model_param_->num_output_group;
ConfigureWithKnownData(this->cfg_, p_fmat);
@@ -195,7 +196,7 @@ void GBTree::DoBoost(DMatrix* p_fmat,
new_trees.push_back(std::move(ret));
} else {
CHECK_EQ(in_gpair->Size() % ngroup, 0U)
<< "must have exactly ngroup*nrow gpairs";
<< "must have exactly ngroup * nrow gpairs";
// TODO(canonizer): perform this on GPU if HostDeviceVector has device set.
HostDeviceVector<GradientPair> tmp(in_gpair->Size() / ngroup,
GradientPair(),
@@ -214,7 +215,7 @@ void GBTree::DoBoost(DMatrix* p_fmat,
}
}
monitor_.Stop("BoostNewTrees");
this->CommitModel(std::move(new_trees));
this->CommitModel(std::move(new_trees), p_fmat, predt);
}
void GBTree::InitUpdater(Args const& cfg) {
@@ -286,7 +287,9 @@ void GBTree::BoostNewTrees(HostDeviceVector<GradientPair>* gpair,
}
}
void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees) {
void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees,
DMatrix* m,
PredictionCacheEntry* predts) {
monitor_.Start("CommitModel");
int num_new_trees = 0;
for (uint32_t gid = 0; gid < model_.learner_model_param_->num_output_group; ++gid) {
@@ -294,7 +297,7 @@ void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& ne
model_.CommitModel(std::move(new_trees[gid]), gid);
}
CHECK(configured_);
GetPredictor()->UpdatePredictionCache(model_, &updaters_, num_new_trees);
GetPredictor()->UpdatePredictionCache(model_, &updaters_, num_new_trees, m, predts);
monitor_.Stop("CommitModel");
}
@@ -303,13 +306,16 @@ void GBTree::LoadConfig(Json const& in) {
fromJson(in["gbtree_train_param"], &tparam_);
int32_t const n_gpus = xgboost::common::AllVisibleGPUs();
if (n_gpus == 0 && tparam_.predictor == PredictorType::kGPUPredictor) {
LOG(WARNING)
<< "Loading from a raw memory buffer on CPU only machine. "
"Changing predictor to auto.";
tparam_.UpdateAllowUnknown(Args{{"predictor", "auto"}});
}
if (n_gpus == 0 && tparam_.tree_method == TreeMethod::kGPUHist) {
tparam_.UpdateAllowUnknown(Args{{"tree_method", "hist"}});
LOG(WARNING)
<< "Loading from a raw memory buffer on CPU only machine. "
"Change tree_method to hist.";
"Changing tree_method to hist.";
}
auto const& j_updaters = get<Object const>(in["updater"]);
@@ -415,7 +421,7 @@ class Dart : public GBTree {
}
void PredictBatch(DMatrix* p_fmat,
HostDeviceVector<bst_float>* p_out_preds,
PredictionCacheEntry* p_out_preds,
bool training,
unsigned ntree_limit) override {
DropTrees(training);
@@ -426,7 +432,7 @@ class Dart : public GBTree {
}
size_t n = num_group * p_fmat->Info().num_row_;
const auto &base_margin = p_fmat->Info().base_margin_.ConstHostVector();
auto& out_preds = p_out_preds->HostVector();
auto& out_preds = p_out_preds->predictions.HostVector();
out_preds.resize(n);
if (base_margin.size() != 0) {
CHECK_EQ(out_preds.size(), n);
@@ -539,7 +545,9 @@ class Dart : public GBTree {
// commit new trees all at once
void
CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees) override {
CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees,
DMatrix* m,
PredictionCacheEntry* predts) override {
int num_new_trees = 0;
for (uint32_t gid = 0; gid < model_.learner_model_param_->num_output_group; ++gid) {
num_new_trees += new_trees[gid].size();
@@ -681,16 +689,13 @@ DMLC_REGISTER_PARAMETER(DartTrainParam);
XGBOOST_REGISTER_GBM(GBTree, "gbtree")
.describe("Tree booster, gradient boosted trees.")
.set_body([](const std::vector<std::shared_ptr<DMatrix> >& cached_mats,
LearnerModelParam const* booster_config) {
.set_body([](LearnerModelParam const* booster_config) {
auto* p = new GBTree(booster_config);
p->InitCache(cached_mats);
return p;
});
XGBOOST_REGISTER_GBM(Dart, "dart")
.describe("Tree booster, dart.")
.set_body([](const std::vector<std::shared_ptr<DMatrix> >& cached_mats,
LearnerModelParam const* booster_config) {
.set_body([](LearnerModelParam const* booster_config) {
GBTree* p = new Dart(booster_config);
return p;
});

View File

@@ -16,6 +16,7 @@
#include <string>
#include <unordered_map>
#include "xgboost/data.h"
#include "xgboost/logging.h"
#include "xgboost/gbm.h"
#include "xgboost/predictor.h"
@@ -151,14 +152,8 @@ struct DartTrainParam : public XGBoostParameter<DartTrainParam> {
// gradient boosted trees
class GBTree : public GradientBooster {
public:
explicit GBTree(LearnerModelParam const* booster_config) : model_(booster_config) {}
void InitCache(const std::vector<std::shared_ptr<DMatrix> > &cache) {
cache_ = std::make_shared<std::unordered_map<DMatrix*, PredictionCacheEntry>>();
for (std::shared_ptr<DMatrix> const& d : cache) {
(*cache_)[d.get()].data = d;
}
}
explicit GBTree(LearnerModelParam const* booster_config) :
model_(booster_config) {}
void Configure(const Args& cfg) override;
// Revise `tree_method` and `updater` parameters after seeing the training
@@ -171,7 +166,7 @@ class GBTree : public GradientBooster {
/*! \brief Carry out one iteration of boosting */
void DoBoost(DMatrix* p_fmat,
HostDeviceVector<GradientPair>* in_gpair,
ObjFunction* obj) override;
PredictionCacheEntry* predt) override;
bool UseGPU() const override {
return
@@ -204,11 +199,12 @@ class GBTree : public GradientBooster {
}
void PredictBatch(DMatrix* p_fmat,
HostDeviceVector<bst_float>* out_preds,
PredictionCacheEntry* out_preds,
bool training,
unsigned ntree_limit) override {
CHECK(configured_);
GetPredictor(out_preds, p_fmat)->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit);
GetPredictor(&out_preds->predictions, p_fmat)->PredictBatch(
p_fmat, out_preds, model_, 0, ntree_limit);
}
void PredictInstance(const SparsePage::Inst& inst,
@@ -318,7 +314,9 @@ class GBTree : public GradientBooster {
}
// commit new trees all at once
virtual void CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees);
virtual void CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees,
DMatrix* m,
PredictionCacheEntry* predts);
// --- data structure ---
GBTreeModel model_;
@@ -332,11 +330,6 @@ class GBTree : public GradientBooster {
Args cfg_;
// the updaters that can be applied to each of tree
std::vector<std::unique_ptr<TreeUpdater>> updaters_;
/**
* \brief Map of matrices and associated cached predictions to facilitate
* storing and looking up predictions.
*/
std::shared_ptr<std::unordered_map<DMatrix*, PredictionCacheEntry>> cache_;
// Predictors
std::unique_ptr<Predictor> cpu_predictor_;
#if defined(XGBOOST_USE_CUDA)

View File

@@ -10,6 +10,7 @@
#include <algorithm>
#include <iomanip>
#include <limits>
#include <memory>
#include <sstream>
#include <string>
#include <stack>
@@ -17,6 +18,8 @@
#include <vector>
#include "xgboost/base.h"
#include "xgboost/data.h"
#include "xgboost/predictor.h"
#include "xgboost/feature_map.h"
#include "xgboost/gbm.h"
#include "xgboost/generic_parameters.h"
@@ -195,9 +198,12 @@ void GenericParameter::ConfigureGpuId(bool require_gpu) {
*/
class LearnerImpl : public Learner {
public:
explicit LearnerImpl(std::vector<std::shared_ptr<DMatrix> > cache)
: need_configuration_{true}, cache_(std::move(cache)) {
explicit LearnerImpl(std::vector<std::shared_ptr<DMatrix> > cache)
: need_configuration_{true} {
monitor_.Init("Learner");
for (std::shared_ptr<DMatrix> const& d : cache) {
cache_.Cache(d, GenericParameter::kCpuId);
}
}
// Configuration before data is known.
void Configure() override {
@@ -358,8 +364,7 @@ class LearnerImpl : public Learner {
name = get<String>(gradient_booster["name"]);
tparam_.UpdateAllowUnknown(Args{{"booster", name}});
gbm_.reset(GradientBooster::Create(tparam_.booster,
&generic_parameters_, &learner_model_param_,
cache_));
&generic_parameters_, &learner_model_param_));
gbm_->LoadModel(gradient_booster);
auto const& j_attributes = get<Object const>(learner.at("attributes"));
@@ -413,8 +418,7 @@ class LearnerImpl : public Learner {
tparam_.booster = get<String>(gradient_booster["name"]);
if (!gbm_) {
gbm_.reset(GradientBooster::Create(tparam_.booster,
&generic_parameters_, &learner_model_param_,
cache_));
&generic_parameters_, &learner_model_param_));
}
gbm_->LoadConfig(gradient_booster);
@@ -500,7 +504,7 @@ class LearnerImpl : public Learner {
obj_.reset(ObjFunction::Create(tparam_.objective, &generic_parameters_));
gbm_.reset(GradientBooster::Create(tparam_.booster, &generic_parameters_,
&learner_model_param_, cache_));
&learner_model_param_));
gbm_->Load(fi);
if (mparam_.contain_extra_attrs != 0) {
std::vector<std::pair<std::string, std::string> > attr;
@@ -726,17 +730,18 @@ class LearnerImpl : public Learner {
this->CheckDataSplitMode();
this->ValidateDMatrix(train.get());
auto& predt = this->cache_.Cache(train, generic_parameters_.gpu_id);
monitor_.Start("PredictRaw");
this->PredictRaw(train.get(), &preds_[train.get()], true);
this->PredictRaw(train.get(), &predt, true);
monitor_.Stop("PredictRaw");
TrainingObserver::Instance().Observe(preds_[train.get()], "Predictions");
monitor_.Start("GetGradient");
obj_->GetGradient(preds_[train.get()], train->Info(), iter, &gpair_);
obj_->GetGradient(predt.predictions, train->Info(), iter, &gpair_);
monitor_.Stop("GetGradient");
TrainingObserver::Instance().Observe(gpair_, "Gradients");
gbm_->DoBoost(train.get(), &gpair_, obj_.get());
gbm_->DoBoost(train.get(), &gpair_, &predt);
monitor_.Stop("UpdateOneIter");
}
@@ -749,12 +754,14 @@ class LearnerImpl : public Learner {
}
this->CheckDataSplitMode();
this->ValidateDMatrix(train.get());
this->cache_.Cache(train, generic_parameters_.gpu_id);
gbm_->DoBoost(train.get(), in_gpair);
gbm_->DoBoost(train.get(), in_gpair, &cache_.Entry(train.get()));
monitor_.Stop("BoostOneIter");
}
std::string EvalOneIter(int iter, const std::vector<std::shared_ptr<DMatrix>>& data_sets,
std::string EvalOneIter(int iter,
const std::vector<std::shared_ptr<DMatrix>>& data_sets,
const std::vector<std::string>& data_names) override {
monitor_.Start("EvalOneIter");
this->Configure();
@@ -766,14 +773,19 @@ class LearnerImpl : public Learner {
metrics_.back()->Configure({cfg_.begin(), cfg_.end()});
}
for (size_t i = 0; i < data_sets.size(); ++i) {
DMatrix * dmat = data_sets[i].get();
this->ValidateDMatrix(dmat);
this->PredictRaw(dmat, &preds_[dmat], false);
obj_->EvalTransform(&preds_[dmat]);
std::shared_ptr<DMatrix> m = data_sets[i];
auto &predt = this->cache_.Cache(m, generic_parameters_.gpu_id);
this->ValidateDMatrix(m.get());
this->PredictRaw(m.get(), &predt, false);
auto &out = output_predictions_.Cache(m, generic_parameters_.gpu_id).predictions;
out.Resize(predt.predictions.Size());
out.Copy(predt.predictions);
obj_->EvalTransform(&out);
for (auto& ev : metrics_) {
os << '\t' << data_names[i] << '-' << ev->Name() << ':'
<< ev->Eval(preds_[dmat], data_sets[i]->Info(),
tparam_.dsplit == DataSplitMode::kRow);
<< ev->Eval(out, m->Info(), tparam_.dsplit == DataSplitMode::kRow);
}
}
@@ -848,7 +860,12 @@ class LearnerImpl : public Learner {
} else if (pred_leaf) {
gbm_->PredictLeaf(data.get(), &out_preds->HostVector(), ntree_limit);
} else {
this->PredictRaw(data.get(), out_preds, training, ntree_limit);
auto& prediction = cache_.Cache(data, generic_parameters_.gpu_id);
this->PredictRaw(data.get(), &prediction, training, ntree_limit);
// Copy the prediction cache to output prediction. out_preds comes from C API
out_preds->SetDevice(generic_parameters_.gpu_id);
out_preds->Resize(prediction.predictions.Size());
out_preds->Copy(prediction.predictions);
if (!output_margin) {
obj_->PredTransform(out_preds);
}
@@ -868,11 +885,10 @@ class LearnerImpl : public Learner {
* predictor, when it equals 0, this means we are using all the trees
* \param training allow dropout when the DART booster is being used
*/
void PredictRaw(DMatrix* data, HostDeviceVector<bst_float>* out_preds,
void PredictRaw(DMatrix* data, PredictionCacheEntry* out_preds,
bool training,
unsigned ntree_limit = 0) const {
CHECK(gbm_ != nullptr)
<< "Predict must happen after Load or configuration";
CHECK(gbm_ != nullptr) << "Predict must happen after Load or configuration";
this->ValidateDMatrix(data);
gbm_->PredictBatch(data, out_preds, training, ntree_limit);
}
@@ -920,7 +936,7 @@ class LearnerImpl : public Learner {
void ConfigureGBM(LearnerTrainParam const& old, Args const& args) {
if (gbm_ == nullptr || old.booster != tparam_.booster) {
gbm_.reset(GradientBooster::Create(tparam_.booster, &generic_parameters_,
&learner_model_param_, cache_));
&learner_model_param_));
}
gbm_->Configure(args);
}
@@ -930,9 +946,10 @@ class LearnerImpl : public Learner {
// estimate feature bound
// TODO(hcho3): Change num_feature to 64-bit integer
unsigned num_feature = 0;
for (auto & matrix : cache_) {
CHECK(matrix != nullptr);
const uint64_t num_col = matrix->Info().num_col_;
for (auto & matrix : cache_.Container()) {
CHECK(matrix.first);
CHECK(!matrix.second.ref.expired());
const uint64_t num_col = matrix.first->Info().num_col_;
CHECK_LE(num_col, static_cast<uint64_t>(std::numeric_limits<unsigned>::max()))
<< "Unfortunately, XGBoost does not support data matrices with "
<< std::numeric_limits<unsigned>::max() << " features or greater";
@@ -990,13 +1007,12 @@ class LearnerImpl : public Learner {
// `enable_experimental_json_serialization' is set to false. Will be removed once JSON
// takes over.
std::string const serialisation_header_ { u8"CONFIG-offset:" };
// configurations
// User provided configurations
std::map<std::string, std::string> cfg_;
// Stores information like best-iteration for early stopping.
std::map<std::string, std::string> attributes_;
std::vector<std::string> metric_names_;
static std::string const kEvalMetric; // NOLINT
// temporal storages for prediction
std::map<DMatrix*, HostDeviceVector<bst_float>> preds_;
// gradient pairs
HostDeviceVector<GradientPair> gpair_;
bool need_configuration_;
@@ -1004,8 +1020,11 @@ class LearnerImpl : public Learner {
private:
/*! \brief random number transformation seed. */
static int32_t constexpr kRandSeedMagic = 127;
// internal cached dmatrix
std::vector<std::shared_ptr<DMatrix> > cache_;
// internal cached dmatrix for prediction.
PredictionContainer cache_;
/*! \brief Temporary storage to prediction. Useful for storing data transformed by
* objective function */
PredictionContainer output_predictions_;
common::Monitor monitor_;

View File

@@ -1,5 +1,5 @@
/*!
* Copyright by Contributors 2017-2019
* Copyright by Contributors 2017-2020
*/
#include <dmlc/omp.h>
@@ -46,9 +46,9 @@ class CPUPredictor : public Predictor {
}
}
void PredLoopInternal(DMatrix* p_fmat, std::vector<bst_float>* out_preds,
gbm::GBTreeModel const& model, int32_t tree_begin,
int32_t tree_end) {
void PredInternal(DMatrix *p_fmat, std::vector<bst_float> *out_preds,
gbm::GBTreeModel const &model, int32_t tree_begin,
int32_t tree_end) {
int32_t const num_group = model.learner_model_param_->num_output_group;
const int nthread = omp_get_max_threads();
InitThreadTemp(nthread, model.learner_model_param_->num_feature);
@@ -102,27 +102,6 @@ class CPUPredictor : public Predictor {
}
}
bool PredictFromCache(DMatrix* dmat,
HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model,
unsigned ntree_limit) const {
CHECK(cache_);
if (ntree_limit == 0 ||
ntree_limit * model.learner_model_param_->num_output_group >= model.trees.size()) {
auto it = cache_->find(dmat);
if (it != cache_->end()) {
const HostDeviceVector<bst_float>& y = it->second.predictions;
if (y.Size() != 0) {
out_preds->Resize(y.Size());
std::copy(y.HostVector().begin(), y.HostVector().end(),
out_preds->HostVector().begin());
return true;
}
}
}
return false;
}
void InitOutPredictions(const MetaInfo& info,
HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model) const {
@@ -156,60 +135,78 @@ class CPUPredictor : public Predictor {
}
public:
CPUPredictor(GenericParameter const* generic_param,
std::shared_ptr<std::unordered_map<DMatrix*, PredictionCacheEntry>> cache) :
Predictor::Predictor{generic_param, cache} {}
void PredictBatch(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
explicit CPUPredictor(GenericParameter const* generic_param) :
Predictor::Predictor{generic_param} {}
// ntree_limit is a very problematic parameter, as it's ambiguous in the context of
// multi-output and forest. Same problem exists for tree_begin
void PredictBatch(DMatrix* dmat, PredictionCacheEntry* predts,
const gbm::GBTreeModel& model, int tree_begin,
unsigned ntree_limit = 0) override {
if (this->PredictFromCache(dmat, out_preds, model, ntree_limit)) {
return;
}
this->InitOutPredictions(dmat->Info(), out_preds, model);
ntree_limit *= model.learner_model_param_->num_output_group;
if (ntree_limit == 0 || ntree_limit > model.trees.size()) {
ntree_limit = static_cast<unsigned>(model.trees.size());
uint32_t const ntree_limit = 0) override {
// tree_begin is not used, right now we just enforce it to be 0.
CHECK_EQ(tree_begin, 0);
auto* out_preds = &predts->predictions;
CHECK_GE(predts->version, tree_begin);
if (predts->version == 0) {
CHECK_EQ(out_preds->Size(), 0);
this->InitOutPredictions(dmat->Info(), out_preds, model);
}
this->PredLoopInternal(dmat, &out_preds->HostVector(), model,
tree_begin, ntree_limit);
uint32_t const output_groups = model.learner_model_param_->num_output_group;
CHECK_NE(output_groups, 0);
// Right now we just assume ntree_limit provided by users means number of tree layers
// in the context of multi-output model
uint32_t real_ntree_limit = ntree_limit * output_groups;
if (real_ntree_limit == 0 || real_ntree_limit > model.trees.size()) {
real_ntree_limit = static_cast<uint32_t>(model.trees.size());
}
auto cache_entry = this->FindCache(dmat);
if (cache_entry == cache_->cend()) {
return;
uint32_t const end_version = (tree_begin + real_ntree_limit) / output_groups;
// When users have provided ntree_limit, end_version can be lesser, cache is violated
if (predts->version > end_version) {
CHECK_NE(ntree_limit, 0);
this->InitOutPredictions(dmat->Info(), out_preds, model);
predts->version = 0;
}
if (cache_entry->second.predictions.Size() == 0) {
// See comment in GPUPredictor::PredictBatch.
InitOutPredictions(cache_entry->second.data->Info(),
&(cache_entry->second.predictions), model);
cache_entry->second.predictions.Copy(*out_preds);
uint32_t const beg_version = predts->version;
CHECK_LE(beg_version, end_version);
if (beg_version < end_version) {
this->PredInternal(dmat, &out_preds->HostVector(), model,
beg_version * output_groups,
end_version * output_groups);
}
// delta means {size of forest} * {number of newly accumulated layers}
uint32_t delta = end_version - beg_version;
CHECK_LE(delta, model.trees.size());
predts->Update(delta);
CHECK(out_preds->Size() == output_groups * dmat->Info().num_row_ ||
out_preds->Size() == dmat->Info().num_row_);
}
void UpdatePredictionCache(
const gbm::GBTreeModel& model,
std::vector<std::unique_ptr<TreeUpdater>>* updaters,
int num_new_trees) override {
int num_new_trees,
DMatrix* m,
PredictionCacheEntry* predts) override {
int old_ntree = model.trees.size() - num_new_trees;
// update cache entry
for (auto& kv : (*cache_)) {
PredictionCacheEntry& e = kv.second;
if (e.predictions.Size() == 0) {
InitOutPredictions(e.data->Info(), &(e.predictions), model);
PredLoopInternal(e.data.get(), &(e.predictions.HostVector()), model, 0,
model.trees.size());
} else if (model.learner_model_param_->num_output_group == 1 && updaters->size() > 0 &&
num_new_trees == 1 &&
updaters->back()->UpdatePredictionCache(e.data.get(),
&(e.predictions))) {
{} // do nothing
} else {
PredLoopInternal(e.data.get(), &(e.predictions.HostVector()), model, old_ntree,
model.trees.size());
}
auto* out = &predts->predictions;
if (predts->predictions.Size() == 0) {
this->InitOutPredictions(m->Info(), out, model);
this->PredInternal(m, &out->HostVector(), model, 0, model.trees.size());
} else if (model.learner_model_param_->num_output_group == 1 &&
updaters->size() > 0 &&
num_new_trees == 1 &&
updaters->back()->UpdatePredictionCache(m, out)) {
{}
} else {
PredInternal(m, &out->HostVector(), model, old_ntree, model.trees.size());
}
auto delta = num_new_trees / model.learner_model_param_->num_output_group;
predts->Update(delta);
}
void PredictInstance(const SparsePage::Inst& inst,
@@ -387,9 +384,8 @@ class CPUPredictor : public Predictor {
XGBOOST_REGISTER_PREDICTOR(CPUPredictor, "cpu_predictor")
.describe("Make predictions using CPU.")
.set_body([](GenericParameter const* generic_param,
std::shared_ptr<std::unordered_map<DMatrix*, PredictionCacheEntry>> cache) {
return new CPUPredictor(generic_param, cache);
.set_body([](GenericParameter const* generic_param) {
return new CPUPredictor(generic_param);
});
} // namespace predictor
} // namespace xgboost

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2017-2018 by Contributors
* Copyright 2017-2020 by Contributors
*/
#include <thrust/copy.h>
#include <thrust/device_ptr.h>
@@ -295,9 +295,8 @@ class GPUPredictor : public xgboost::Predictor {
}
public:
GPUPredictor(GenericParameter const* generic_param,
std::shared_ptr<std::unordered_map<DMatrix*, PredictionCacheEntry>> cache) :
Predictor::Predictor{generic_param, cache} {}
explicit GPUPredictor(GenericParameter const* generic_param) :
Predictor::Predictor{generic_param} {}
~GPUPredictor() override {
if (generic_param_->gpu_id >= 0) {
@@ -305,43 +304,53 @@ class GPUPredictor : public xgboost::Predictor {
}
}
void PredictBatch(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
void PredictBatch(DMatrix* dmat, PredictionCacheEntry* predts,
const gbm::GBTreeModel& model, int tree_begin,
unsigned ntree_limit = 0) override {
// This function is duplicated with CPU predictor PredictBatch, see comments in there.
// FIXME(trivialfis): Remove the duplication.
int device = generic_param_->gpu_id;
CHECK_GE(device, 0) << "Set `gpu_id' to positive value for processing GPU data.";
ConfigureDevice(device);
if (this->PredictFromCache(dmat, out_preds, model, ntree_limit)) {
return;
}
this->InitOutPredictions(dmat->Info(), out_preds, model);
int32_t tree_end = ntree_limit * model.learner_model_param_->num_output_group;
if (ntree_limit == 0 || ntree_limit > model.trees.size()) {
tree_end = static_cast<unsigned>(model.trees.size());
CHECK_EQ(tree_begin, 0);
auto* out_preds = &predts->predictions;
CHECK_GE(predts->version, tree_begin);
if (predts->version == 0) {
CHECK_EQ(out_preds->Size(), 0);
this->InitOutPredictions(dmat->Info(), out_preds, model);
}
DevicePredictInternal(dmat, out_preds, model, tree_begin, tree_end);
uint32_t const output_groups = model.learner_model_param_->num_output_group;
CHECK_NE(output_groups, 0);
auto cache_emtry = this->FindCache(dmat);
if (cache_emtry == cache_->cend()) { return; }
if (cache_emtry->second.predictions.Size() == 0) {
// Initialise the cache on first iteration, this comes useful
// when performing training continuation:
//
// 1. PredictBatch
// 2. CommitModel
// - updater->UpdatePredictionCache
//
// If we don't initialise this cache, the 2 step will recieve an invalid cache as
// the first step only modifies prediction store in learner without following code.
InitOutPredictions(cache_emtry->second.data->Info(),
&(cache_emtry->second.predictions), model);
CHECK_EQ(cache_emtry->second.predictions.Size(), out_preds->Size());
cache_emtry->second.predictions.Copy(*out_preds);
uint32_t real_ntree_limit = ntree_limit * output_groups;
if (real_ntree_limit == 0 || real_ntree_limit > model.trees.size()) {
real_ntree_limit = static_cast<uint32_t>(model.trees.size());
}
uint32_t const end_version = (tree_begin + real_ntree_limit) / output_groups;
if (predts->version > end_version) {
CHECK_NE(ntree_limit, 0);
this->InitOutPredictions(dmat->Info(), out_preds, model);
predts->version = 0;
}
uint32_t const beg_version = predts->version;
CHECK_LE(beg_version, end_version);
if (beg_version < end_version) {
this->DevicePredictInternal(dmat, out_preds, model,
beg_version * output_groups,
end_version * output_groups);
}
uint32_t delta = end_version - beg_version;
CHECK_LE(delta, model.trees.size());
predts->Update(delta);
CHECK(out_preds->Size() == output_groups * dmat->Info().num_row_ ||
out_preds->Size() == dmat->Info().num_row_);
}
protected:
@@ -361,49 +370,30 @@ class GPUPredictor : public xgboost::Predictor {
}
}
bool PredictFromCache(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model, unsigned ntree_limit) {
if (ntree_limit == 0 ||
ntree_limit * model.learner_model_param_->num_output_group >= model.trees.size()) {
auto it = (*cache_).find(dmat);
if (it != cache_->cend()) {
const HostDeviceVector<bst_float>& y = it->second.predictions;
if (y.Size() != 0) {
monitor_.StartCuda("PredictFromCache");
out_preds->SetDevice(y.DeviceIdx());
out_preds->Resize(y.Size());
out_preds->Copy(y);
monitor_.StopCuda("PredictFromCache");
return true;
}
}
}
return false;
}
// Brings cached predictions up to date after `num_new_trees` trees were added to `model`.
// NOTE(review): this span holds two overlapping variants of the method (one walking every
// entry of `cache_`, one operating on the single `predts` entry passed in) — looks like
// pre- and post-change lines of a patch shown together; confirm which variant is live.
void UpdatePredictionCache(
const gbm::GBTreeModel& model,
std::vector<std::unique_ptr<TreeUpdater>>* updaters,
int num_new_trees) override {
int num_new_trees,
DMatrix* m,
PredictionCacheEntry* predts) override {
int device = generic_param_->gpu_id;
ConfigureDevice(device);
// Tree count of the model before the new trees were appended.
auto old_ntree = model.trees.size() - num_new_trees;
// update cache entry
for (auto& kv : (*cache_)) {
PredictionCacheEntry& e = kv.second;
DMatrix* dmat = kv.first;
HostDeviceVector<bst_float>& predictions = e.predictions;
if (predictions.Size() == 0) {
this->InitOutPredictions(dmat->Info(), &predictions, model);
}
// With one output group and exactly one new tree, the last updater is asked to refresh
// the predictions itself; only if it declines are the new trees predicted on device.
if (model.learner_model_param_->num_output_group == 1 && updaters->size() > 0 &&
num_new_trees == 1 &&
updaters->back()->UpdatePredictionCache(e.data.get(), &predictions)) {
// do nothing
} else {
DevicePredictInternal(dmat, &predictions, model, old_ntree, model.trees.size());
}
auto* out = &predts->predictions;
// Empty entry: initialise the output and predict with every tree in the model.
if (predts->predictions.Size() == 0) {
InitOutPredictions(m->Info(), out, model);
DevicePredictInternal(m, out, model, 0, model.trees.size());
} else if (model.learner_model_param_->num_output_group == 1 &&
updaters->size() > 0 &&
num_new_trees == 1 &&
updaters->back()->UpdatePredictionCache(m, out)) {
// The updater refreshed `out` itself; nothing left to do.
{}
} else {
// Otherwise add only the contribution of the newly appended trees.
DevicePredictInternal(m, out, model, old_ntree, model.trees.size());
}
// New trees divided by the number of output groups.
// NOTE(review): Update() presumably advances predts->version by this amount — confirm.
auto delta = num_new_trees / model.learner_model_param_->num_output_group;
predts->Update(delta);
}
void PredictInstance(const SparsePage::Inst& inst,
@@ -442,11 +432,6 @@ class GPUPredictor : public xgboost::Predictor {
/*!
 * \brief Forward `cfg` to the base predictor, then set up the device named by
 *        `generic_param_->gpu_id` when that id is non-negative.
 */
void Configure(const std::vector<std::pair<std::string, std::string>>& cfg) override {
  Predictor::Configure(cfg);
  int const gpu_id = generic_param_->gpu_id;
  if (gpu_id < 0) {
    // Negative id: no device to configure.
    return;
  }
  ConfigureDevice(gpu_id);
}
private:
@@ -469,9 +454,8 @@ class GPUPredictor : public xgboost::Predictor {
// Registers GPUPredictor under the name "gpu_predictor"; the body lambda is the factory
// that constructs it.
// NOTE(review): two `.set_body` lambdas appear below (with and without a `cache` parameter) —
// looks like pre- and post-change patch lines shown together; confirm which is current.
XGBOOST_REGISTER_PREDICTOR(GPUPredictor, "gpu_predictor")
.describe("Make predictions using GPU.")
.set_body([](GenericParameter const* generic_param,
std::shared_ptr<std::unordered_map<DMatrix*, PredictionCacheEntry>> cache) {
return new GPUPredictor(generic_param, cache);
.set_body([](GenericParameter const* generic_param) {
return new GPUPredictor(generic_param);
});
} // namespace predictor

View File

@@ -1,24 +1,60 @@
/*!
* Copyright by Contributors 2017
* Copyright 2017-2020 by Contributors
*/
#include <dmlc/registry.h>
#include <xgboost/predictor.h>
#include "xgboost/data.h"
#include "xgboost/generic_parameters.h"
namespace dmlc {
// Enables the dmlc registry for PredictorReg entries; Predictor::Create reads it through
// ::dmlc::Registry<PredictorReg>::Get().
DMLC_REGISTRY_ENABLE(::xgboost::PredictorReg);
} // namespace dmlc
namespace xgboost {
/*!
 * \brief Drop every cache entry whose DMatrix reference has expired.
 *
 * An entry is stale once `ref.expired()` reports true for it.
 */
void PredictionContainer::ClearExpiredEntries() {
  // Erase in place: erase(iterator) returns the iterator following the removed
  // element, so no temporary list of keys (and no second lookup per key) is needed.
  for (auto it = container_.begin(); it != container_.end();) {
    if (it->second.ref.expired()) {
      it = container_.erase(it);
    } else {
      ++it;
    }
  }
}
/*!
 * \brief Register `m` in the container and return its cache entry.
 *
 * Expired entries are pruned first. The entry for `m` is created if absent and
 * its `ref` is pointed at `m`.
 *
 * \param m      DMatrix whose predictions are to be cached.
 * \param device Device for the prediction vector; it is applied only when
 *               different from GenericParameter::kCpuId.
 * \return Reference to the entry stored in the container.
 */
PredictionCacheEntry &PredictionContainer::Cache(std::shared_ptr<DMatrix> m, int32_t device) {
  this->ClearExpiredEntries();
  // Look the entry up (creating it if needed) once and reuse the reference
  // instead of hashing the same key three times.
  auto& entry = container_[m.get()];
  entry.ref = m;
  if (device != GenericParameter::kCpuId) {
    entry.predictions.SetDevice(device);
  }
  return entry;
}
/*!
 * \brief Fetch the cache entry previously registered for `m`.
 *
 * Both conditions are enforced with CHECK: `m` must be present in the
 * container, and its `ref` must still be lockable.
 *
 * \param m Key of the entry to look up.
 * \return Reference to the stored entry.
 */
PredictionCacheEntry &PredictionContainer::Entry(DMatrix *m) {
  // A single find() replaces the original find() + at() + at() on the same key.
  auto it = container_.find(m);
  CHECK(it != container_.end());
  CHECK(it->second.ref.lock())
      << "[Internal error]: DMatrix: " << m << " has expired.";
  return it->second;
}
decltype(PredictionContainer::container_) const& PredictionContainer::Container() {
this->ClearExpiredEntries();
return container_;
}
/*!
 * \brief Base-class configuration hook; it takes no action on `cfg`.
 */
void Predictor::Configure(const std::vector<std::pair<std::string, std::string>>& cfg) {}
// Looks up the predictor registered under `name` and builds it through the registry entry's
// factory body; reports LOG(FATAL) when no entry with that name is found.
// NOTE(review): two parameter lists and two factory calls appear below (one passing a `cache`
// argument, one without) — looks like pre- and post-change patch lines shown together; confirm
// which signature is current.
Predictor* Predictor::Create(
std::string const& name, GenericParameter const* generic_param,
std::shared_ptr<std::unordered_map<DMatrix*, PredictionCacheEntry>> cache) {
std::string const& name, GenericParameter const* generic_param) {
auto* e = ::dmlc::Registry<PredictorReg>::Get()->Find(name);
if (e == nullptr) {
LOG(FATAL) << "Unknown predictor type " << name;
}
auto p_predictor = (e->body)(generic_param, cache);
auto p_predictor = (e->body)(generic_param);
return p_predictor;
}
} // namespace xgboost