Replaced std::vector-based interfaces with HostDeviceVector-based interfaces. (#3116)

* Replaced std::vector-based interfaces with HostDeviceVector-based interfaces.

- replacement was performed in the learner, boosters, predictors,
  updaters, and objective functions
- only interfaces used in training were replaced;
  interfaces like PredictInstance() still use std::vector
- refactoring necessary for replacement of interfaces was also performed,
  such as using HostDeviceVector in prediction cache

* HostDeviceVector-based interfaces for custom objective function example plugin.
This commit is contained in:
Andrew V. Adinetz
2018-02-28 01:00:04 +01:00
committed by Rory Mitchell
parent 11bfa8584d
commit d5992dd881
38 changed files with 371 additions and 519 deletions

View File

@@ -68,12 +68,9 @@ class GradientBooster {
* \param obj The objective function; optional, can be nullptr when a customized version is used
* the booster may change the content of gpair
*/
virtual void DoBoost(DMatrix* p_fmat,
std::vector<bst_gpair>* in_gpair,
ObjFunction* obj = nullptr) = 0;
virtual void DoBoost(DMatrix* p_fmat,
HostDeviceVector<bst_gpair>* in_gpair,
ObjFunction* obj = nullptr);
ObjFunction* obj = nullptr) = 0;
/*!
* \brief generate predictions for given feature matrix
@@ -82,12 +79,9 @@ class GradientBooster {
* \param ntree_limit limit the number of trees used in prediction; when it equals 0,
* the number of trees is not limited. This parameter is only valid for gbtree, not for gblinear
*/
virtual void PredictBatch(DMatrix* dmat,
std::vector<bst_float>* out_preds,
unsigned ntree_limit = 0) = 0;
virtual void PredictBatch(DMatrix* dmat,
HostDeviceVector<bst_float>* out_preds,
unsigned ntree_limit = 0);
unsigned ntree_limit = 0) = 0;
/*!
* \brief online prediction function, predict score for one instance at a time
* NOTE: use the batch prediction interface if possible, batch prediction is usually

View File

@@ -84,7 +84,7 @@ class Learner : public rabit::Serializable {
*/
virtual void BoostOneIter(int iter,
DMatrix* train,
std::vector<bst_gpair>* in_gpair) = 0;
HostDeviceVector<bst_gpair>* in_gpair) = 0;
/*!
* \brief evaluate the model for specific iteration using the configured metrics.
* \param iter iteration number
@@ -109,7 +109,7 @@ class Learner : public rabit::Serializable {
*/
virtual void Predict(DMatrix* data,
bool output_margin,
std::vector<bst_float> *out_preds,
HostDeviceVector<bst_float> *out_preds,
unsigned ntree_limit = 0,
bool pred_leaf = false,
bool pred_contribs = false,
@@ -169,7 +169,7 @@ class Learner : public rabit::Serializable {
*/
inline void Predict(const SparseBatch::Inst &inst,
bool output_margin,
std::vector<bst_float> *out_preds,
HostDeviceVector<bst_float> *out_preds,
unsigned ntree_limit = 0) const;
/*!
* \brief Create a new instance of learner.
@@ -192,9 +192,9 @@ class Learner : public rabit::Serializable {
// implementation of inline functions.
inline void Learner::Predict(const SparseBatch::Inst& inst,
bool output_margin,
std::vector<bst_float>* out_preds,
HostDeviceVector<bst_float>* out_preds,
unsigned ntree_limit) const {
gbm_->PredictInstance(inst, out_preds, ntree_limit);
gbm_->PredictInstance(inst, &out_preds->data_h(), ntree_limit);
if (!output_margin) {
obj_->PredTransform(out_preds);
}

View File

@@ -44,14 +44,10 @@ class ObjFunction {
* \param iteration current iteration number.
* \param out_gpair output of GetGradient; the gradient and second order gradient are saved in it
*/
virtual void GetGradient(const std::vector<bst_float>& preds,
const MetaInfo& info,
int iteration,
std::vector<bst_gpair>* out_gpair) = 0;
virtual void GetGradient(HostDeviceVector<bst_float>* preds,
const MetaInfo& info,
int iteration,
HostDeviceVector<bst_gpair>* out_gpair);
HostDeviceVector<bst_gpair>* out_gpair) = 0;
/*! \return the default evaluation metric for the objective */
virtual const char* DefaultEvalMetric() const = 0;
@@ -60,17 +56,13 @@ class ObjFunction {
* \brief transform prediction values, this is only called when Prediction is called
* \param io_preds prediction values, saves to this vector as well
*/
virtual void PredTransform(std::vector<bst_float> *io_preds) {}
virtual void PredTransform(HostDeviceVector<bst_float> *io_preds);
virtual void PredTransform(HostDeviceVector<bst_float> *io_preds) {}
/*!
* \brief transform prediction values; this is only called when Eval is called,
* usually it redirects to PredTransform
* \param io_preds prediction values, saves to this vector as well
*/
virtual void EvalTransform(std::vector<bst_float> *io_preds) {
this->PredTransform(io_preds);
}
virtual void EvalTransform(HostDeviceVector<bst_float> *io_preds) {
this->PredTransform(io_preds);
}

View File

@@ -63,22 +63,6 @@ class Predictor {
* limit trees.
*/
virtual void PredictBatch(DMatrix* dmat, std::vector<bst_float>* out_preds,
const gbm::GBTreeModel& model, int tree_begin,
unsigned ntree_limit = 0) = 0;
/**
* \brief Generate batch predictions for a given feature matrix. May use
* cached predictions if available instead of calculating from scratch.
*
* \param [in,out] dmat Feature matrix.
* \param [in,out] out_preds The output preds.
* \param model The model to predict from.
* \param tree_begin The tree begin index.
* \param ntree_limit (Optional) The ntree limit. 0 means do not
* limit trees.
*/
virtual void PredictBatch(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model, int tree_begin,
unsigned ntree_limit = 0) = 0;
@@ -186,41 +170,14 @@ class Predictor {
static Predictor* Create(std::string name);
protected:
/**
* \fn bool PredictFromCache(DMatrix* dmat, std::vector<bst_float>*
* out_preds, const gbm::GBTreeModel& model, unsigned ntree_limit = 0)
*
* \brief Attempt to predict from cache.
*
* \return True if it succeeds, false if it fails.
*/
bool PredictFromCache(DMatrix* dmat, std::vector<bst_float>* out_preds,
const gbm::GBTreeModel& model,
unsigned ntree_limit = 0);
/**
* \fn void Predictor::InitOutPredictions(const MetaInfo& info,
* std::vector<bst_float>* out_preds, const gbm::GBTreeModel& model) const;
*
* \brief Init out predictions according to base margin.
*
* \param info Dmatrix info possibly containing base margin.
* \param [in,out] out_preds The out preds.
* \param model The model.
*/
void InitOutPredictions(const MetaInfo& info,
std::vector<bst_float>* out_preds,
const gbm::GBTreeModel& model) const;
/**
* \struct PredictionCacheEntry
*
* \brief Contains pointer to input matrix and associated cached predictions.
*/
struct PredictionCacheEntry {
std::shared_ptr<DMatrix> data;
std::vector<bst_float> predictions;
HostDeviceVector<bst_float> predictions;
};
/**

View File

@@ -40,12 +40,9 @@ class TreeUpdater {
* but maybe different random seeds, usually one tree is passed in at a time,
* there can be multiple trees when we train a random forest style model
*/
virtual void Update(const std::vector<bst_gpair>& gpair,
DMatrix* data,
const std::vector<RegTree*>& trees) = 0;
virtual void Update(HostDeviceVector<bst_gpair>* gpair,
DMatrix* data,
const std::vector<RegTree*>& trees);
const std::vector<RegTree*>& trees) = 0;
/*!
* \brief determines whether updater has enough knowledge about a given dataset
@@ -58,11 +55,9 @@ class TreeUpdater {
* updated by the time this function returns.
*/
virtual bool UpdatePredictionCache(const DMatrix* data,
std::vector<bst_float>* out_preds) {
HostDeviceVector<bst_float>* out_preds) {
return false;
}
virtual bool UpdatePredictionCache(const DMatrix* data,
HostDeviceVector<bst_float>* out_preds);
/*!
* \brief Create a tree updater given name