Replaced std::vector-based interfaces with HostDeviceVector-based interfaces. (#3116)

* Replaced std::vector-based interfaces with HostDeviceVector-based interfaces.

- the replacement was performed in the learner, boosters, predictors,
  updaters, and objective functions
- only interfaces used during training were replaced;
  interfaces such as PredictInstance() still use std::vector
- refactoring required by the interface replacement was also performed,
  such as switching the prediction cache to HostDeviceVector
  (a usage sketch follows this list)
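
For reference, this is roughly how the replaced training-path interfaces are exercised on the host side after this change. A minimal sketch only, not code from this commit: the include paths and the standalone function are assumptions, while the constructors, the resize() signature, data_h(), and bst_gpair usage follow the diffs below.

#include <cstddef>
#include <vector>
#include "xgboost/base.h"                // bst_float, bst_gpair (assumed include path)
#include "common/host_device_vector.h"   // HostDeviceVector (assumed include path)

namespace xgboost {
// Sketch of the host-side access pattern used by the updated interfaces.
inline void HostDeviceVectorUsageSketch() {
  // The new constructors accept an initializer list or a std::vector;
  // device = -1 (the default) keeps the data on the host.
  HostDeviceVector<bst_float> preds({0.5f, -1.2f, 2.0f});
  HostDeviceVector<bst_gpair> gpair;

  // resize() now takes a fill value; passing new_device == -1 keeps the device as is.
  gpair.resize(preds.size(), bst_gpair());

  // Host code reads and writes through data_h(); device code would use ptr_d() instead.
  std::vector<bst_float>& preds_h = preds.data_h();
  std::vector<bst_gpair>& gpair_h = gpair.data_h();
  for (size_t i = 0; i < preds_h.size(); ++i) {
    gpair_h[i] = bst_gpair(preds_h[i], 1.0f);
  }
}
}  // namespace xgboost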

* HostDeviceVector-based interface for the custom objective function example plugin (see the sketch below).
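
The plugin change follows the same pattern: predictions are read through the host view and gradient pairs are written into the host view of the output vector. A condensed, self-contained sketch of that GetGradient logic; labels are passed as a plain vector instead of MetaInfo and weighting is dropped to keep the example short, so the MyLogistic member shown in the diff below remains the authoritative version.

#include <cmath>
#include <cstddef>
#include <vector>
#include "xgboost/base.h"               // bst_float, bst_gpair (assumed include path)
#include "common/host_device_vector.h"  // HostDeviceVector (assumed include path)

namespace xgboost {
// Condensed from the updated MyLogistic::GetGradient in this commit.
inline void LogisticGradientSketch(HostDeviceVector<bst_float>* preds,
                                   const std::vector<bst_float>& labels,
                                   HostDeviceVector<bst_gpair>* out_gpair) {
  std::vector<bst_float>& preds_h = preds->data_h();
  out_gpair->resize(preds_h.size());
  std::vector<bst_gpair>& out_gpair_h = out_gpair->data_h();
  for (size_t i = 0; i < preds_h.size(); ++i) {
    bst_float p = 1.0f / (1.0f + std::exp(-preds_h[i]));  // logistic transform of the margin
    bst_float grad = p - labels[i];                        // first-order gradient
    bst_float hess = p * (1.0f - p);                       // second-order gradient
    out_gpair_h[i] = bst_gpair(grad, hess);
  }
}
}  // namespace xgboost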
Andrew V. Adinetz 2018-02-28 01:00:04 +01:00 committed by Rory Mitchell
parent 11bfa8584d
commit d5992dd881
38 changed files with 371 additions and 519 deletions

View File

@@ -68,12 +68,9 @@ class GradientBooster {
    * \param obj The objective function, optional, can be nullptr when use customized version
    *            the booster may change content of gpair
    */
-  virtual void DoBoost(DMatrix* p_fmat,
-                       std::vector<bst_gpair>* in_gpair,
-                       ObjFunction* obj = nullptr) = 0;
   virtual void DoBoost(DMatrix* p_fmat,
                        HostDeviceVector<bst_gpair>* in_gpair,
-                       ObjFunction* obj = nullptr);
+                       ObjFunction* obj = nullptr) = 0;
   /*!
    * \brief generate predictions for given feature matrix
@@ -82,12 +79,9 @@ class GradientBooster {
    * \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means
    *    we do not limit number of trees, this parameter is only valid for gbtree, but not for gblinear
    */
-  virtual void PredictBatch(DMatrix* dmat,
-                            std::vector<bst_float>* out_preds,
-                            unsigned ntree_limit = 0) = 0;
   virtual void PredictBatch(DMatrix* dmat,
                             HostDeviceVector<bst_float>* out_preds,
-                            unsigned ntree_limit = 0);
+                            unsigned ntree_limit = 0) = 0;
   /*!
    * \brief online prediction function, predict score for one instance at a time
    * NOTE: use the batch prediction interface if possible, batch prediction is usually

View File

@@ -84,7 +84,7 @@ class Learner : public rabit::Serializable {
    */
   virtual void BoostOneIter(int iter,
                             DMatrix* train,
-                            std::vector<bst_gpair>* in_gpair) = 0;
+                            HostDeviceVector<bst_gpair>* in_gpair) = 0;
   /*!
    * \brief evaluate the model for specific iteration using the configured metrics.
    * \param iter iteration number
@@ -109,7 +109,7 @@ class Learner : public rabit::Serializable {
    */
   virtual void Predict(DMatrix* data,
                        bool output_margin,
-                       std::vector<bst_float> *out_preds,
+                       HostDeviceVector<bst_float> *out_preds,
                        unsigned ntree_limit = 0,
                        bool pred_leaf = false,
                        bool pred_contribs = false,
@@ -169,7 +169,7 @@ class Learner : public rabit::Serializable {
    */
   inline void Predict(const SparseBatch::Inst &inst,
                       bool output_margin,
-                      std::vector<bst_float> *out_preds,
+                      HostDeviceVector<bst_float> *out_preds,
                       unsigned ntree_limit = 0) const;
   /*!
    * \brief Create a new instance of learner.
@@ -192,9 +192,9 @@ class Learner : public rabit::Serializable {
 // implementation of inline functions.
 inline void Learner::Predict(const SparseBatch::Inst& inst,
                              bool output_margin,
-                             std::vector<bst_float>* out_preds,
+                             HostDeviceVector<bst_float>* out_preds,
                              unsigned ntree_limit) const {
-  gbm_->PredictInstance(inst, out_preds, ntree_limit);
+  gbm_->PredictInstance(inst, &out_preds->data_h(), ntree_limit);
   if (!output_margin) {
     obj_->PredTransform(out_preds);
   }

View File

@@ -44,14 +44,10 @@ class ObjFunction {
    * \param iteration current iteration number.
    * \param out_gpair output of get gradient, saves gradient and second order gradient in
    */
-  virtual void GetGradient(const std::vector<bst_float>& preds,
-                           const MetaInfo& info,
-                           int iteration,
-                           std::vector<bst_gpair>* out_gpair) = 0;
   virtual void GetGradient(HostDeviceVector<bst_float>* preds,
                            const MetaInfo& info,
                            int iteration,
-                           HostDeviceVector<bst_gpair>* out_gpair);
+                           HostDeviceVector<bst_gpair>* out_gpair) = 0;
   /*! \return the default evaluation metric for the objective */
   virtual const char* DefaultEvalMetric() const = 0;
@@ -60,17 +56,13 @@ class ObjFunction {
    * \brief transform prediction values, this is only called when Prediction is called
    * \param io_preds prediction values, saves to this vector as well
    */
-  virtual void PredTransform(std::vector<bst_float> *io_preds) {}
-  virtual void PredTransform(HostDeviceVector<bst_float> *io_preds);
+  virtual void PredTransform(HostDeviceVector<bst_float> *io_preds) {}
   /*!
    * \brief transform prediction values, this is only called when Eval is called,
    *  usually it redirect to PredTransform
    * \param io_preds prediction values, saves to this vector as well
    */
-  virtual void EvalTransform(std::vector<bst_float> *io_preds) {
-    this->PredTransform(io_preds);
-  }
   virtual void EvalTransform(HostDeviceVector<bst_float> *io_preds) {
     this->PredTransform(io_preds);
   }

View File

@@ -63,22 +63,6 @@ class Predictor {
    *                    limit trees.
    */
-  virtual void PredictBatch(DMatrix* dmat, std::vector<bst_float>* out_preds,
-                            const gbm::GBTreeModel& model, int tree_begin,
-                            unsigned ntree_limit = 0) = 0;
-  /**
-   * \brief Generate batch predictions for a given feature matrix. May use
-   *        cached predictions if available instead of calculating from scratch.
-   *
-   * \param [in,out] dmat Feature matrix.
-   * \param [in,out] out_preds The output preds.
-   * \param model The model to predict from.
-   * \param tree_begin The tree begin index.
-   * \param ntree_limit (Optional) The ntree limit. 0 means do not
-   *                    limit trees.
-   */
   virtual void PredictBatch(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
                             const gbm::GBTreeModel& model, int tree_begin,
                             unsigned ntree_limit = 0) = 0;
@@ -186,41 +170,14 @@ class Predictor {
   static Predictor* Create(std::string name);
  protected:
-  /**
-   * \fn bool PredictFromCache(DMatrix* dmat, std::vector<bst_float>*
-   *     out_preds, const gbm::GBTreeModel& model, unsigned ntree_limit = 0)
-   *
-   * \brief Attempt to predict from cache.
-   *
-   * \return True if it succeeds, false if it fails.
-   */
-  bool PredictFromCache(DMatrix* dmat, std::vector<bst_float>* out_preds,
-                        const gbm::GBTreeModel& model,
-                        unsigned ntree_limit = 0);
-  /**
-   * \fn void Predictor::InitOutPredictions(const MetaInfo& info,
-   *     std::vector<bst_float>* out_preds, const gbm::GBTreeModel& model) const;
-   *
-   * \brief Init out predictions according to base margin.
-   *
-   * \param info Dmatrix info possibly containing base margin.
-   * \param [in,out] out_preds The out preds.
-   * \param model The model.
-   */
-  void InitOutPredictions(const MetaInfo& info,
-                          std::vector<bst_float>* out_preds,
-                          const gbm::GBTreeModel& model) const;
   /**
    * \struct PredictionCacheEntry
    *
    * \brief Contains pointer to input matrix and associated cached predictions.
    */
   struct PredictionCacheEntry {
     std::shared_ptr<DMatrix> data;
-    std::vector<bst_float> predictions;
+    HostDeviceVector<bst_float> predictions;
   };
   /**

View File

@@ -40,12 +40,9 @@ class TreeUpdater {
    *  but maybe different random seeds, usually one tree is passed in at a time,
    *  there can be multiple trees when we train random forest style model
    */
-  virtual void Update(const std::vector<bst_gpair>& gpair,
-                      DMatrix* data,
-                      const std::vector<RegTree*>& trees) = 0;
   virtual void Update(HostDeviceVector<bst_gpair>* gpair,
                       DMatrix* data,
-                      const std::vector<RegTree*>& trees);
+                      const std::vector<RegTree*>& trees) = 0;
   /*!
    * \brief determines whether updater has enough knowledge about a given dataset
@@ -58,11 +55,9 @@ class TreeUpdater {
    *         updated by the time this function returns.
    */
   virtual bool UpdatePredictionCache(const DMatrix* data,
-                                     std::vector<bst_float>* out_preds) {
+                                     HostDeviceVector<bst_float>* out_preds) {
     return false;
   }
-  virtual bool UpdatePredictionCache(const DMatrix* data,
-                                     HostDeviceVector<bst_float>* out_preds);
   /*!
    * \brief Create a tree updater given name

View File

@@ -33,30 +33,32 @@ class MyLogistic : public ObjFunction {
   void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
     param_.InitAllowUnknown(args);
   }
-  void GetGradient(const std::vector<bst_float> &preds,
+  void GetGradient(HostDeviceVector<bst_float> *preds,
                    const MetaInfo &info,
                    int iter,
-                   std::vector<bst_gpair> *out_gpair) override {
-    out_gpair->resize(preds.size());
-    for (size_t i = 0; i < preds.size(); ++i) {
+                   HostDeviceVector<bst_gpair> *out_gpair) override {
+    out_gpair->resize(preds->size());
+    std::vector<bst_float>& preds_h = preds->data_h();
+    std::vector<bst_gpair>& out_gpair_h = out_gpair->data_h();
+    for (size_t i = 0; i < preds_h.size(); ++i) {
       bst_float w = info.GetWeight(i);
       // scale the negative examples!
       if (info.labels[i] == 0.0f) w *= param_.scale_neg_weight;
       // logistic transformation
-      bst_float p = 1.0f / (1.0f + std::exp(-preds[i]));
+      bst_float p = 1.0f / (1.0f + std::exp(-preds_h[i]));
       // this is the gradient
       bst_float grad = (p - info.labels[i]) * w;
       // this is the second order gradient
       bst_float hess = p * (1.0f - p) * w;
-      out_gpair->at(i) = bst_gpair(grad, hess);
+      out_gpair_h.at(i) = bst_gpair(grad, hess);
     }
   }
   const char* DefaultEvalMetric() const override {
     return "error";
   }
-  void PredTransform(std::vector<bst_float> *io_preds) override {
+  void PredTransform(HostDeviceVector<bst_float> *io_preds) override {
     // transform margin value to probability.
-    std::vector<bst_float> &preds = *io_preds;
+    std::vector<bst_float> &preds = io_preds->data_h();
     for (size_t i = 0; i < preds.size(); ++i) {
       preds[i] = 1.0f / (1.0f + std::exp(-preds[i]));
     }

View File

@@ -191,9 +191,9 @@ struct XGBAPIThreadLocalEntry {
   /*! \brief result holder for returning string pointers */
   std::vector<const char *> ret_vec_charp;
   /*! \brief returning float vector. */
-  std::vector<bst_float> ret_vec_float;
+  HostDeviceVector<bst_float> ret_vec_float;
   /*! \brief temp variable of gradient pairs. */
-  std::vector<bst_gpair> tmp_gpair;
+  HostDeviceVector<bst_gpair> tmp_gpair;
 };
 // define the threadlocal store.
@@ -705,14 +705,15 @@ XGB_DLL int XGBoosterBoostOneIter(BoosterHandle handle,
                                   bst_float *grad,
                                   bst_float *hess,
                                   xgboost::bst_ulong len) {
-  std::vector<bst_gpair>& tmp_gpair = XGBAPIThreadLocalStore::Get()->tmp_gpair;
+  HostDeviceVector<bst_gpair>& tmp_gpair = XGBAPIThreadLocalStore::Get()->tmp_gpair;
   API_BEGIN();
   Booster* bst = static_cast<Booster*>(handle);
   std::shared_ptr<DMatrix>* dtr =
       static_cast<std::shared_ptr<DMatrix>*>(dtrain);
   tmp_gpair.resize(len);
+  std::vector<bst_gpair>& tmp_gpair_h = tmp_gpair.data_h();
   for (xgboost::bst_ulong i = 0; i < len; ++i) {
-    tmp_gpair[i] = bst_gpair(grad[i], hess[i]);
+    tmp_gpair_h[i] = bst_gpair(grad[i], hess[i]);
   }
   bst->LazyInit();
@@ -749,7 +750,8 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle,
                              unsigned ntree_limit,
                              xgboost::bst_ulong *len,
                              const bst_float **out_result) {
-  std::vector<bst_float>& preds = XGBAPIThreadLocalStore::Get()->ret_vec_float;
+  HostDeviceVector<bst_float>& preds =
+      XGBAPIThreadLocalStore::Get()->ret_vec_float;
   API_BEGIN();
   Booster *bst = static_cast<Booster*>(handle);
   bst->LazyInit();
@@ -761,7 +763,7 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle,
                 (option_mask & 4) != 0,
                 (option_mask & 8) != 0,
                 (option_mask & 16) != 0);
-  *out_result = dmlc::BeginPtr(preds);
+  *out_result = dmlc::BeginPtr(preds.data_h());
   *len = static_cast<xgboost::bst_ulong>(preds.size());
   API_END();
 }

View File

@@ -324,7 +324,7 @@ void CLIPredict(const CLIParam& param) {
   if (param.silent == 0) {
     LOG(CONSOLE) << "start prediction...";
   }
-  std::vector<bst_float> preds;
+  HostDeviceVector<bst_float> preds;
   learner->Predict(dtest.get(), param.pred_margin, &preds, param.ntree_limit);
   if (param.silent == 0) {
     LOG(CONSOLE) << "writing prediction to " << param.name_pred;
@@ -332,7 +332,7 @@ void CLIPredict(const CLIParam& param) {
   std::unique_ptr<dmlc::Stream> fo(
       dmlc::Stream::Create(param.name_pred.c_str(), "w"));
   dmlc::ostream os(fo.get());
-  for (bst_float p : preds) {
+  for (bst_float p : preds.data_h()) {
     os << p << '\n';
   }
   // force flush before fo destruct.

View File

@@ -12,13 +12,27 @@ namespace xgboost {
 template <typename T>
 struct HostDeviceVectorImpl {
-  explicit HostDeviceVectorImpl(size_t size) : data_h_(size) {}
+  explicit HostDeviceVectorImpl(size_t size, T v) : data_h_(size, v) {}
+  explicit HostDeviceVectorImpl(std::initializer_list<T> init) : data_h_(init) {}
+  explicit HostDeviceVectorImpl(const std::vector<T>& init) : data_h_(init) {}
   std::vector<T> data_h_;
 };
 template <typename T>
-HostDeviceVector<T>::HostDeviceVector(size_t size, int device) : impl_(nullptr) {
-  impl_ = new HostDeviceVectorImpl<T>(size);
+HostDeviceVector<T>::HostDeviceVector(size_t size, T v, int device) : impl_(nullptr) {
+  impl_ = new HostDeviceVectorImpl<T>(size, v);
+}
+template <typename T>
+HostDeviceVector<T>::HostDeviceVector(std::initializer_list<T> init, int device)
+  : impl_(nullptr) {
+  impl_ = new HostDeviceVectorImpl<T>(init);
+}
+template <typename T>
+HostDeviceVector<T>::HostDeviceVector(const std::vector<T>& init, int device)
+  : impl_(nullptr) {
+  impl_ = new HostDeviceVectorImpl<T>(init);
 }
 template <typename T>
@@ -41,8 +55,8 @@ template <typename T>
 std::vector<T>& HostDeviceVector<T>::data_h() { return impl_->data_h_; }
 template <typename T>
-void HostDeviceVector<T>::resize(size_t new_size, int new_device) {
-  impl_->data_h_.resize(new_size);
+void HostDeviceVector<T>::resize(size_t new_size, T v, int new_device) {
+  impl_->data_h_.resize(new_size, v);
 }
 // explicit instantiations are required, as HostDeviceVector isn't header-only

View File

@@ -1,6 +1,7 @@
 /*!
  * Copyright 2017 XGBoost contributors
  */
 #include "./host_device_vector.h"
 #include "./device_helpers.cuh"
@@ -8,13 +9,25 @@ namespace xgboost {
 template <typename T>
 struct HostDeviceVectorImpl {
-  HostDeviceVectorImpl(size_t size, int device)
+  HostDeviceVectorImpl(size_t size, T v, int device)
     : device_(device), on_d_(device >= 0) {
     if (on_d_) {
       dh::safe_cuda(cudaSetDevice(device_));
-      data_d_.resize(size);
+      data_d_.resize(size, v);
     } else {
-      data_h_.resize(size);
+      data_h_.resize(size, v);
+    }
+  }
+  // Init can be std::vector<T> or std::initializer_list<T>
+  template <class Init>
+  HostDeviceVectorImpl(const Init& init, int device)
+    : device_(device), on_d_(device >= 0) {
+    if (on_d_) {
+      dh::safe_cuda(cudaSetDevice(device_));
+      data_d_.resize(init.size());
+      thrust::copy(init.begin(), init.end(), data_d_.begin());
+    } else {
+      data_h_ = init;
     }
   }
   HostDeviceVectorImpl(const HostDeviceVectorImpl<T>&) = delete;
@@ -41,17 +54,18 @@ struct HostDeviceVectorImpl {
     lazy_sync_host();
     return data_h_;
   }
-  void resize(size_t new_size, int new_device) {
+  void resize(size_t new_size, T v, int new_device) {
     if (new_size == this->size() && new_device == device_)
       return;
-    device_ = new_device;
+    if (new_device != -1)
+      device_ = new_device;
     // if !on_d_, but the data size is 0 and the device is set,
     // resize the data on device instead
     if (!on_d_ && (data_h_.size() > 0 || device_ == -1)) {
-      data_h_.resize(new_size);
+      data_h_.resize(new_size, v);
     } else {
       dh::safe_cuda(cudaSetDevice(device_));
-      data_d_.resize(new_size);
+      data_d_.resize(new_size, v);
       on_d_ = true;
     }
   }
@@ -90,8 +104,20 @@ struct HostDeviceVectorImpl {
 };
 template <typename T>
-HostDeviceVector<T>::HostDeviceVector(size_t size, int device) : impl_(nullptr) {
-  impl_ = new HostDeviceVectorImpl<T>(size, device);
+HostDeviceVector<T>::HostDeviceVector(size_t size, T v, int device) : impl_(nullptr) {
+  impl_ = new HostDeviceVectorImpl<T>(size, v, device);
+}
+template <typename T>
+HostDeviceVector<T>::HostDeviceVector(std::initializer_list<T> init, int device)
+  : impl_(nullptr) {
+  impl_ = new HostDeviceVectorImpl<T>(init, device);
+}
+template <typename T>
+HostDeviceVector<T>::HostDeviceVector(const std::vector<T>& init, int device)
+  : impl_(nullptr) {
+  impl_ = new HostDeviceVectorImpl<T>(init, device);
 }
 template <typename T>
@@ -124,8 +150,8 @@ template <typename T>
 std::vector<T>& HostDeviceVector<T>::data_h() { return impl_->data_h(); }
 template <typename T>
-void HostDeviceVector<T>::resize(size_t new_size, int new_device) {
-  impl_->resize(new_size, new_device);
+void HostDeviceVector<T>::resize(size_t new_size, T v, int new_device) {
+  impl_->resize(new_size, v, new_device);
 }
 // explicit instantiations are required, as HostDeviceVector isn't header-only

View File

@@ -5,6 +5,7 @@
 #define XGBOOST_COMMON_HOST_DEVICE_VECTOR_H_
 #include <cstdlib>
+#include <initializer_list>
 #include <vector>
 // only include thrust-related files if host_device_vector.h
@@ -61,7 +62,9 @@ template <typename T> struct HostDeviceVectorImpl;
 template <typename T>
 class HostDeviceVector {
  public:
-  explicit HostDeviceVector(size_t size = 0, int device = -1);
+  explicit HostDeviceVector(size_t size = 0, T v = T(), int device = -1);
+  HostDeviceVector(std::initializer_list<T> init, int device = -1);
+  explicit HostDeviceVector(const std::vector<T>& init, int device = -1);
   ~HostDeviceVector();
   HostDeviceVector(const HostDeviceVector<T>&) = delete;
   HostDeviceVector(HostDeviceVector<T>&&) = delete;
@@ -70,6 +73,7 @@ class HostDeviceVector {
   size_t size() const;
   int device() const;
   T* ptr_d(int device);
+  T* ptr_h() { return data_h().data(); }
   // only define functions returning device_ptr
   // if HostDeviceVector.h is included from a .cu file
@@ -79,17 +83,9 @@ class HostDeviceVector {
 #endif
   std::vector<T>& data_h();
-  void resize(size_t new_size, int new_device);
-  // helper functions in case a function needs to be templated
-  // to work for both HostDeviceVector and std::vector
-  static std::vector<T>& data_h(HostDeviceVector<T>* v) {
-    return v->data_h();
-  }
-  static std::vector<T>& data_h(std::vector<T>* v) {
-    return *v;
-  }
+  // passing in new_device == -1 keeps the device as is
+  void resize(size_t new_size, T v = T(), int new_device = -1);
  private:
   HostDeviceVectorImpl<T>* impl_;

View File

@@ -76,8 +76,10 @@ class GBLinear : public GradientBooster {
   void Save(dmlc::Stream* fo) const override {
     model.Save(fo);
   }
-  void DoBoost(DMatrix *p_fmat, std::vector<bst_gpair> *in_gpair,
-               ObjFunction *obj) override {
+  void DoBoost(DMatrix *p_fmat,
+               HostDeviceVector<bst_gpair> *in_gpair,
+               ObjFunction* obj) override {
     monitor.Start("DoBoost");
     if (!p_fmat->HaveColAccess(false)) {
@@ -91,14 +93,15 @@ class GBLinear : public GradientBooster {
     this->LazySumWeights(p_fmat);
     if (!this->CheckConvergence()) {
-      updater->Update(in_gpair, p_fmat, &model, sum_instance_weight);
+      updater->Update(&in_gpair->data_h(), p_fmat, &model, sum_instance_weight);
     }
     this->UpdatePredictionCache();
     monitor.Stop("DoBoost");
   }
-  void PredictBatch(DMatrix *p_fmat, std::vector<bst_float> *out_preds,
+  void PredictBatch(DMatrix *p_fmat,
+                    HostDeviceVector<bst_float> *out_preds,
                     unsigned ntree_limit) override {
     monitor.Start("PredictBatch");
     CHECK_EQ(ntree_limit, 0U)
@@ -109,9 +112,9 @@ class GBLinear : public GradientBooster {
     if (it != cache_.end() && it->second.predictions.size() != 0) {
       std::vector<bst_float> &y = it->second.predictions;
       out_preds->resize(y.size());
-      std::copy(y.begin(), y.end(), out_preds->begin());
+      std::copy(y.begin(), y.end(), out_preds->data_h().begin());
     } else {
-      this->PredictBatchInternal(p_fmat, out_preds);
+      this->PredictBatchInternal(p_fmat, &out_preds->data_h());
     }
     monitor.Stop("PredictBatch");
   }

View File

@@ -22,18 +22,6 @@ GradientBooster* GradientBooster::Create(
   return (e->body)(cache_mats, base_margin);
 }
-void GradientBooster::DoBoost(DMatrix* p_fmat,
-                              HostDeviceVector<bst_gpair>* in_gpair,
-                              ObjFunction* obj) {
-  DoBoost(p_fmat, &in_gpair->data_h(), obj);
-}
-void GradientBooster::PredictBatch(DMatrix* dmat,
-                                   HostDeviceVector<bst_float>* out_preds,
-                                   unsigned ntree_limit) {
-  PredictBatch(dmat, &out_preds->data_h(), ntree_limit);
-}
 }  // namespace xgboost
 namespace xgboost {

View File

@@ -180,22 +180,39 @@ class GBTree : public GradientBooster {
         tparam.updater_seq.find("distcol") != std::string::npos;
   }
-  void DoBoost(DMatrix* p_fmat,
-               std::vector<bst_gpair>* in_gpair,
-               ObjFunction* obj) override {
-    DoBoostHelper(p_fmat, in_gpair, obj);
-  }
   void DoBoost(DMatrix* p_fmat,
                HostDeviceVector<bst_gpair>* in_gpair,
                ObjFunction* obj) override {
-    DoBoostHelper(p_fmat, in_gpair, obj);
-  }
-  void PredictBatch(DMatrix* p_fmat,
-                    std::vector<bst_float>* out_preds,
-                    unsigned ntree_limit) override {
-    predictor->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit);
+    std::vector<std::vector<std::unique_ptr<RegTree> > > new_trees;
+    const int ngroup = model_.param.num_output_group;
+    monitor.Start("BoostNewTrees");
+    if (ngroup == 1) {
+      std::vector<std::unique_ptr<RegTree> > ret;
+      BoostNewTrees(in_gpair, p_fmat, 0, &ret);
+      new_trees.push_back(std::move(ret));
+    } else {
+      CHECK_EQ(in_gpair->size() % ngroup, 0U)
+          << "must have exactly ngroup*nrow gpairs";
+      // TODO(canonizer): perform this on GPU if HostDeviceVector has device set.
+      HostDeviceVector<bst_gpair> tmp(in_gpair->size() / ngroup,
+                                      bst_gpair(), in_gpair->device());
+      std::vector<bst_gpair>& gpair_h = in_gpair->data_h();
+      bst_omp_uint nsize = static_cast<bst_omp_uint>(tmp.size());
+      for (int gid = 0; gid < ngroup; ++gid) {
+        std::vector<bst_gpair>& tmp_h = tmp.data_h();
+#pragma omp parallel for schedule(static)
+        for (bst_omp_uint i = 0; i < nsize; ++i) {
+          tmp_h[i] = gpair_h[i * ngroup + gid];
+        }
+        std::vector<std::unique_ptr<RegTree> > ret;
+        BoostNewTrees(&tmp, p_fmat, gid, &ret);
+        new_trees.push_back(std::move(ret));
+      }
+    }
+    monitor.Stop("BoostNewTrees");
+    monitor.Start("CommitModel");
+    this->CommitModel(std::move(new_trees));
+    monitor.Stop("CommitModel");
   }
   void PredictBatch(DMatrix* p_fmat,
@@ -251,48 +268,11 @@ class GBTree : public GradientBooster {
     }
   }
-  // TVec is either std::vector<bst_gpair> or HostDeviceVector<bst_gpair>
-  template <typename TVec>
-  void DoBoostHelper(DMatrix* p_fmat,
-                     TVec* in_gpair,
-                     ObjFunction* obj) {
-    std::vector<std::vector<std::unique_ptr<RegTree> > > new_trees;
-    const int ngroup = model_.param.num_output_group;
-    monitor.Start("BoostNewTrees");
-    if (ngroup == 1) {
-      std::vector<std::unique_ptr<RegTree> > ret;
-      BoostNewTrees(in_gpair, p_fmat, 0, &ret);
-      new_trees.push_back(std::move(ret));
-    } else {
-      CHECK_EQ(in_gpair->size() % ngroup, 0U)
-          << "must have exactly ngroup*nrow gpairs";
-      std::vector<bst_gpair> tmp(in_gpair->size() / ngroup);
-      auto& gpair_h = HostDeviceVector<bst_gpair>::data_h(in_gpair);
-      for (int gid = 0; gid < ngroup; ++gid) {
-        bst_omp_uint nsize = static_cast<bst_omp_uint>(tmp.size());
-#pragma omp parallel for schedule(static)
-        for (bst_omp_uint i = 0; i < nsize; ++i) {
-          tmp[i] = gpair_h[i * ngroup + gid];
-        }
-        std::vector<std::unique_ptr<RegTree> > ret;
-        BoostNewTrees(&tmp, p_fmat, gid, &ret);
-        new_trees.push_back(std::move(ret));
-      }
-    }
-    monitor.Stop("BoostNewTrees");
-    monitor.Start("CommitModel");
-    this->CommitModel(std::move(new_trees));
-    monitor.Stop("CommitModel");
-  }
   // do group specific group
-  // TVec is either const std::vector<bst_gpair> or HostDeviceVector<bst_gpair>
-  template <typename TVec>
-  inline void
-  BoostNewTrees(TVec* gpair,
-                DMatrix *p_fmat,
-                int bst_group,
-                std::vector<std::unique_ptr<RegTree> >* ret) {
+  inline void BoostNewTrees(HostDeviceVector<bst_gpair>* gpair,
+                            DMatrix *p_fmat,
+                            int bst_group,
+                            std::vector<std::unique_ptr<RegTree> >* ret) {
     this->InitUpdater();
     std::vector<RegTree*> new_trees;
     ret->clear();
@@ -315,23 +295,8 @@ class GBTree : public GradientBooster {
       }
     }
     // update the trees
-    for (auto& up : updaters) {
-      UpdateHelper(up.get(), gpair, p_fmat, new_trees);
-    }
-  }
-  void UpdateHelper(TreeUpdater* updater,
-                    std::vector<bst_gpair>* gpair,
-                    DMatrix *p_fmat,
-                    const std::vector<RegTree*>& new_trees) {
-    updater->Update(*gpair, p_fmat, new_trees);
-  }
-  void UpdateHelper(TreeUpdater* updater,
-                    HostDeviceVector<bst_gpair>* gpair,
-                    DMatrix *p_fmat,
-                    const std::vector<RegTree*>& new_trees) {
-    updater->Update(gpair, p_fmat, new_trees);
+    for (auto& up : updaters)
+      up->Update(gpair, p_fmat, new_trees);
   }
   // commit new trees all at once
@@ -389,10 +354,10 @@ class Dart : public GBTree {
   // predict the leaf scores with dropout if ntree_limit = 0
   void PredictBatch(DMatrix* p_fmat,
-                    std::vector<bst_float>* out_preds,
+                    HostDeviceVector<bst_float>* out_preds,
                     unsigned ntree_limit) override {
     DropTrees(ntree_limit);
-    PredLoopInternal<Dart>(p_fmat, out_preds, 0, ntree_limit, true);
+    PredLoopInternal<Dart>(p_fmat, &out_preds->data_h(), 0, ntree_limit, true);
   }
   void PredictInstance(const SparseBatch::Inst& inst,

View File

@@ -362,17 +362,17 @@ class LearnerImpl : public Learner {
     }
     this->LazyInitDMatrix(train);
     monitor.Start("PredictRaw");
-    this->PredictRaw(train, &preds2_);
+    this->PredictRaw(train, &preds_);
     monitor.Stop("PredictRaw");
     monitor.Start("GetGradient");
-    obj_->GetGradient(&preds2_, train->info(), iter, &gpair_);
+    obj_->GetGradient(&preds_, train->info(), iter, &gpair_);
     monitor.Stop("GetGradient");
     gbm_->DoBoost(train, &gpair_, obj_.get());
     monitor.Stop("UpdateOneIter");
   }
   void BoostOneIter(int iter, DMatrix* train,
-                    std::vector<bst_gpair>* in_gpair) override {
+                    HostDeviceVector<bst_gpair>* in_gpair) override {
     monitor.Start("BoostOneIter");
     if (tparam.seed_per_iteration || rabit::IsDistributed()) {
       common::GlobalRandom().seed(tparam.seed * kRandSeedMagic + iter);
@@ -395,7 +395,7 @@ class LearnerImpl : public Learner {
       obj_->EvalTransform(&preds_);
       for (auto& ev : metrics_) {
         os << '\t' << data_names[i] << '-' << ev->Name() << ':'
-           << ev->Eval(preds_, data_sets[i]->info(), tparam.dsplit == 2);
+           << ev->Eval(preds_.data_h(), data_sets[i]->info(), tparam.dsplit == 2);
       }
     }
@@ -438,19 +438,20 @@ class LearnerImpl : public Learner {
     this->PredictRaw(data, &preds_);
     obj_->EvalTransform(&preds_);
     return std::make_pair(metric,
-                          ev->Eval(preds_, data->info(), tparam.dsplit == 2));
+                          ev->Eval(preds_.data_h(), data->info(), tparam.dsplit == 2));
   }
   void Predict(DMatrix* data, bool output_margin,
-               std::vector<bst_float>* out_preds, unsigned ntree_limit,
+               HostDeviceVector<bst_float>* out_preds, unsigned ntree_limit,
                bool pred_leaf, bool pred_contribs, bool approx_contribs,
                bool pred_interactions) const override {
     if (pred_contribs) {
-      gbm_->PredictContribution(data, out_preds, ntree_limit, approx_contribs);
+      gbm_->PredictContribution(data, &out_preds->data_h(), ntree_limit, approx_contribs);
     } else if (pred_interactions) {
-      gbm_->PredictInteractionContributions(data, out_preds, ntree_limit, approx_contribs);
+      gbm_->PredictInteractionContributions(data, &out_preds->data_h(), ntree_limit,
+                                            approx_contribs);
     } else if (pred_leaf) {
-      gbm_->PredictLeaf(data, out_preds, ntree_limit);
+      gbm_->PredictLeaf(data, &out_preds->data_h(), ntree_limit);
     } else {
       this->PredictRaw(data, out_preds, ntree_limit);
       if (!output_margin) {
@@ -546,12 +547,6 @@ class LearnerImpl : public Learner {
    * \param ntree_limit limit number of trees used for boosted tree
    *   predictor, when it equals 0, this means we are using all the trees
    */
-  inline void PredictRaw(DMatrix* data, std::vector<bst_float>* out_preds,
-                         unsigned ntree_limit = 0) const {
-    CHECK(gbm_.get() != nullptr)
-        << "Predict must happen after Load or InitModel";
-    gbm_->PredictBatch(data, out_preds, ntree_limit);
-  }
   inline void PredictRaw(DMatrix* data, HostDeviceVector<bst_float>* out_preds,
                          unsigned ntree_limit = 0) const {
     CHECK(gbm_.get() != nullptr)
@@ -572,8 +567,7 @@ class LearnerImpl : public Learner {
   // name of objective function
   std::string name_obj_;
   // temporal storages for prediction
-  std::vector<bst_float> preds_;
-  HostDeviceVector<bst_float> preds2_;
+  HostDeviceVector<bst_float> preds_;
   // gradient pairs
   HostDeviceVector<bst_gpair> gpair_;

View File

@@ -35,16 +35,18 @@ class SoftmaxMultiClassObj : public ObjFunction {
   void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
     param_.InitAllowUnknown(args);
   }
-  void GetGradient(const std::vector<bst_float>& preds,
+  void GetGradient(HostDeviceVector<bst_float>* preds,
                    const MetaInfo& info,
                    int iter,
-                   std::vector<bst_gpair>* out_gpair) override {
+                   HostDeviceVector<bst_gpair>* out_gpair) override {
     CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty";
-    CHECK(preds.size() == (static_cast<size_t>(param_.num_class) * info.labels.size()))
+    CHECK(preds->size() == (static_cast<size_t>(param_.num_class) * info.labels.size()))
         << "SoftmaxMultiClassObj: label size and pred size does not match";
-    out_gpair->resize(preds.size());
+    std::vector<bst_float>& preds_h = preds->data_h();
+    out_gpair->resize(preds_h.size());
+    std::vector<bst_gpair>& gpair = out_gpair->data_h();
     const int nclass = param_.num_class;
-    const omp_ulong ndata = static_cast<omp_ulong>(preds.size() / nclass);
+    const omp_ulong ndata = static_cast<omp_ulong>(preds_h.size() / nclass);
     int label_error = 0;
 #pragma omp parallel
@@ -53,7 +55,7 @@ class SoftmaxMultiClassObj : public ObjFunction {
 #pragma omp for schedule(static)
       for (omp_ulong i = 0; i < ndata; ++i) {
         for (int k = 0; k < nclass; ++k) {
-          rec[k] = preds[i * nclass + k];
+          rec[k] = preds_h[i * nclass + k];
         }
         common::Softmax(&rec);
         int label = static_cast<int>(info.labels[i]);
@@ -65,9 +67,9 @@ class SoftmaxMultiClassObj : public ObjFunction {
           bst_float p = rec[k];
           const bst_float h = 2.0f * p * (1.0f - p) * wt;
           if (label == k) {
-            (*out_gpair)[i * nclass + k] = bst_gpair((p - 1.0f) * wt, h);
+            gpair[i * nclass + k] = bst_gpair((p - 1.0f) * wt, h);
           } else {
-            (*out_gpair)[i * nclass + k] = bst_gpair(p* wt, h);
+            gpair[i * nclass + k] = bst_gpair(p* wt, h);
           }
         }
       }
@@ -77,10 +79,10 @@ class SoftmaxMultiClassObj : public ObjFunction {
         << " num_class=" << nclass
         << " but found " << label_error << " in label.";
   }
-  void PredTransform(std::vector<bst_float>* io_preds) override {
+  void PredTransform(HostDeviceVector<bst_float>* io_preds) override {
     this->Transform(io_preds, output_prob_);
   }
-  void EvalTransform(std::vector<bst_float>* io_preds) override {
+  void EvalTransform(HostDeviceVector<bst_float>* io_preds) override {
     this->Transform(io_preds, true);
   }
   const char* DefaultEvalMetric() const override {
@@ -88,8 +90,8 @@ class SoftmaxMultiClassObj : public ObjFunction {
   }
  private:
-  inline void Transform(std::vector<bst_float> *io_preds, bool prob) {
-    std::vector<bst_float> &preds = *io_preds;
+  inline void Transform(HostDeviceVector<bst_float> *io_preds, bool prob) {
+    std::vector<bst_float> &preds = io_preds->data_h();
     std::vector<bst_float> tmp;
     const int nclass = param_.num_class;
     const omp_ulong ndata = static_cast<omp_ulong>(preds.size() / nclass);

View File

@@ -25,17 +25,6 @@ ObjFunction* ObjFunction::Create(const std::string& name) {
   return (e->body)();
 }
-void ObjFunction::GetGradient(HostDeviceVector<bst_float>* preds,
-                              const MetaInfo& info,
-                              int iteration,
-                              HostDeviceVector<bst_gpair>* out_gpair) {
-  GetGradient(preds->data_h(), info, iteration, &out_gpair->data_h());
-}
-void ObjFunction::PredTransform(HostDeviceVector<bst_float> *io_preds) {
-  PredTransform(&io_preds->data_h());
-}
 }  // namespace xgboost
 namespace xgboost {

View File

@@ -37,13 +37,14 @@ class LambdaRankObj : public ObjFunction {
   void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
     param_.InitAllowUnknown(args);
   }
-  void GetGradient(const std::vector<bst_float>& preds,
+  void GetGradient(HostDeviceVector<bst_float>* preds,
                    const MetaInfo& info,
                    int iter,
-                   std::vector<bst_gpair>* out_gpair) override {
-    CHECK_EQ(preds.size(), info.labels.size()) << "label size predict size not match";
-    std::vector<bst_gpair>& gpair = *out_gpair;
-    gpair.resize(preds.size());
+                   HostDeviceVector<bst_gpair>* out_gpair) override {
+    CHECK_EQ(preds->size(), info.labels.size()) << "label size predict size not match";
+    auto& preds_h = preds->data_h();
+    out_gpair->resize(preds_h.size());
+    std::vector<bst_gpair>& gpair = out_gpair->data_h();
     // quick consistency when group is not available
     std::vector<unsigned> tgptr(2, 0); tgptr[1] = static_cast<unsigned>(info.labels.size());
     const std::vector<unsigned> &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr;
@@ -63,7 +64,7 @@ class LambdaRankObj : public ObjFunction {
     for (bst_omp_uint k = 0; k < ngroup; ++k) {
       lst.clear(); pairs.clear();
       for (unsigned j = gptr[k]; j < gptr[k+1]; ++j) {
-        lst.push_back(ListEntry(preds[j], info.labels[j], j));
+        lst.push_back(ListEntry(preds_h[j], info.labels[j], j));
         gpair[j] = bst_gpair(0.0f, 0.0f);
       }
       std::sort(lst.begin(), lst.end(), ListEntry::CmpPred);

View File

@ -38,18 +38,20 @@ class RegLossObj : public ObjFunction {
const std::vector<std::pair<std::string, std::string> > &args) override { const std::vector<std::pair<std::string, std::string> > &args) override {
param_.InitAllowUnknown(args); param_.InitAllowUnknown(args);
} }
void GetGradient(const std::vector<bst_float> &preds, const MetaInfo &info, void GetGradient(HostDeviceVector<bst_float> *preds, const MetaInfo &info,
int iter, std::vector<bst_gpair> *out_gpair) override { int iter, HostDeviceVector<bst_gpair> *out_gpair) override {
CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty"; CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.size(), info.labels.size()) CHECK_EQ(preds->size(), info.labels.size())
<< "labels are not correctly provided" << "labels are not correctly provided"
<< "preds.size=" << preds.size() << "preds.size=" << preds->size()
<< ", label.size=" << info.labels.size(); << ", label.size=" << info.labels.size();
auto& preds_h = preds->data_h();
this->LazyCheckLabels(info.labels); this->LazyCheckLabels(info.labels);
out_gpair->resize(preds.size()); out_gpair->resize(preds_h.size());
const omp_ulong n = static_cast<omp_ulong>(preds.size()); auto& gpair = out_gpair->data_h();
auto gpair_ptr = out_gpair->data(); const omp_ulong n = static_cast<omp_ulong>(preds_h.size());
auto gpair_ptr = out_gpair->ptr_h();
avx::Float8 scale(param_.scale_pos_weight); avx::Float8 scale(param_.scale_pos_weight);
const omp_ulong remainder = n % 8; const omp_ulong remainder = n % 8;
@ -58,7 +60,7 @@ class RegLossObj : public ObjFunction {
#pragma omp parallel for schedule(static) num_threads(std::min(8, nthread)) #pragma omp parallel for schedule(static) num_threads(std::min(8, nthread))
for (omp_ulong i = 0; i < n - remainder; i += 8) { for (omp_ulong i = 0; i < n - remainder; i += 8) {
avx::Float8 y(&info.labels[i]); avx::Float8 y(&info.labels[i]);
avx::Float8 p = Loss::PredTransform(avx::Float8(&preds[i])); avx::Float8 p = Loss::PredTransform(avx::Float8(&preds_h[i]));
avx::Float8 w = info.weights.empty() ? avx::Float8(1.0f) avx::Float8 w = info.weights.empty() ? avx::Float8(1.0f)
: avx::Float8(&info.weights[i]); : avx::Float8(&info.weights[i]);
// Adjust weight // Adjust weight
@ -69,11 +71,11 @@ class RegLossObj : public ObjFunction {
} }
for (omp_ulong i = n - remainder; i < n; ++i) { for (omp_ulong i = n - remainder; i < n; ++i) {
auto y = info.labels[i]; auto y = info.labels[i];
bst_float p = Loss::PredTransform(preds[i]); bst_float p = Loss::PredTransform(preds_h[i]);
bst_float w = info.GetWeight(i); bst_float w = info.GetWeight(i);
w += y * ((param_.scale_pos_weight * w) - w); w += y * ((param_.scale_pos_weight * w) - w);
(*out_gpair)[i] = bst_gpair(Loss::FirstOrderGradient(p, y) * w, gpair[i] = bst_gpair(Loss::FirstOrderGradient(p, y) * w,
Loss::SecondOrderGradient(p, y) * w); Loss::SecondOrderGradient(p, y) * w);
} }
// Reset omp max threads // Reset omp max threads
@ -82,8 +84,8 @@ class RegLossObj : public ObjFunction {
const char *DefaultEvalMetric() const override { const char *DefaultEvalMetric() const override {
return Loss::DefaultEvalMetric(); return Loss::DefaultEvalMetric();
} }
void PredTransform(std::vector<bst_float> *io_preds) override { void PredTransform(HostDeviceVector<bst_float> *io_preds) override {
std::vector<bst_float> &preds = *io_preds; std::vector<bst_float> &preds = io_preds->data_h();
const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size()); const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size());
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (bst_omp_uint j = 0; j < ndata; ++j) { for (bst_omp_uint j = 0; j < ndata; ++j) {
@ -143,40 +145,42 @@ class PoissonRegression : public ObjFunction {
param_.InitAllowUnknown(args); param_.InitAllowUnknown(args);
} }
void GetGradient(const std::vector<bst_float> &preds, void GetGradient(HostDeviceVector<bst_float> *preds,
const MetaInfo &info, const MetaInfo &info,
int iter, int iter,
std::vector<bst_gpair> *out_gpair) override { HostDeviceVector<bst_gpair> *out_gpair) override {
CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty"; CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.size(), info.labels.size()) << "labels are not correctly provided"; CHECK_EQ(preds->size(), info.labels.size()) << "labels are not correctly provided";
out_gpair->resize(preds.size()); auto& preds_h = preds->data_h();
out_gpair->resize(preds->size());
auto& gpair = out_gpair->data_h();
// check if label in range // check if label in range
bool label_correct = true; bool label_correct = true;
// start calculating gradient // start calculating gradient
const omp_ulong ndata = static_cast<omp_ulong>(preds.size()); // NOLINT(*) const omp_ulong ndata = static_cast<omp_ulong>(preds_h.size()); // NOLINT(*)
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (omp_ulong i = 0; i < ndata; ++i) { // NOLINT(*) for (omp_ulong i = 0; i < ndata; ++i) { // NOLINT(*)
bst_float p = preds[i]; bst_float p = preds_h[i];
bst_float w = info.GetWeight(i); bst_float w = info.GetWeight(i);
bst_float y = info.labels[i]; bst_float y = info.labels[i];
if (y >= 0.0f) { if (y >= 0.0f) {
(*out_gpair)[i] = bst_gpair((std::exp(p) - y) * w, gpair[i] = bst_gpair((std::exp(p) - y) * w,
std::exp(p + param_.max_delta_step) * w); std::exp(p + param_.max_delta_step) * w);
} else { } else {
label_correct = false; label_correct = false;
} }
} }
CHECK(label_correct) << "PoissonRegression: label must be nonnegative"; CHECK(label_correct) << "PoissonRegression: label must be nonnegative";
} }
void PredTransform(std::vector<bst_float> *io_preds) override { void PredTransform(HostDeviceVector<bst_float> *io_preds) override {
std::vector<bst_float> &preds = *io_preds; std::vector<bst_float> &preds = io_preds->data_h();
const long ndata = static_cast<long>(preds.size()); // NOLINT(*) const long ndata = static_cast<long>(preds.size()); // NOLINT(*)
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (long j = 0; j < ndata; ++j) { // NOLINT(*) for (long j = 0; j < ndata; ++j) { // NOLINT(*)
preds[j] = std::exp(preds[j]); preds[j] = std::exp(preds[j]);
} }
} }
void EvalTransform(std::vector<bst_float> *io_preds) override { void EvalTransform(HostDeviceVector<bst_float> *io_preds) override {
PredTransform(io_preds); PredTransform(io_preds);
} }
bst_float ProbToMargin(bst_float base_score) const override { bst_float ProbToMargin(bst_float base_score) const override {
@ -202,21 +206,23 @@ class CoxRegression : public ObjFunction {
public: public:
// declare functions // declare functions
void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {} void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {}
void GetGradient(const std::vector<bst_float> &preds, void GetGradient(HostDeviceVector<bst_float> *preds,
const MetaInfo &info, const MetaInfo &info,
int iter, int iter,
std::vector<bst_gpair> *out_gpair) override { HostDeviceVector<bst_gpair> *out_gpair) override {
CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty"; CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.size(), info.labels.size()) << "labels are not correctly provided"; CHECK_EQ(preds->size(), info.labels.size()) << "labels are not correctly provided";
out_gpair->resize(preds.size()); auto& preds_h = preds->data_h();
out_gpair->resize(preds_h.size());
auto& gpair = out_gpair->data_h();
const std::vector<size_t> &label_order = info.LabelAbsSort(); const std::vector<size_t> &label_order = info.LabelAbsSort();
const omp_ulong ndata = static_cast<omp_ulong>(preds.size()); // NOLINT(*) const omp_ulong ndata = static_cast<omp_ulong>(preds_h.size()); // NOLINT(*)
// pre-compute a sum // pre-compute a sum
double exp_p_sum = 0; // we use double because we might need the precision with large datasets double exp_p_sum = 0; // we use double because we might need the precision with large datasets
for (omp_ulong i = 0; i < ndata; ++i) { for (omp_ulong i = 0; i < ndata; ++i) {
exp_p_sum += std::exp(preds[label_order[i]]); exp_p_sum += std::exp(preds_h[label_order[i]]);
} }
// start calculating grad and hess // start calculating grad and hess
@ -227,7 +233,7 @@ class CoxRegression : public ObjFunction {
double accumulated_sum = 0; double accumulated_sum = 0;
for (omp_ulong i = 0; i < ndata; ++i) { // NOLINT(*) for (omp_ulong i = 0; i < ndata; ++i) { // NOLINT(*)
const size_t ind = label_order[i]; const size_t ind = label_order[i];
const double p = preds[ind]; const double p = preds_h[ind];
const double exp_p = std::exp(p); const double exp_p = std::exp(p);
const double w = info.GetWeight(ind); const double w = info.GetWeight(ind);
const double y = info.labels[ind]; const double y = info.labels[ind];
@ -251,21 +257,21 @@ class CoxRegression : public ObjFunction {
const double grad = exp_p*r_k - static_cast<bst_float>(y > 0); const double grad = exp_p*r_k - static_cast<bst_float>(y > 0);
const double hess = exp_p*r_k - exp_p*exp_p * s_k; const double hess = exp_p*r_k - exp_p*exp_p * s_k;
out_gpair->at(ind) = bst_gpair(grad * w, hess * w); gpair.at(ind) = bst_gpair(grad * w, hess * w);
last_abs_y = abs_y; last_abs_y = abs_y;
last_exp_p = exp_p; last_exp_p = exp_p;
} }
} }
void PredTransform(std::vector<bst_float> *io_preds) override { void PredTransform(HostDeviceVector<bst_float> *io_preds) override {
std::vector<bst_float> &preds = *io_preds; std::vector<bst_float> &preds = io_preds->data_h();
const long ndata = static_cast<long>(preds.size()); // NOLINT(*) const long ndata = static_cast<long>(preds.size()); // NOLINT(*)
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (long j = 0; j < ndata; ++j) { // NOLINT(*) for (long j = 0; j < ndata; ++j) { // NOLINT(*)
preds[j] = std::exp(preds[j]); preds[j] = std::exp(preds[j]);
} }
} }
void EvalTransform(std::vector<bst_float> *io_preds) override { void EvalTransform(HostDeviceVector<bst_float> *io_preds) override {
PredTransform(io_preds); PredTransform(io_preds);
} }
bst_float ProbToMargin(bst_float base_score) const override { bst_float ProbToMargin(bst_float base_score) const override {
@ -288,39 +294,41 @@ class GammaRegression : public ObjFunction {
void Configure(const std::vector<std::pair<std::string, std::string> >& args) override { void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
} }
void GetGradient(const std::vector<bst_float> &preds, void GetGradient(HostDeviceVector<bst_float> *preds,
const MetaInfo &info, const MetaInfo &info,
int iter, int iter,
std::vector<bst_gpair> *out_gpair) override { HostDeviceVector<bst_gpair> *out_gpair) override {
CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty"; CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.size(), info.labels.size()) << "labels are not correctly provided"; CHECK_EQ(preds->size(), info.labels.size()) << "labels are not correctly provided";
out_gpair->resize(preds.size()); auto& preds_h = preds->data_h();
out_gpair->resize(preds_h.size());
auto& gpair = out_gpair->data_h();
// check if label in range // check if label in range
bool label_correct = true; bool label_correct = true;
// start calculating gradient // start calculating gradient
const omp_ulong ndata = static_cast<omp_ulong>(preds.size()); // NOLINT(*) const omp_ulong ndata = static_cast<omp_ulong>(preds_h.size()); // NOLINT(*)
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (omp_ulong i = 0; i < ndata; ++i) { // NOLINT(*) for (omp_ulong i = 0; i < ndata; ++i) { // NOLINT(*)
bst_float p = preds[i]; bst_float p = preds_h[i];
bst_float w = info.GetWeight(i); bst_float w = info.GetWeight(i);
bst_float y = info.labels[i]; bst_float y = info.labels[i];
if (y >= 0.0f) { if (y >= 0.0f) {
(*out_gpair)[i] = bst_gpair((1 - y / std::exp(p)) * w, y / std::exp(p) * w); gpair[i] = bst_gpair((1 - y / std::exp(p)) * w, y / std::exp(p) * w);
} else { } else {
label_correct = false; label_correct = false;
} }
} }
CHECK(label_correct) << "GammaRegression: label must be positive"; CHECK(label_correct) << "GammaRegression: label must be positive";
} }
void PredTransform(std::vector<bst_float> *io_preds) override { void PredTransform(HostDeviceVector<bst_float> *io_preds) override {
std::vector<bst_float> &preds = *io_preds; std::vector<bst_float> &preds = io_preds->data_h();
const long ndata = static_cast<long>(preds.size()); // NOLINT(*) const long ndata = static_cast<long>(preds.size()); // NOLINT(*)
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (long j = 0; j < ndata; ++j) { // NOLINT(*) for (long j = 0; j < ndata; ++j) { // NOLINT(*)
preds[j] = std::exp(preds[j]); preds[j] = std::exp(preds[j]);
} }
} }
void EvalTransform(std::vector<bst_float> *io_preds) override { void EvalTransform(HostDeviceVector<bst_float> *io_preds) override {
PredTransform(io_preds); PredTransform(io_preds);
} }
bst_float ProbToMargin(bst_float base_score) const override { bst_float ProbToMargin(bst_float base_score) const override {
@ -353,20 +361,22 @@ class TweedieRegression : public ObjFunction {
param_.InitAllowUnknown(args); param_.InitAllowUnknown(args);
} }
void GetGradient(const std::vector<bst_float> &preds, void GetGradient(HostDeviceVector<bst_float> *preds,
const MetaInfo &info, const MetaInfo &info,
int iter, int iter,
std::vector<bst_gpair> *out_gpair) override { HostDeviceVector<bst_gpair> *out_gpair) override {
CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty"; CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.size(), info.labels.size()) << "labels are not correctly provided"; CHECK_EQ(preds->size(), info.labels.size()) << "labels are not correctly provided";
out_gpair->resize(preds.size()); auto& preds_h = preds->data_h();
out_gpair->resize(preds->size());
auto& gpair = out_gpair->data_h();
// check if label in range // check if label in range
bool label_correct = true; bool label_correct = true;
// start calculating gradient // start calculating gradient
const omp_ulong ndata = static_cast<omp_ulong>(preds.size()); // NOLINT(*) const omp_ulong ndata = static_cast<omp_ulong>(preds->size()); // NOLINT(*)
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (omp_ulong i = 0; i < ndata; ++i) { // NOLINT(*) for (omp_ulong i = 0; i < ndata; ++i) { // NOLINT(*)
bst_float p = preds[i]; bst_float p = preds_h[i];
bst_float w = info.GetWeight(i); bst_float w = info.GetWeight(i);
bst_float y = info.labels[i]; bst_float y = info.labels[i];
float rho = param_.tweedie_variance_power; float rho = param_.tweedie_variance_power;
@ -374,15 +384,15 @@ class TweedieRegression : public ObjFunction {
bst_float grad = -y * std::exp((1 - rho) * p) + std::exp((2 - rho) * p); bst_float grad = -y * std::exp((1 - rho) * p) + std::exp((2 - rho) * p);
bst_float hess = -y * (1 - rho) * \ bst_float hess = -y * (1 - rho) * \
std::exp((1 - rho) * p) + (2 - rho) * std::exp((2 - rho) * p); std::exp((1 - rho) * p) + (2 - rho) * std::exp((2 - rho) * p);
(*out_gpair)[i] = bst_gpair(grad * w, hess * w); gpair[i] = bst_gpair(grad * w, hess * w);
} else { } else {
label_correct = false; label_correct = false;
} }
} }
CHECK(label_correct) << "TweedieRegression: label must be nonnegative"; CHECK(label_correct) << "TweedieRegression: label must be nonnegative";
} }
void PredTransform(std::vector<bst_float> *io_preds) override { void PredTransform(HostDeviceVector<bst_float> *io_preds) override {
std::vector<bst_float> &preds = *io_preds; std::vector<bst_float> &preds = io_preds->data_h();
const long ndata = static_cast<long>(preds.size()); // NOLINT(*) const long ndata = static_cast<long>(preds.size()); // NOLINT(*)
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (long j = 0; j < ndata; ++j) { // NOLINT(*) for (long j = 0; j < ndata; ++j) { // NOLINT(*)
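The CPU regression objectives above (Gamma, Tweedie, and the other members of this file) now all follow one host-side pattern: take predictions as a HostDeviceVector, grab the host copy once via data_h(), size the output gradient vector, and write gradients through its host copy. Below is a minimal stand-alone sketch of that pattern, not part of this commit; the function name, the toy link function, and the plain std::vector of labels are illustrative, and it assumes the xgboost headers defining bst_float, bst_gpair and HostDeviceVector are on the include path and that labels.size() matches preds->size().

#include <cmath>
#include <vector>

void ToyGetGradient(xgboost::HostDeviceVector<xgboost::bst_float>* preds,
                    const std::vector<float>& labels,
                    xgboost::HostDeviceVector<xgboost::bst_gpair>* out_gpair) {
  std::vector<xgboost::bst_float>& preds_h = preds->data_h();   // host view of predictions
  out_gpair->resize(preds_h.size());
  std::vector<xgboost::bst_gpair>& gpair = out_gpair->data_h(); // host view of output
  for (size_t i = 0; i < preds_h.size(); ++i) {
    float p = std::exp(preds_h[i]);                             // toy link function
    gpair[i] = xgboost::bst_gpair(p - labels[i], p);            // gradient, hessian
  }
}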


@ -103,8 +103,8 @@ class GPURegLossObj : public ObjFunction {
// free the old data and allocate the new data // free the old data and allocate the new data
ba_.reset(new bulk_allocator<memory_type::DEVICE>()); ba_.reset(new bulk_allocator<memory_type::DEVICE>());
data_.reset(new DeviceData(ba_.get(), 0, n)); data_.reset(new DeviceData(ba_.get(), 0, n));
preds_d_.resize(n, param_.gpu_id); preds_d_.resize(n, 0.0f, param_.gpu_id);
out_gpair_d_.resize(n, param_.gpu_id); out_gpair_d_.resize(n, bst_gpair(), param_.gpu_id);
} }
public: public:
@ -114,23 +114,6 @@ class GPURegLossObj : public ObjFunction {
param_.InitAllowUnknown(args); param_.InitAllowUnknown(args);
CHECK(param_.n_gpus != 0) << "Must have at least one device"; CHECK(param_.n_gpus != 0) << "Must have at least one device";
} }
void GetGradient(const std::vector<float> &preds,
const MetaInfo &info,
int iter,
std::vector<bst_gpair> *out_gpair) override {
CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.size(), info.labels.size())
<< "labels are not correctly provided"
<< "preds.size=" << preds.size() << ", label.size=" << info.labels.size();
size_t ndata = preds.size();
out_gpair->resize(ndata);
LazyResize(ndata);
thrust::copy(preds.begin(), preds.end(), preds_d_.tbegin(param_.gpu_id));
GetGradientDevice(preds_d_.ptr_d(param_.gpu_id), info, iter,
out_gpair_d_.ptr_d(param_.gpu_id), ndata);
thrust::copy_n(out_gpair_d_.tbegin(param_.gpu_id), ndata, out_gpair->begin());
}
void GetGradient(HostDeviceVector<float>* preds, void GetGradient(HostDeviceVector<float>* preds,
const MetaInfo &info, const MetaInfo &info,
@ -141,7 +124,7 @@ class GPURegLossObj : public ObjFunction {
<< "labels are not correctly provided" << "labels are not correctly provided"
<< "preds.size=" << preds->size() << ", label.size=" << info.labels.size(); << "preds.size=" << preds->size() << ", label.size=" << info.labels.size();
size_t ndata = preds->size(); size_t ndata = preds->size();
out_gpair->resize(ndata, param_.gpu_id); out_gpair->resize(ndata, bst_gpair(), param_.gpu_id);
LazyResize(ndata); LazyResize(ndata);
GetGradientDevice(preds->ptr_d(param_.gpu_id), info, iter, GetGradientDevice(preds->ptr_d(param_.gpu_id), info, iter,
out_gpair->ptr_d(param_.gpu_id), ndata); out_gpair->ptr_d(param_.gpu_id), ndata);
@ -189,13 +172,6 @@ class GPURegLossObj : public ObjFunction {
return Loss::DefaultEvalMetric(); return Loss::DefaultEvalMetric();
} }
void PredTransform(std::vector<float> *io_preds) override {
LazyResize(io_preds->size());
thrust::copy(io_preds->begin(), io_preds->end(), preds_d_.tbegin(param_.gpu_id));
PredTransformDevice(preds_d_.ptr_d(param_.gpu_id), io_preds->size());
thrust::copy_n(preds_d_.tbegin(param_.gpu_id), io_preds->size(), io_preds->begin());
}
void PredTransform(HostDeviceVector<float> *io_preds) override { void PredTransform(HostDeviceVector<float> *io_preds) override {
PredTransformDevice(io_preds->ptr_d(param_.gpu_id), io_preds->size()); PredTransformDevice(io_preds->ptr_d(param_.gpu_id), io_preds->size());
} }
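GPURegLossObj above keeps everything on the device: resize(n, init, device) allocates the buffer on a GPU, and ptr_d(device) hands back a raw device pointer, so gradients no longer stage through std::vector. A hedged sketch of that shape follows; the function name is illustrative, the single gpu_id is an assumption, and the real kernel launcher (GetGradientDevice in this file) is only referenced in a comment because its full signature is not shown here.

// Sketch only: device-resident gradient computation with HostDeviceVector.
void ToyGetGradientOnDevice(xgboost::HostDeviceVector<float>* preds,
                            xgboost::HostDeviceVector<xgboost::bst_gpair>* out_gpair,
                            int gpu_id) {
  size_t n = preds->size();
  out_gpair->resize(n, xgboost::bst_gpair(), gpu_id);      // allocate output on the GPU
  float* d_preds = preds->ptr_d(gpu_id);                   // raw device pointers,
  xgboost::bst_gpair* d_gpair = out_gpair->ptr_d(gpu_id);  // no host staging copy
  // launch the device computation on d_preds / d_gpair here, e.g.
  // GetGradientDevice(d_preds, info, iter, d_gpair, n);
  (void)d_preds; (void)d_gpair;
}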


@ -104,14 +104,43 @@ class CPUPredictor : public Predictor {
tree_begin, ntree_limit); tree_begin, ntree_limit);
} }
public: bool PredictFromCache(DMatrix* dmat,
void PredictBatch(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds, HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model, int tree_begin, const gbm::GBTreeModel& model,
unsigned ntree_limit = 0) override { unsigned ntree_limit) {
PredictBatch(dmat, &out_preds->data_h(), model, tree_begin, ntree_limit); if (ntree_limit == 0 ||
ntree_limit * model.param.num_output_group >= model.trees.size()) {
auto it = cache_.find(dmat);
if (it != cache_.end()) {
HostDeviceVector<bst_float>& y = it->second.predictions;
if (y.size() != 0) {
out_preds->resize(y.size());
std::copy(y.data_h().begin(), y.data_h().end(),
out_preds->data_h().begin());
return true;
}
}
}
return false;
} }
void PredictBatch(DMatrix* dmat, std::vector<bst_float>* out_preds, void InitOutPredictions(const MetaInfo& info,
HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model) const {
size_t n = model.param.num_output_group * info.num_row;
const std::vector<bst_float>& base_margin = info.base_margin;
out_preds->resize(n);
std::vector<bst_float>& out_preds_h = out_preds->data_h();
if (base_margin.size() != 0) {
CHECK_EQ(out_preds->size(), n);
std::copy(base_margin.begin(), base_margin.end(), out_preds_h.begin());
} else {
std::fill(out_preds_h.begin(), out_preds_h.end(), model.base_margin);
}
}
public:
void PredictBatch(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model, int tree_begin, const gbm::GBTreeModel& model, int tree_begin,
unsigned ntree_limit = 0) override { unsigned ntree_limit = 0) override {
if (this->PredictFromCache(dmat, out_preds, model, ntree_limit)) { if (this->PredictFromCache(dmat, out_preds, model, ntree_limit)) {
@ -125,12 +154,14 @@ class CPUPredictor : public Predictor {
ntree_limit = static_cast<unsigned>(model.trees.size()); ntree_limit = static_cast<unsigned>(model.trees.size());
} }
this->PredLoopInternal(dmat, out_preds, model, tree_begin, ntree_limit); this->PredLoopInternal(dmat, &out_preds->data_h(), model,
tree_begin, ntree_limit);
} }
void UpdatePredictionCache(const gbm::GBTreeModel& model, void UpdatePredictionCache(
std::vector<std::unique_ptr<TreeUpdater>>* updaters, const gbm::GBTreeModel& model,
int num_new_trees) override { std::vector<std::unique_ptr<TreeUpdater>>* updaters,
int num_new_trees) override {
int old_ntree = model.trees.size() - num_new_trees; int old_ntree = model.trees.size() - num_new_trees;
// update cache entry // update cache entry
for (auto& kv : cache_) { for (auto& kv : cache_) {
@ -138,7 +169,7 @@ class CPUPredictor : public Predictor {
if (e.predictions.size() == 0) { if (e.predictions.size() == 0) {
InitOutPredictions(e.data->info(), &(e.predictions), model); InitOutPredictions(e.data->info(), &(e.predictions), model);
PredLoopInternal(e.data.get(), &(e.predictions), model, 0, PredLoopInternal(e.data.get(), &(e.predictions.data_h()), model, 0,
model.trees.size()); model.trees.size());
} else if (model.param.num_output_group == 1 && updaters->size() > 0 && } else if (model.param.num_output_group == 1 && updaters->size() > 0 &&
num_new_trees == 1 && num_new_trees == 1 &&
@ -146,7 +177,7 @@ class CPUPredictor : public Predictor {
&(e.predictions))) { &(e.predictions))) {
{} // do nothing {} // do nothing
} else { } else {
PredLoopInternal(e.data.get(), &(e.predictions), model, old_ntree, PredLoopInternal(e.data.get(), &(e.predictions.data_h()), model, old_ntree,
model.trees.size()); model.trees.size());
} }
} }
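From the caller's side, the reworked CPU predictor is driven the same way as before, except the output buffer is a HostDeviceVector and host code reads it back through data_h(). A short usage sketch, assuming predictor, dmat and model are created elsewhere (for example as in the unit tests further down); only calls that appear in this diff are used.

void ToyPredict(xgboost::Predictor* predictor, xgboost::DMatrix* dmat,
                const xgboost::gbm::GBTreeModel& model) {
  xgboost::HostDeviceVector<xgboost::bst_float> out_preds;
  predictor->PredictBatch(dmat, &out_preds, model, /*tree_begin=*/0);
  std::vector<xgboost::bst_float>& preds_h = out_preds.data_h();  // host view of results
  for (size_t i = 0; i < out_preds.size(); ++i) {
    // consume preds_h[i] here
    (void)preds_h[i];
  }
}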


@ -256,8 +256,6 @@ class GPUPredictor : public xgboost::Predictor {
HostDeviceVector<bst_float> predictions; HostDeviceVector<bst_float> predictions;
}; };
std::unordered_map<DMatrix*, DevicePredictionCacheEntry> device_cache_;
private: private:
void DevicePredictInternal(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds, void DevicePredictInternal(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model, size_t tree_begin, const gbm::GBTreeModel& model, size_t tree_begin,
@ -337,25 +335,16 @@ class GPUPredictor : public xgboost::Predictor {
public: public:
GPUPredictor() : cpu_predictor(Predictor::Create("cpu_predictor")) {} GPUPredictor() : cpu_predictor(Predictor::Create("cpu_predictor")) {}
void PredictBatch(DMatrix* dmat, std::vector<bst_float>* out_preds,
const gbm::GBTreeModel& model, int tree_begin,
unsigned ntree_limit = 0) override {
HostDeviceVector<bst_float> out_preds_d;
PredictBatch(dmat, &out_preds_d, model, tree_begin, ntree_limit);
out_preds->resize(out_preds_d.size());
thrust::copy(out_preds_d.tbegin(param.gpu_id),
out_preds_d.tend(param.gpu_id), out_preds->begin());
}
void PredictBatch(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds, void PredictBatch(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model, int tree_begin, const gbm::GBTreeModel& model, int tree_begin,
unsigned ntree_limit = 0) override { unsigned ntree_limit = 0) override {
if (this->PredictFromCacheDevice(dmat, out_preds, model, ntree_limit)) { if (this->PredictFromCache(dmat, out_preds, model, ntree_limit)) {
return; return;
} }
this->InitOutPredictionsDevice(dmat->info(), out_preds, model); this->InitOutPredictions(dmat->info(), out_preds, model);
int tree_end = ntree_limit * model.param.num_output_group; int tree_end = ntree_limit * model.param.num_output_group;
if (ntree_limit == 0 || ntree_limit > model.trees.size()) { if (ntree_limit == 0 || ntree_limit > model.trees.size()) {
tree_end = static_cast<unsigned>(model.trees.size()); tree_end = static_cast<unsigned>(model.trees.size());
} }
@ -363,13 +352,13 @@ class GPUPredictor : public xgboost::Predictor {
DevicePredictInternal(dmat, out_preds, model, tree_begin, tree_end); DevicePredictInternal(dmat, out_preds, model, tree_begin, tree_end);
} }
protected:
void InitOutPredictionsDevice(const MetaInfo& info, void InitOutPredictions(const MetaInfo& info,
HostDeviceVector<bst_float>* out_preds, HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model) const { const gbm::GBTreeModel& model) const {
size_t n = model.param.num_output_group * info.num_row; size_t n = model.param.num_output_group * info.num_row;
const std::vector<bst_float>& base_margin = info.base_margin; const std::vector<bst_float>& base_margin = info.base_margin;
out_preds->resize(n, param.gpu_id); out_preds->resize(n, 0.0f, param.gpu_id);
if (base_margin.size() != 0) { if (base_margin.size() != 0) {
CHECK_EQ(out_preds->size(), n); CHECK_EQ(out_preds->size(), n);
thrust::copy(base_margin.begin(), base_margin.end(), out_preds->tbegin(param.gpu_id)); thrust::copy(base_margin.begin(), base_margin.end(), out_preds->tbegin(param.gpu_id));
@ -380,29 +369,16 @@ class GPUPredictor : public xgboost::Predictor {
} }
bool PredictFromCache(DMatrix* dmat, bool PredictFromCache(DMatrix* dmat,
std::vector<bst_float>* out_preds, HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model, const gbm::GBTreeModel& model,
unsigned ntree_limit) { unsigned ntree_limit) {
HostDeviceVector<bst_float> out_preds_d(0, -1);
bool result = PredictFromCacheDevice(dmat, &out_preds_d, model, ntree_limit);
if (!result) return false;
out_preds->resize(out_preds_d.size(), param.gpu_id);
thrust::copy(out_preds_d.tbegin(param.gpu_id),
out_preds_d.tend(param.gpu_id), out_preds->begin());
return true;
}
bool PredictFromCacheDevice(DMatrix* dmat,
HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model,
unsigned ntree_limit) {
if (ntree_limit == 0 || if (ntree_limit == 0 ||
ntree_limit * model.param.num_output_group >= model.trees.size()) { ntree_limit * model.param.num_output_group >= model.trees.size()) {
auto it = device_cache_.find(dmat); auto it = cache_.find(dmat);
if (it != device_cache_.end()) { if (it != cache_.end()) {
HostDeviceVector<bst_float>& y = it->second.predictions; HostDeviceVector<bst_float>& y = it->second.predictions;
if (y.size() != 0) { if (y.size() != 0) {
out_preds->resize(y.size(), param.gpu_id); out_preds->resize(y.size(), 0.0f, param.gpu_id);
thrust::copy(y.tbegin(param.gpu_id), y.tend(param.gpu_id), thrust::copy(y.tbegin(param.gpu_id), y.tend(param.gpu_id),
out_preds->tbegin(param.gpu_id)); out_preds->tbegin(param.gpu_id));
return true; return true;
@ -418,15 +394,15 @@ class GPUPredictor : public xgboost::Predictor {
int num_new_trees) override { int num_new_trees) override {
auto old_ntree = model.trees.size() - num_new_trees; auto old_ntree = model.trees.size() - num_new_trees;
// update cache entry // update cache entry
for (auto& kv : device_cache_) { for (auto& kv : cache_) {
DevicePredictionCacheEntry& e = kv.second; PredictionCacheEntry& e = kv.second;
DMatrix* dmat = kv.first; DMatrix* dmat = kv.first;
HostDeviceVector<bst_float>& predictions = e.predictions; HostDeviceVector<bst_float>& predictions = e.predictions;
if (predictions.size() == 0) { if (predictions.size() == 0) {
// ensure that the device in predictions is correct // ensure that the device in predictions is correct
predictions.resize(0, param.gpu_id); predictions.resize(0, 0.0f, param.gpu_id);
cpu_predictor->PredictBatch(dmat, &predictions.data_h(), model, 0, cpu_predictor->PredictBatch(dmat, &predictions, model, 0,
static_cast<bst_uint>(model.trees.size())); static_cast<bst_uint>(model.trees.size()));
} else if (model.param.num_output_group == 1 && updaters->size() > 0 && } else if (model.param.num_output_group == 1 && updaters->size() > 0 &&
num_new_trees == 1 && num_new_trees == 1 &&
@ -477,8 +453,6 @@ class GPUPredictor : public xgboost::Predictor {
Predictor::Init(cfg, cache); Predictor::Init(cfg, cache);
cpu_predictor->Init(cfg, cache); cpu_predictor->Init(cfg, cache);
param.InitAllowUnknown(cfg); param.InitAllowUnknown(cfg);
for (const std::shared_ptr<DMatrix>& d : cache)
device_cache_[d.get()].data = d;
max_shared_memory_bytes = dh::max_shared_memory(param.gpu_id); max_shared_memory_bytes = dh::max_shared_memory(param.gpu_id);
} }
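The GPU predictor now shares the base-class cache_ with the CPU predictor and copies cached predictions entirely on the device through the vector's thrust iterators. A small sketch of that copy pattern; the function name is illustrative, and it assumes thrust/copy.h and the HostDeviceVector header are included.

void ToyCopyOnDevice(xgboost::HostDeviceVector<xgboost::bst_float>& src,
                     xgboost::HostDeviceVector<xgboost::bst_float>* dst,
                     int gpu_id) {
  dst->resize(src.size(), 0.0f, gpu_id);                // allocate on the same device
  thrust::copy(src.tbegin(gpu_id), src.tend(gpu_id),    // device-to-device copy,
               dst->tbegin(gpu_id));                    // no host round trip
}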


@ -11,43 +11,8 @@ namespace xgboost {
void Predictor::Init( void Predictor::Init(
const std::vector<std::pair<std::string, std::string>>& cfg, const std::vector<std::pair<std::string, std::string>>& cfg,
const std::vector<std::shared_ptr<DMatrix>>& cache) { const std::vector<std::shared_ptr<DMatrix>>& cache) {
for (const std::shared_ptr<DMatrix>& d : cache) { for (const std::shared_ptr<DMatrix>& d : cache)
PredictionCacheEntry e; cache_[d.get()].data = d;
e.data = d;
cache_[d.get()] = std::move(e);
}
}
bool Predictor::PredictFromCache(DMatrix* dmat,
std::vector<bst_float>* out_preds,
const gbm::GBTreeModel& model,
unsigned ntree_limit) {
if (ntree_limit == 0 ||
ntree_limit * model.param.num_output_group >= model.trees.size()) {
auto it = cache_.find(dmat);
if (it != cache_.end()) {
std::vector<bst_float>& y = it->second.predictions;
if (y.size() != 0) {
out_preds->resize(y.size());
std::copy(y.begin(), y.end(), out_preds->begin());
return true;
}
}
}
return false;
}
void Predictor::InitOutPredictions(const MetaInfo& info,
std::vector<bst_float>* out_preds,
const gbm::GBTreeModel& model) const {
size_t n = model.param.num_output_group * info.num_row;
const std::vector<bst_float>& base_margin = info.base_margin;
out_preds->resize(n);
if (base_margin.size() != 0) {
CHECK_EQ(out_preds->size(), n);
std::copy(base_margin.begin(), base_margin.end(), out_preds->begin());
} else {
std::fill(out_preds->begin(), out_preds->end(), model.base_margin);
}
} }
Predictor* Predictor::Create(std::string name) { Predictor* Predictor::Create(std::string name) {
auto* e = ::dmlc::Registry<PredictorReg>::Get()->Find(name); auto* e = ::dmlc::Registry<PredictorReg>::Get()->Find(name);


@ -22,17 +22,6 @@ TreeUpdater* TreeUpdater::Create(const std::string& name) {
return (e->body)(); return (e->body)();
} }
void TreeUpdater::Update(HostDeviceVector<bst_gpair>* gpair,
DMatrix* data,
const std::vector<RegTree*>& trees) {
Update(gpair->data_h(), data, trees);
}
bool TreeUpdater::UpdatePredictionCache(const DMatrix* data,
HostDeviceVector<bst_float>* out_preds) {
return UpdatePredictionCache(data, &out_preds->data_h());
}
} // namespace xgboost } // namespace xgboost
namespace xgboost { namespace xgboost {


@ -26,7 +26,7 @@ class ColMaker: public TreeUpdater {
param.InitAllowUnknown(args); param.InitAllowUnknown(args);
} }
void Update(const std::vector<bst_gpair> &gpair, void Update(HostDeviceVector<bst_gpair> *gpair,
DMatrix* dmat, DMatrix* dmat,
const std::vector<RegTree*> &trees) override { const std::vector<RegTree*> &trees) override {
TStats::CheckInfo(dmat->info()); TStats::CheckInfo(dmat->info());
@ -37,7 +37,7 @@ class ColMaker: public TreeUpdater {
// build tree // build tree
for (size_t i = 0; i < trees.size(); ++i) { for (size_t i = 0; i < trees.size(); ++i) {
Builder builder(param); Builder builder(param);
builder.Update(gpair, dmat, trees[i]); builder.Update(gpair->data_h(), dmat, trees[i]);
} }
param.learning_rate = lr; param.learning_rate = lr;
} }
@ -806,13 +806,13 @@ class DistColMaker : public ColMaker<TStats, TConstraint> {
param.InitAllowUnknown(args); param.InitAllowUnknown(args);
pruner->Init(args); pruner->Init(args);
} }
void Update(const std::vector<bst_gpair> &gpair, void Update(HostDeviceVector<bst_gpair> *gpair,
DMatrix* dmat, DMatrix* dmat,
const std::vector<RegTree*> &trees) override { const std::vector<RegTree*> &trees) override {
TStats::CheckInfo(dmat->info()); TStats::CheckInfo(dmat->info());
CHECK_EQ(trees.size(), 1U) << "DistColMaker: only support one tree at a time"; CHECK_EQ(trees.size(), 1U) << "DistColMaker: only support one tree at a time";
// build the tree // build the tree
builder.Update(gpair, dmat, trees[0]); builder.Update(gpair->data_h(), dmat, trees[0]);
//// prune the tree, note that pruner will sync the tree //// prune the tree, note that pruner will sync the tree
pruner->Update(gpair, dmat, trees); pruner->Update(gpair, dmat, trees);
// update position after the tree is pruned // update position after the tree is pruned
@ -967,7 +967,7 @@ class TreeUpdaterSwitch : public TreeUpdater {
inner_->Init(args); inner_->Init(args);
} }
void Update(const std::vector<bst_gpair>& gpair, void Update(HostDeviceVector<bst_gpair>* gpair,
DMatrix* data, DMatrix* data,
const std::vector<RegTree*>& trees) override { const std::vector<RegTree*>& trees) override {
CHECK(inner_ != nullptr); CHECK(inner_ != nullptr);
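With the std::vector bridging overloads removed from tree_updater.cc, every updater implements Update(HostDeviceVector<bst_gpair>*, ...) directly; host-only updaters such as ColMaker above simply pull the host copy with data_h() before doing their usual work. A minimal sketch of a custom updater written against the new signature, assuming Init and Update are the only methods that must be overridden; ToyUpdater and its empty loop body are illustrative only.

class ToyUpdater : public xgboost::TreeUpdater {
 public:
  void Init(const std::vector<std::pair<std::string, std::string>>& args) override {}
  void Update(xgboost::HostDeviceVector<xgboost::bst_gpair>* gpair,
              xgboost::DMatrix* dmat,
              const std::vector<xgboost::RegTree*>& trees) override {
    std::vector<xgboost::bst_gpair>& gpair_h = gpair->data_h();  // host copy of gradients
    for (xgboost::RegTree* tree : trees) {
      // grow or refresh `tree` from gpair_h and dmat, as the real updaters do
      (void)tree;
    }
    (void)dmat;
  }
};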


@ -55,7 +55,7 @@ class FastHistMaker: public TreeUpdater {
is_gmat_initialized_ = false; is_gmat_initialized_ = false;
} }
void Update(const std::vector<bst_gpair>& gpair, void Update(HostDeviceVector<bst_gpair>* gpair,
DMatrix* dmat, DMatrix* dmat,
const std::vector<RegTree*>& trees) override { const std::vector<RegTree*>& trees) override {
TStats::CheckInfo(dmat->info()); TStats::CheckInfo(dmat->info());
@ -82,13 +82,14 @@ class FastHistMaker: public TreeUpdater {
builder_.reset(new Builder(param, fhparam, std::move(pruner_))); builder_.reset(new Builder(param, fhparam, std::move(pruner_)));
} }
for (size_t i = 0; i < trees.size(); ++i) { for (size_t i = 0; i < trees.size(); ++i) {
builder_->Update(gmat_, gmatb_, column_matrix_, gpair, dmat, trees[i]); builder_->Update
(gmat_, gmatb_, column_matrix_, gpair, dmat, trees[i]);
} }
param.learning_rate = lr; param.learning_rate = lr;
} }
bool UpdatePredictionCache(const DMatrix* data, bool UpdatePredictionCache(const DMatrix* data,
std::vector<bst_float>* out_preds) override { HostDeviceVector<bst_float>* out_preds) override {
if (!builder_ || param.subsample < 1.0f) { if (!builder_ || param.subsample < 1.0f) {
return false; return false;
} else { } else {
@ -139,7 +140,7 @@ class FastHistMaker: public TreeUpdater {
virtual void Update(const GHistIndexMatrix& gmat, virtual void Update(const GHistIndexMatrix& gmat,
const GHistIndexBlockMatrix& gmatb, const GHistIndexBlockMatrix& gmatb,
const ColumnMatrix& column_matrix, const ColumnMatrix& column_matrix,
const std::vector<bst_gpair>& gpair, HostDeviceVector<bst_gpair>* gpair,
DMatrix* p_fmat, DMatrix* p_fmat,
RegTree* p_tree) { RegTree* p_tree) {
double gstart = dmlc::GetTime(); double gstart = dmlc::GetTime();
@ -154,8 +155,10 @@ class FastHistMaker: public TreeUpdater {
double time_evaluate_split = 0; double time_evaluate_split = 0;
double time_apply_split = 0; double time_apply_split = 0;
std::vector<bst_gpair>& gpair_h = gpair->data_h();
tstart = dmlc::GetTime(); tstart = dmlc::GetTime();
this->InitData(gmat, gpair, *p_fmat, *p_tree); this->InitData(gmat, gpair_h, *p_fmat, *p_tree);
std::vector<bst_uint> feat_set = feat_index; std::vector<bst_uint> feat_set = feat_index;
time_init_data = dmlc::GetTime() - tstart; time_init_data = dmlc::GetTime() - tstart;
@ -165,11 +168,11 @@ class FastHistMaker: public TreeUpdater {
for (int nid = 0; nid < p_tree->param.num_roots; ++nid) { for (int nid = 0; nid < p_tree->param.num_roots; ++nid) {
tstart = dmlc::GetTime(); tstart = dmlc::GetTime();
hist_.AddHistRow(nid); hist_.AddHistRow(nid);
BuildHist(gpair, row_set_collection_[nid], gmat, gmatb, feat_set, hist_[nid]); BuildHist(gpair_h, row_set_collection_[nid], gmat, gmatb, feat_set, hist_[nid]);
time_build_hist += dmlc::GetTime() - tstart; time_build_hist += dmlc::GetTime() - tstart;
tstart = dmlc::GetTime(); tstart = dmlc::GetTime();
this->InitNewNode(nid, gmat, gpair, *p_fmat, *p_tree); this->InitNewNode(nid, gmat, gpair_h, *p_fmat, *p_tree);
time_init_new_node += dmlc::GetTime() - tstart; time_init_new_node += dmlc::GetTime() - tstart;
tstart = dmlc::GetTime(); tstart = dmlc::GetTime();
@ -200,17 +203,17 @@ class FastHistMaker: public TreeUpdater {
hist_.AddHistRow(cleft); hist_.AddHistRow(cleft);
hist_.AddHistRow(cright); hist_.AddHistRow(cright);
if (row_set_collection_[cleft].size() < row_set_collection_[cright].size()) { if (row_set_collection_[cleft].size() < row_set_collection_[cright].size()) {
BuildHist(gpair, row_set_collection_[cleft], gmat, gmatb, feat_set, hist_[cleft]); BuildHist(gpair_h, row_set_collection_[cleft], gmat, gmatb, feat_set, hist_[cleft]);
SubtractionTrick(hist_[cright], hist_[cleft], hist_[nid]); SubtractionTrick(hist_[cright], hist_[cleft], hist_[nid]);
} else { } else {
BuildHist(gpair, row_set_collection_[cright], gmat, gmatb, feat_set, hist_[cright]); BuildHist(gpair_h, row_set_collection_[cright], gmat, gmatb, feat_set, hist_[cright]);
SubtractionTrick(hist_[cleft], hist_[cright], hist_[nid]); SubtractionTrick(hist_[cleft], hist_[cright], hist_[nid]);
} }
time_build_hist += dmlc::GetTime() - tstart; time_build_hist += dmlc::GetTime() - tstart;
tstart = dmlc::GetTime(); tstart = dmlc::GetTime();
this->InitNewNode(cleft, gmat, gpair, *p_fmat, *p_tree); this->InitNewNode(cleft, gmat, gpair_h, *p_fmat, *p_tree);
this->InitNewNode(cright, gmat, gpair, *p_fmat, *p_tree); this->InitNewNode(cright, gmat, gpair_h, *p_fmat, *p_tree);
time_init_new_node += dmlc::GetTime() - tstart; time_init_new_node += dmlc::GetTime() - tstart;
tstart = dmlc::GetTime(); tstart = dmlc::GetTime();
@ -293,8 +296,8 @@ class FastHistMaker: public TreeUpdater {
} }
inline bool UpdatePredictionCache(const DMatrix* data, inline bool UpdatePredictionCache(const DMatrix* data,
std::vector<bst_float>* p_out_preds) { HostDeviceVector<bst_float>* p_out_preds) {
std::vector<bst_float>& out_preds = *p_out_preds; std::vector<bst_float>& out_preds = p_out_preds->data_h();
// p_last_fmat_ is a valid pointer as long as UpdatePredictionCache() is called in // p_last_fmat_ is a valid pointer as long as UpdatePredictionCache() is called in
// conjunction with Update(). // conjunction with Update().


@ -512,7 +512,7 @@ class GPUMaker : public TreeUpdater {
maxLeaves = 1 << param.max_depth; maxLeaves = 1 << param.max_depth;
} }
void Update(const std::vector<bst_gpair>& gpair, DMatrix* dmat, void Update(HostDeviceVector<bst_gpair>* gpair, DMatrix* dmat,
const std::vector<RegTree*>& trees) override { const std::vector<RegTree*>& trees) override {
GradStats::CheckInfo(dmat->info()); GradStats::CheckInfo(dmat->info());
// rescale learning rate according to size of trees // rescale learning rate according to size of trees
@ -530,7 +530,7 @@ class GPUMaker : public TreeUpdater {
param.learning_rate = lr; param.learning_rate = lr;
} }
/// @note: Update should be only after Init!! /// @note: Update should be only after Init!!
void UpdateTree(const std::vector<bst_gpair>& gpair, DMatrix* dmat, void UpdateTree(HostDeviceVector<bst_gpair>* gpair, DMatrix* dmat,
RegTree* hTree) { RegTree* hTree) {
if (!allocated) { if (!allocated) {
setupOneTimeData(dmat); setupOneTimeData(dmat);
@ -687,11 +687,11 @@ class GPUMaker : public TreeUpdater {
assignColIds<<<nCols, 512>>>(colIds.data(), colOffsets.data()); assignColIds<<<nCols, 512>>>(colIds.data(), colOffsets.data());
} }
void transferGrads(const std::vector<bst_gpair>& gpair) { void transferGrads(HostDeviceVector<bst_gpair>* gpair) {
// HACK // HACK
dh::safe_cuda(cudaMemcpy(gradsInst.data(), &(gpair[0]), dh::safe_cuda(cudaMemcpy(gradsInst.data(), gpair->ptr_d(param.gpu_id),
sizeof(bst_gpair) * nRows, sizeof(bst_gpair) * nRows,
cudaMemcpyHostToDevice)); cudaMemcpyDefault));
// evaluate the full-grad reduction for the root node // evaluate the full-grad reduction for the root node
dh::sumReduction<bst_gpair>(tmp_mem, gradsInst, gradSums, nRows); dh::sumReduction<bst_gpair>(tmp_mem, gradsInst, gradSums, nRows);
} }
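transferGrads above also shows why the copy-direction flag changed: the source is now the device pointer returned by ptr_d(), so cudaMemcpyDefault, which lets the runtime infer the direction, replaces the hard-coded cudaMemcpyHostToDevice. A tiny sketch under the same assumption; dest stands in for a raw device buffer and error checking (dh::safe_cuda) is omitted.

void ToyTransferGrads(xgboost::HostDeviceVector<xgboost::bst_gpair>* gpair,
                      xgboost::bst_gpair* dest,      // assumed device buffer
                      size_t n_rows, int gpu_id) {
  cudaMemcpy(dest, gpair->ptr_d(gpu_id),
             sizeof(xgboost::bst_gpair) * n_rows,
             cudaMemcpyDefault);                     // runtime infers the direction
}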


@ -506,27 +506,9 @@ class GPUHistMaker : public TreeUpdater {
monitor.Init("updater_gpu_hist", param.debug_verbose); monitor.Init("updater_gpu_hist", param.debug_verbose);
} }
void Update(const std::vector<bst_gpair>& gpair, DMatrix* dmat,
const std::vector<RegTree*>& trees) override {
monitor.Start("Update", dList);
// TODO(canonizer): move it into the class if this ever becomes a bottleneck
HostDeviceVector<bst_gpair> gpair_d(gpair.size(), param.gpu_id);
dh::safe_cuda(cudaSetDevice(param.gpu_id));
thrust::copy(gpair.begin(), gpair.end(), gpair_d.tbegin(param.gpu_id));
Update(&gpair_d, dmat, trees);
monitor.Stop("Update", dList);
}
void Update(HostDeviceVector<bst_gpair>* gpair, DMatrix* dmat, void Update(HostDeviceVector<bst_gpair>* gpair, DMatrix* dmat,
const std::vector<RegTree*>& trees) override { const std::vector<RegTree*>& trees) override {
monitor.Start("Update", dList); monitor.Start("Update", dList);
UpdateHelper(gpair, dmat, trees);
monitor.Stop("Update", dList);
}
private:
void UpdateHelper(HostDeviceVector<bst_gpair>* gpair, DMatrix* dmat,
const std::vector<RegTree*>& trees) {
GradStats::CheckInfo(dmat->info()); GradStats::CheckInfo(dmat->info());
// rescale learning rate according to size of trees // rescale learning rate according to size of trees
float lr = param.learning_rate; float lr = param.learning_rate;
@ -541,9 +523,9 @@ class GPUHistMaker : public TreeUpdater {
LOG(FATAL) << "GPU plugin exception: " << e.what() << std::endl; LOG(FATAL) << "GPU plugin exception: " << e.what() << std::endl;
} }
param.learning_rate = lr; param.learning_rate = lr;
monitor.Stop("Update", dList);
} }
public:
void InitDataOnce(DMatrix* dmat) { void InitDataOnce(DMatrix* dmat) {
info = &dmat->info(); info = &dmat->info();
monitor.Start("Quantiles", dList); monitor.Start("Quantiles", dList);
@ -876,16 +858,6 @@ class GPUHistMaker : public TreeUpdater {
omp_set_num_threads(nthread); omp_set_num_threads(nthread);
} }
bool UpdatePredictionCache(const DMatrix* data,
std::vector<bst_float>* p_out_preds) override {
return false;
}
bool UpdatePredictionCache(
const DMatrix* data, HostDeviceVector<bst_float>* p_out_preds) override {
return false;
}
struct ExpandEntry { struct ExpandEntry {
int nid; int nid;
int depth; int depth;


@ -21,7 +21,7 @@ DMLC_REGISTRY_FILE_TAG(updater_histmaker);
template<typename TStats> template<typename TStats>
class HistMaker: public BaseMaker { class HistMaker: public BaseMaker {
public: public:
void Update(const std::vector<bst_gpair> &gpair, void Update(HostDeviceVector<bst_gpair> *gpair,
DMatrix *p_fmat, DMatrix *p_fmat,
const std::vector<RegTree*> &trees) override { const std::vector<RegTree*> &trees) override {
TStats::CheckInfo(p_fmat->info()); TStats::CheckInfo(p_fmat->info());
@ -30,7 +30,7 @@ class HistMaker: public BaseMaker {
param.learning_rate = lr / trees.size(); param.learning_rate = lr / trees.size();
// build tree // build tree
for (size_t i = 0; i < trees.size(); ++i) { for (size_t i = 0; i < trees.size(); ++i) {
this->Update(gpair, p_fmat, trees[i]); this->Update(gpair->data_h(), p_fmat, trees[i]);
} }
param.learning_rate = lr; param.learning_rate = lr;
} }


@ -29,7 +29,7 @@ class TreePruner: public TreeUpdater {
syncher->Init(args); syncher->Init(args);
} }
// update the tree, do pruning // update the tree, do pruning
void Update(const std::vector<bst_gpair> &gpair, void Update(HostDeviceVector<bst_gpair> *gpair,
DMatrix *p_fmat, DMatrix *p_fmat,
const std::vector<RegTree*> &trees) override { const std::vector<RegTree*> &trees) override {
// rescale learning rate according to size of trees // rescale learning rate according to size of trees


@ -25,10 +25,11 @@ class TreeRefresher: public TreeUpdater {
param.InitAllowUnknown(args); param.InitAllowUnknown(args);
} }
// update the tree, do pruning // update the tree, do pruning
void Update(const std::vector<bst_gpair> &gpair, void Update(HostDeviceVector<bst_gpair> *gpair,
DMatrix *p_fmat, DMatrix *p_fmat,
const std::vector<RegTree*> &trees) override { const std::vector<RegTree*> &trees) override {
if (trees.size() == 0) return; if (trees.size() == 0) return;
std::vector<bst_gpair> &gpair_h = gpair->data_h();
// number of threads // number of threads
// thread temporal space // thread temporal space
std::vector<std::vector<TStats> > stemp; std::vector<std::vector<TStats> > stemp;
@ -71,7 +72,7 @@ class TreeRefresher: public TreeUpdater {
feats.Fill(inst); feats.Fill(inst);
int offset = 0; int offset = 0;
for (size_t j = 0; j < trees.size(); ++j) { for (size_t j = 0; j < trees.size(); ++j) {
AddStats(*trees[j], feats, gpair, info, ridx, AddStats(*trees[j], feats, gpair_h, info, ridx,
dmlc::BeginPtr(stemp[tid]) + offset); dmlc::BeginPtr(stemp[tid]) + offset);
offset += trees[j]->param.num_nodes; offset += trees[j]->param.num_nodes;
} }


@ -22,7 +22,7 @@ DMLC_REGISTRY_FILE_TAG(updater_skmaker);
class SketchMaker: public BaseMaker { class SketchMaker: public BaseMaker {
public: public:
void Update(const std::vector<bst_gpair> &gpair, void Update(HostDeviceVector<bst_gpair> *gpair,
DMatrix *p_fmat, DMatrix *p_fmat,
const std::vector<RegTree*> &trees) override { const std::vector<RegTree*> &trees) override {
// rescale learning rate according to size of trees // rescale learning rate according to size of trees
@ -30,7 +30,7 @@ class SketchMaker: public BaseMaker {
param.learning_rate = lr / trees.size(); param.learning_rate = lr / trees.size();
// build tree // build tree
for (size_t i = 0; i < trees.size(); ++i) { for (size_t i = 0; i < trees.size(); ++i) {
this->Update(gpair, p_fmat, trees[i]); this->Update(gpair->data_h(), p_fmat, trees[i]);
} }
param.learning_rate = lr; param.learning_rate = lr;
} }


@ -23,7 +23,7 @@ class TreeSyncher: public TreeUpdater {
public: public:
void Init(const std::vector<std::pair<std::string, std::string> >& args) override {} void Init(const std::vector<std::pair<std::string, std::string> >& args) override {}
void Update(const std::vector<bst_gpair> &gpair, void Update(HostDeviceVector<bst_gpair> *gpair,
DMatrix* dmat, DMatrix* dmat,
const std::vector<RegTree*> &trees) override { const std::vector<RegTree*> &trees) override {
if (rabit::GetWorldSize() == 1) return; if (rabit::GetWorldSize() == 1) return;

View File

@ -38,10 +38,13 @@ void CheckObjFunction(xgboost::ObjFunction * obj,
info.labels = labels; info.labels = labels;
info.weights = weights; info.weights = weights;
std::vector<xgboost::bst_gpair> gpair; xgboost::HostDeviceVector<xgboost::bst_float> in_preds(preds);
obj->GetGradient(preds, info, 1, &gpair);
ASSERT_EQ(gpair.size(), preds.size()); xgboost::HostDeviceVector<xgboost::bst_gpair> out_gpair;
obj->GetGradient(&in_preds, info, 1, &out_gpair);
std::vector<xgboost::bst_gpair>& gpair = out_gpair.data_h();
ASSERT_EQ(gpair.size(), in_preds.size());
for (int i = 0; i < static_cast<int>(gpair.size()); ++i) { for (int i = 0; i < static_cast<int>(gpair.size()); ++i) {
EXPECT_NEAR(gpair[i].GetGrad(), out_grad[i], 0.01) EXPECT_NEAR(gpair[i].GetGrad(), out_grad[i], 0.01)
<< "Unexpected grad for pred=" << preds[i] << " label=" << labels[i] << "Unexpected grad for pred=" << preds[i] << " label=" << labels[i]


@ -46,10 +46,11 @@ TEST(Objective, LogisticRegressionBasic) {
<< "Expected error when base_score not in range [0,1f] for LogisticRegression"; << "Expected error when base_score not in range [0,1f] for LogisticRegression";
// test PredTransform // test PredTransform
std::vector<xgboost::bst_float> preds = {0, 0.1f, 0.5f, 0.9f, 1}; xgboost::HostDeviceVector<xgboost::bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
std::vector<xgboost::bst_float> out_preds = {0.5f, 0.524f, 0.622f, 0.710f, 0.731f}; std::vector<xgboost::bst_float> out_preds = {0.5f, 0.524f, 0.622f, 0.710f, 0.731f};
obj->PredTransform(&preds); obj->PredTransform(&io_preds);
for (int i = 0; i < static_cast<int>(preds.size()); ++i) { auto& preds = io_preds.data_h();
for (int i = 0; i < static_cast<int>(io_preds.size()); ++i) {
EXPECT_NEAR(preds[i], out_preds[i], 0.01f); EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
} }
} }
@ -94,10 +95,11 @@ TEST(Objective, PoissonRegressionBasic) {
EXPECT_NEAR(obj->ProbToMargin(0.9f), -0.10f, 0.01f); EXPECT_NEAR(obj->ProbToMargin(0.9f), -0.10f, 0.01f);
// test PredTransform // test PredTransform
std::vector<xgboost::bst_float> preds = {0, 0.1f, 0.5f, 0.9f, 1}; xgboost::HostDeviceVector<xgboost::bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
std::vector<xgboost::bst_float> out_preds = {1, 1.10f, 1.64f, 2.45f, 2.71f}; std::vector<xgboost::bst_float> out_preds = {1, 1.10f, 1.64f, 2.45f, 2.71f};
obj->PredTransform(&preds); obj->PredTransform(&io_preds);
for (int i = 0; i < static_cast<int>(preds.size()); ++i) { auto& preds = io_preds.data_h();
for (int i = 0; i < static_cast<int>(io_preds.size()); ++i) {
EXPECT_NEAR(preds[i], out_preds[i], 0.01f); EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
} }
} }
@ -129,10 +131,11 @@ TEST(Objective, GammaRegressionBasic) {
EXPECT_NEAR(obj->ProbToMargin(0.9f), -0.10f, 0.01f); EXPECT_NEAR(obj->ProbToMargin(0.9f), -0.10f, 0.01f);
// test PredTransform // test PredTransform
std::vector<xgboost::bst_float> preds = {0, 0.1f, 0.5f, 0.9f, 1}; xgboost::HostDeviceVector<xgboost::bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
std::vector<xgboost::bst_float> out_preds = {1, 1.10f, 1.64f, 2.45f, 2.71f}; std::vector<xgboost::bst_float> out_preds = {1, 1.10f, 1.64f, 2.45f, 2.71f};
obj->PredTransform(&preds); obj->PredTransform(&io_preds);
for (int i = 0; i < static_cast<int>(preds.size()); ++i) { auto& preds = io_preds.data_h();
for (int i = 0; i < static_cast<int>(io_preds.size()); ++i) {
EXPECT_NEAR(preds[i], out_preds[i], 0.01f); EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
} }
} }
@ -165,10 +168,11 @@ TEST(Objective, TweedieRegressionBasic) {
EXPECT_NEAR(obj->ProbToMargin(0.9f), 0.89f, 0.01f); EXPECT_NEAR(obj->ProbToMargin(0.9f), 0.89f, 0.01f);
// test PredTransform // test PredTransform
std::vector<xgboost::bst_float> preds = {0, 0.1f, 0.5f, 0.9f, 1}; xgboost::HostDeviceVector<xgboost::bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
std::vector<xgboost::bst_float> out_preds = {1, 1.10f, 1.64f, 2.45f, 2.71f}; std::vector<xgboost::bst_float> out_preds = {1, 1.10f, 1.64f, 2.45f, 2.71f};
obj->PredTransform(&preds); obj->PredTransform(&io_preds);
for (int i = 0; i < static_cast<int>(preds.size()); ++i) { auto& preds = io_preds.data_h();
for (int i = 0; i < static_cast<int>(io_preds.size()); ++i) {
EXPECT_NEAR(preds[i], out_preds[i], 0.01f); EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
} }
} }
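The objective tests above all follow the same recipe: build a HostDeviceVector from an existing std::vector or an initializer list, pass its address to the objective, and read the result back through data_h() for the assertions. A condensed sketch of that recipe; obj and the expected values are assumed to come from the surrounding test (expected must hold at least as many entries as io_preds), and gtest's EXPECT_NEAR is assumed to be available as in these files.

void ToyCheckPredTransform(xgboost::ObjFunction* obj,
                           const std::vector<xgboost::bst_float>& expected) {
  xgboost::HostDeviceVector<xgboost::bst_float> io_preds = {0.0f, 0.5f, 1.0f};
  obj->PredTransform(&io_preds);
  std::vector<xgboost::bst_float>& preds_h = io_preds.data_h();  // host view of results
  for (size_t i = 0; i < io_preds.size(); ++i) {
    EXPECT_NEAR(preds_h[i], expected[i], 0.01f);
  }
}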


@ -48,10 +48,11 @@ TEST(Objective, GPULogisticRegressionBasic) {
<< "Expected error when base_score not in range [0,1f] for LogisticRegression"; << "Expected error when base_score not in range [0,1f] for LogisticRegression";
// test PredTransform // test PredTransform
std::vector<xgboost::bst_float> preds = {0, 0.1f, 0.5f, 0.9f, 1}; xgboost::HostDeviceVector<xgboost::bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
std::vector<xgboost::bst_float> out_preds = {0.5f, 0.524f, 0.622f, 0.710f, 0.731f}; std::vector<xgboost::bst_float> out_preds = {0.5f, 0.524f, 0.622f, 0.710f, 0.731f};
obj->PredTransform(&preds); obj->PredTransform(&io_preds);
for (int i = 0; i < static_cast<int>(preds.size()); ++i) { auto& preds = io_preds.data_h();
for (int i = 0; i < static_cast<int>(io_preds.size()); ++i) {
EXPECT_NEAR(preds[i], out_preds[i], 0.01f); EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
} }
} }


@ -24,10 +24,11 @@ TEST(cpu_predictor, Test) {
auto dmat = CreateDMatrix(n_row, n_col, 0); auto dmat = CreateDMatrix(n_row, n_col, 0);
// Test predict batch // Test predict batch
std::vector<float> out_predictions; HostDeviceVector<float> out_predictions;
cpu_predictor->PredictBatch(dmat.get(), &out_predictions, model, 0); cpu_predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);
std::vector<float>& out_predictions_h = out_predictions.data_h();
for (int i = 0; i < out_predictions.size(); i++) { for (int i = 0; i < out_predictions.size(); i++) {
ASSERT_EQ(out_predictions[i], 1.5); ASSERT_EQ(out_predictions_h[i], 1.5);
} }
// Test predict instance // Test predict instance


@ -33,13 +33,15 @@ TEST(gpu_predictor, Test) {
auto dmat = CreateDMatrix(n_row, n_col, 0); auto dmat = CreateDMatrix(n_row, n_col, 0);
// Test predict batch // Test predict batch
std::vector<float> gpu_out_predictions; HostDeviceVector<float> gpu_out_predictions;
std::vector<float> cpu_out_predictions; HostDeviceVector<float> cpu_out_predictions;
gpu_predictor->PredictBatch(dmat.get(), &gpu_out_predictions, model, 0); gpu_predictor->PredictBatch(dmat.get(), &gpu_out_predictions, model, 0);
cpu_predictor->PredictBatch(dmat.get(), &cpu_out_predictions, model, 0); cpu_predictor->PredictBatch(dmat.get(), &cpu_out_predictions, model, 0);
std::vector<float>& gpu_out_predictions_h = gpu_out_predictions.data_h();
std::vector<float>& cpu_out_predictions_h = cpu_out_predictions.data_h();
float abs_tolerance = 0.001; float abs_tolerance = 0.001;
for (int i = 0; i < gpu_out_predictions.size(); i++) { for (int i = 0; i < gpu_out_predictions.size(); i++) {
ASSERT_LT(std::abs(gpu_out_predictions[i] - cpu_out_predictions[i]), ASSERT_LT(std::abs(gpu_out_predictions_h[i] - cpu_out_predictions_h[i]),
abs_tolerance); abs_tolerance);
} }
// Test predict instance // Test predict instance