Replaced std::vector-based interfaces with HostDeviceVector-based interfaces. (#3116)

* Replaced std::vector-based interfaces with HostDeviceVector-based interfaces.
  - The replacement was performed in the learner, boosters, predictors, updaters, and objective functions.
  - Only interfaces used in training were replaced; interfaces like PredictInstance() still use std::vector.
  - Refactoring required by the replacement was also performed, such as using HostDeviceVector in the prediction cache.
* HostDeviceVector-based interfaces for the custom objective function example plugin.
This commit is contained in:
parent 11bfa8584d
commit d5992dd881
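The recurring pattern throughout this commit: an interface that previously took std::vector<T>* now takes HostDeviceVector<T>*, and host-only call sites adapt by taking a reference to the host copy via data_h(). A minimal sketch of that calling pattern, using only members that appear in this commit (data_h(), size(), resize()); ComputeGradientOnHost is a hypothetical illustration, not part of the commit:

    // Hedged sketch of the host-side migration pattern used below.
    void ComputeGradientOnHost(HostDeviceVector<bst_float>* preds,
                               HostDeviceVector<bst_gpair>* out_gpair) {
      std::vector<bst_float>& preds_h = preds->data_h();  // host view (may sync from device)
      out_gpair->resize(preds_h.size());
      std::vector<bst_gpair>& gpair_h = out_gpair->data_h();
      for (size_t i = 0; i < preds_h.size(); ++i) {
        gpair_h[i] = bst_gpair(0.0f, 0.0f);  // placeholder per-element computation
      }
    }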
@@ -68,12 +68,9 @@ class GradientBooster {
    * \param obj The objective function, optional, can be nullptr when use customized version
    * the booster may change content of gpair
    */
-  virtual void DoBoost(DMatrix* p_fmat,
-                       std::vector<bst_gpair>* in_gpair,
-                       ObjFunction* obj = nullptr) = 0;
   virtual void DoBoost(DMatrix* p_fmat,
                        HostDeviceVector<bst_gpair>* in_gpair,
-                       ObjFunction* obj = nullptr);
+                       ObjFunction* obj = nullptr) = 0;
 
   /*!
    * \brief generate predictions for given feature matrix
@@ -82,12 +79,9 @@ class GradientBooster {
    * \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means
    *    we do not limit number of trees, this parameter is only valid for gbtree, but not for gblinear
    */
-  virtual void PredictBatch(DMatrix* dmat,
-                            std::vector<bst_float>* out_preds,
-                            unsigned ntree_limit = 0) = 0;
   virtual void PredictBatch(DMatrix* dmat,
                             HostDeviceVector<bst_float>* out_preds,
-                            unsigned ntree_limit = 0);
+                            unsigned ntree_limit = 0) = 0;
   /*!
    * \brief online prediction function, predict score for one instance at a time
    * NOTE: use the batch prediction interface if possible, batch prediction is usually
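For both DoBoost and PredictBatch, the std::vector overload is deleted and the HostDeviceVector overload becomes the pure-virtual contract, so every concrete booster now overrides the HostDeviceVector form directly. A sketch of the resulting subclass surface (the class name is hypothetical):

    class MyBooster : public GradientBooster {
     public:
      void DoBoost(DMatrix* p_fmat,
                   HostDeviceVector<bst_gpair>* in_gpair,
                   ObjFunction* obj) override;
      void PredictBatch(DMatrix* dmat,
                        HostDeviceVector<bst_float>* out_preds,
                        unsigned ntree_limit) override;
    };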
@@ -84,7 +84,7 @@ class Learner : public rabit::Serializable {
    */
   virtual void BoostOneIter(int iter,
                             DMatrix* train,
-                            std::vector<bst_gpair>* in_gpair) = 0;
+                            HostDeviceVector<bst_gpair>* in_gpair) = 0;
   /*!
    * \brief evaluate the model for specific iteration using the configured metrics.
    * \param iter iteration number
@@ -109,7 +109,7 @@ class Learner : public rabit::Serializable {
    */
   virtual void Predict(DMatrix* data,
                        bool output_margin,
-                       std::vector<bst_float> *out_preds,
+                       HostDeviceVector<bst_float> *out_preds,
                        unsigned ntree_limit = 0,
                        bool pred_leaf = false,
                        bool pred_contribs = false,
@@ -169,7 +169,7 @@ class Learner : public rabit::Serializable {
    */
   inline void Predict(const SparseBatch::Inst &inst,
                       bool output_margin,
-                      std::vector<bst_float> *out_preds,
+                      HostDeviceVector<bst_float> *out_preds,
                       unsigned ntree_limit = 0) const;
   /*!
    * \brief Create a new instance of learner.
@@ -192,9 +192,9 @@ class Learner : public rabit::Serializable {
 // implementation of inline functions.
 inline void Learner::Predict(const SparseBatch::Inst& inst,
                              bool output_margin,
-                             std::vector<bst_float>* out_preds,
+                             HostDeviceVector<bst_float>* out_preds,
                              unsigned ntree_limit) const {
-  gbm_->PredictInstance(inst, out_preds, ntree_limit);
+  gbm_->PredictInstance(inst, &out_preds->data_h(), ntree_limit);
   if (!output_margin) {
     obj_->PredTransform(out_preds);
   }
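BoostOneIter and Predict are retyped the same way, while PredictInstance keeps std::vector and is bridged inline through out_preds->data_h(). For callers that drive training with externally computed gradients (the path behind XGBoosterBoostOneIter further down), gradients are now staged in a HostDeviceVector first. A sketch, assuming learner, train, iter, grad, hess and n are already set up by the caller:

    HostDeviceVector<bst_gpair> gpair(n);
    std::vector<bst_gpair>& gpair_h = gpair.data_h();
    for (size_t i = 0; i < n; ++i) {
      gpair_h[i] = bst_gpair(grad[i], hess[i]);  // external first/second-order gradients
    }
    learner->BoostOneIter(iter, train, &gpair);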
@@ -44,14 +44,10 @@ class ObjFunction {
    * \param iteration current iteration number.
    * \param out_gpair output of get gradient, saves gradient and second order gradient in
    */
-  virtual void GetGradient(const std::vector<bst_float>& preds,
-                           const MetaInfo& info,
-                           int iteration,
-                           std::vector<bst_gpair>* out_gpair) = 0;
   virtual void GetGradient(HostDeviceVector<bst_float>* preds,
                            const MetaInfo& info,
                            int iteration,
-                           HostDeviceVector<bst_gpair>* out_gpair);
+                           HostDeviceVector<bst_gpair>* out_gpair) = 0;
 
   /*! \return the default evaluation metric for the objective */
   virtual const char* DefaultEvalMetric() const = 0;
@@ -60,17 +56,13 @@ class ObjFunction {
    * \brief transform prediction values, this is only called when Prediction is called
    * \param io_preds prediction values, saves to this vector as well
    */
-  virtual void PredTransform(std::vector<bst_float> *io_preds) {}
-  virtual void PredTransform(HostDeviceVector<bst_float> *io_preds);
+  virtual void PredTransform(HostDeviceVector<bst_float> *io_preds) {}
 
   /*!
    * \brief transform prediction values, this is only called when Eval is called,
    *  usually it redirect to PredTransform
    * \param io_preds prediction values, saves to this vector as well
    */
-  virtual void EvalTransform(std::vector<bst_float> *io_preds) {
-    this->PredTransform(io_preds);
-  }
   virtual void EvalTransform(HostDeviceVector<bst_float> *io_preds) {
     this->PredTransform(io_preds);
   }
@@ -63,22 +63,6 @@ class Predictor {
    *                  limit trees.
    */
 
-  virtual void PredictBatch(DMatrix* dmat, std::vector<bst_float>* out_preds,
-                            const gbm::GBTreeModel& model, int tree_begin,
-                            unsigned ntree_limit = 0) = 0;
-
-  /**
-   * \brief Generate batch predictions for a given feature matrix. May use
-   *        cached predictions if available instead of calculating from scratch.
-   *
-   * \param [in,out]  dmat        Feature matrix.
-   * \param [in,out]  out_preds   The output preds.
-   * \param           model       The model to predict from.
-   * \param           tree_begin  The tree begin index.
-   * \param           ntree_limit (Optional) The ntree limit. 0 means do not
-   *                  limit trees.
-   */
-
   virtual void PredictBatch(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
                             const gbm::GBTreeModel& model, int tree_begin,
                             unsigned ntree_limit = 0) = 0;
@@ -186,41 +170,14 @@ class Predictor {
   static Predictor* Create(std::string name);
 
  protected:
-  /**
-   * \fn bool PredictFromCache(DMatrix* dmat, std::vector<bst_float>*
-   *     out_preds, const gbm::GBTreeModel& model, unsigned ntree_limit = 0)
-   *
-   * \brief Attempt to predict from cache.
-   *
-   * \return True if it succeeds, false if it fails.
-   */
-  bool PredictFromCache(DMatrix* dmat, std::vector<bst_float>* out_preds,
-                        const gbm::GBTreeModel& model,
-                        unsigned ntree_limit = 0);
-
-  /**
-   * \fn void Predictor::InitOutPredictions(const MetaInfo& info,
-   *     std::vector<bst_float>* out_preds, const gbm::GBTreeModel& model) const;
-   *
-   * \brief Init out predictions according to base margin.
-   *
-   * \param info      Dmatrix info possibly containing base margin.
-   * \param [in,out]  out_preds The out preds.
-   * \param           model     The model.
-   */
-  void InitOutPredictions(const MetaInfo& info,
-                          std::vector<bst_float>* out_preds,
-                          const gbm::GBTreeModel& model) const;
-
   /**
    * \struct PredictionCacheEntry
    *
    * \brief Contains pointer to input matrix and associated cached predictions.
    */
 
   struct PredictionCacheEntry {
     std::shared_ptr<DMatrix> data;
-    std::vector<bst_float> predictions;
+    HostDeviceVector<bst_float> predictions;
   };
 
   /**
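Because PredictionCacheEntry now stores a HostDeviceVector, cached predictions can in principle stay resident on the device between boosting rounds; the PredictFromCache/InitOutPredictions helpers that were hard-wired to std::vector are dropped from the protected interface. A sketch of a cache hit under the new entry type (it assumes cache_ maps a DMatrix to its PredictionCacheEntry, as the struct suggests; the surrounding code is hypothetical):

    auto it = cache_.find(dmat);
    if (it != cache_.end() && it->second.predictions.size() != 0) {
      HostDeviceVector<bst_float>& y = it->second.predictions;
      out_preds->resize(y.size());
      std::copy(y.data_h().begin(), y.data_h().end(),
                out_preds->data_h().begin());  // host-side copy of the cached values
    }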
@@ -40,12 +40,9 @@ class TreeUpdater {
    * but maybe different random seeds, usually one tree is passed in at a time,
    * there can be multiple trees when we train random forest style model
    */
-  virtual void Update(const std::vector<bst_gpair>& gpair,
-                      DMatrix* data,
-                      const std::vector<RegTree*>& trees) = 0;
   virtual void Update(HostDeviceVector<bst_gpair>* gpair,
                       DMatrix* data,
-                      const std::vector<RegTree*>& trees);
+                      const std::vector<RegTree*>& trees) = 0;
 
   /*!
    * \brief determines whether updater has enough knowledge about a given dataset
@@ -58,11 +55,9 @@ class TreeUpdater {
    * updated by the time this function returns.
    */
   virtual bool UpdatePredictionCache(const DMatrix* data,
-                                     std::vector<bst_float>* out_preds) {
+                                     HostDeviceVector<bst_float>* out_preds) {
     return false;
   }
-  virtual bool UpdatePredictionCache(const DMatrix* data,
-                                     HostDeviceVector<bst_float>* out_preds);
 
   /*!
    * \brief Create a tree updater given name
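Update now receives the gradients through a mutable HostDeviceVector pointer, so a GPU updater can consume the device copy while CPU updaters simply take the host view; the const-reference std::vector form disappears. A sketch of a CPU-side override (the class name is hypothetical):

    class MyCpuUpdater : public TreeUpdater {
     public:
      void Update(HostDeviceVector<bst_gpair>* gpair,
                  DMatrix* data,
                  const std::vector<RegTree*>& trees) override {
        const std::vector<bst_gpair>& gpair_h = gpair->data_h();  // host view
        // ... grow each tree in trees from gpair_h ...
      }
    };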
@@ -33,30 +33,32 @@ class MyLogistic : public ObjFunction {
   void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
     param_.InitAllowUnknown(args);
   }
-  void GetGradient(const std::vector<bst_float> &preds,
+  void GetGradient(HostDeviceVector<bst_float> *preds,
                    const MetaInfo &info,
                    int iter,
-                   std::vector<bst_gpair> *out_gpair) override {
-    out_gpair->resize(preds.size());
-    for (size_t i = 0; i < preds.size(); ++i) {
+                   HostDeviceVector<bst_gpair> *out_gpair) override {
+    out_gpair->resize(preds->size());
+    std::vector<bst_float>& preds_h = preds->data_h();
+    std::vector<bst_gpair>& out_gpair_h = out_gpair->data_h();
+    for (size_t i = 0; i < preds_h.size(); ++i) {
       bst_float w = info.GetWeight(i);
       // scale the negative examples!
       if (info.labels[i] == 0.0f) w *= param_.scale_neg_weight;
       // logistic transformation
-      bst_float p = 1.0f / (1.0f + std::exp(-preds[i]));
+      bst_float p = 1.0f / (1.0f + std::exp(-preds_h[i]));
       // this is the gradient
       bst_float grad = (p - info.labels[i]) * w;
       // this is the second order gradient
       bst_float hess = p * (1.0f - p) * w;
-      out_gpair->at(i) = bst_gpair(grad, hess);
+      out_gpair_h.at(i) = bst_gpair(grad, hess);
     }
   }
   const char* DefaultEvalMetric() const override {
     return "error";
   }
-  void PredTransform(std::vector<bst_float> *io_preds) override {
+  void PredTransform(HostDeviceVector<bst_float> *io_preds) override {
     // transform margin value to probability.
-    std::vector<bst_float> &preds = *io_preds;
+    std::vector<bst_float> &preds = io_preds->data_h();
     for (size_t i = 0; i < preds.size(); ++i) {
       preds[i] = 1.0f / (1.0f + std::exp(-preds[i]));
     }
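The MyLogistic plugin hunk above is the complete migration recipe for an objective: retype the two signatures, take the data_h() references once before the loop, and index the host vectors inside it; the per-element gradient math itself is untouched.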
@@ -191,9 +191,9 @@ struct XGBAPIThreadLocalEntry {
   /*! \brief result holder for returning string pointers */
   std::vector<const char *> ret_vec_charp;
   /*! \brief returning float vector. */
-  std::vector<bst_float> ret_vec_float;
+  HostDeviceVector<bst_float> ret_vec_float;
   /*! \brief temp variable of gradient pairs. */
-  std::vector<bst_gpair> tmp_gpair;
+  HostDeviceVector<bst_gpair> tmp_gpair;
 };
 
 // define the threadlocal store.
@@ -705,14 +705,15 @@ XGB_DLL int XGBoosterBoostOneIter(BoosterHandle handle,
                                   bst_float *grad,
                                   bst_float *hess,
                                   xgboost::bst_ulong len) {
-  std::vector<bst_gpair>& tmp_gpair = XGBAPIThreadLocalStore::Get()->tmp_gpair;
+  HostDeviceVector<bst_gpair>& tmp_gpair = XGBAPIThreadLocalStore::Get()->tmp_gpair;
   API_BEGIN();
   Booster* bst = static_cast<Booster*>(handle);
   std::shared_ptr<DMatrix>* dtr =
       static_cast<std::shared_ptr<DMatrix>*>(dtrain);
   tmp_gpair.resize(len);
+  std::vector<bst_gpair>& tmp_gpair_h = tmp_gpair.data_h();
   for (xgboost::bst_ulong i = 0; i < len; ++i) {
-    tmp_gpair[i] = bst_gpair(grad[i], hess[i]);
+    tmp_gpair_h[i] = bst_gpair(grad[i], hess[i]);
   }
 
   bst->LazyInit();
@@ -749,7 +750,8 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle,
                              unsigned ntree_limit,
                              xgboost::bst_ulong *len,
                              const bst_float **out_result) {
-  std::vector<bst_float>& preds = XGBAPIThreadLocalStore::Get()->ret_vec_float;
+  HostDeviceVector<bst_float>& preds =
+      XGBAPIThreadLocalStore::Get()->ret_vec_float;
   API_BEGIN();
   Booster *bst = static_cast<Booster*>(handle);
   bst->LazyInit();
@@ -761,7 +763,7 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle,
                     (option_mask & 4) != 0,
                     (option_mask & 8) != 0,
                     (option_mask & 16) != 0);
-  *out_result = dmlc::BeginPtr(preds);
+  *out_result = dmlc::BeginPtr(preds.data_h());
   *len = static_cast<xgboost::bst_ulong>(preds.size());
   API_END();
 }
@@ -324,7 +324,7 @@ void CLIPredict(const CLIParam& param) {
   if (param.silent == 0) {
     LOG(CONSOLE) << "start prediction...";
   }
-  std::vector<bst_float> preds;
+  HostDeviceVector<bst_float> preds;
   learner->Predict(dtest.get(), param.pred_margin, &preds, param.ntree_limit);
   if (param.silent == 0) {
     LOG(CONSOLE) << "writing prediction to " << param.name_pred;
@@ -332,7 +332,7 @@ void CLIPredict(const CLIParam& param) {
   std::unique_ptr<dmlc::Stream> fo(
       dmlc::Stream::Create(param.name_pred.c_str(), "w"));
   dmlc::ostream os(fo.get());
-  for (bst_float p : preds) {
+  for (bst_float p : preds.data_h()) {
     os << p << '\n';
   }
   // force flush before fo destruct.
@@ -12,13 +12,27 @@ namespace xgboost {
 
 template <typename T>
 struct HostDeviceVectorImpl {
-  explicit HostDeviceVectorImpl(size_t size) : data_h_(size) {}
+  explicit HostDeviceVectorImpl(size_t size, T v) : data_h_(size, v) {}
+  explicit HostDeviceVectorImpl(std::initializer_list<T> init) : data_h_(init) {}
+  explicit HostDeviceVectorImpl(const std::vector<T>& init) : data_h_(init) {}
   std::vector<T> data_h_;
 };
 
 template <typename T>
-HostDeviceVector<T>::HostDeviceVector(size_t size, int device) : impl_(nullptr) {
-  impl_ = new HostDeviceVectorImpl<T>(size);
+HostDeviceVector<T>::HostDeviceVector(size_t size, T v, int device) : impl_(nullptr) {
+  impl_ = new HostDeviceVectorImpl<T>(size, v);
+}
+
+template <typename T>
+HostDeviceVector<T>::HostDeviceVector(std::initializer_list<T> init, int device)
+  : impl_(nullptr) {
+  impl_ = new HostDeviceVectorImpl<T>(init);
+}
+
+template <typename T>
+HostDeviceVector<T>::HostDeviceVector(const std::vector<T>& init, int device)
+  : impl_(nullptr) {
+  impl_ = new HostDeviceVectorImpl<T>(init);
 }
 
 template <typename T>
@@ -41,8 +55,8 @@ template <typename T>
 std::vector<T>& HostDeviceVector<T>::data_h() { return impl_->data_h_; }
 
 template <typename T>
-void HostDeviceVector<T>::resize(size_t new_size, int new_device) {
-  impl_->data_h_.resize(new_size);
+void HostDeviceVector<T>::resize(size_t new_size, T v, int new_device) {
+  impl_->data_h_.resize(new_size, v);
 }
 
 // explicit instantiations are required, as HostDeviceVector isn't header-only
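This translation unit is the CPU-only build of HostDeviceVector: every constructor and resize() stores straight into data_h_, and the device argument is accepted but has no effect here (the CUDA build below gives it meaning). For example, under this implementation the following still ends up host-resident:

    HostDeviceVector<bst_float> v(4, 1.0f, /*device=*/0);  // device ignored in the CPU build
    // v.data_h() is a std::vector<bst_float> holding four 1.0f values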
@@ -1,6 +1,7 @@
 /*!
  * Copyright 2017 XGBoost contributors
  */
 
 #include "./host_device_vector.h"
 #include "./device_helpers.cuh"
 
@@ -8,13 +9,25 @@ namespace xgboost {
 
 template <typename T>
 struct HostDeviceVectorImpl {
-  HostDeviceVectorImpl(size_t size, int device)
+  HostDeviceVectorImpl(size_t size, T v, int device)
     : device_(device), on_d_(device >= 0) {
     if (on_d_) {
       dh::safe_cuda(cudaSetDevice(device_));
-      data_d_.resize(size);
+      data_d_.resize(size, v);
     } else {
-      data_h_.resize(size);
+      data_h_.resize(size, v);
+    }
+  }
+  // Init can be std::vector<T> or std::initializer_list<T>
+  template <class Init>
+  HostDeviceVectorImpl(const Init& init, int device)
+    : device_(device), on_d_(device >= 0) {
+    if (on_d_) {
+      dh::safe_cuda(cudaSetDevice(device_));
+      data_d_.resize(init.size());
+      thrust::copy(init.begin(), init.end(), data_d_.begin());
+    } else {
+      data_h_ = init;
     }
   }
   HostDeviceVectorImpl(const HostDeviceVectorImpl<T>&) = delete;
@@ -41,17 +54,18 @@ struct HostDeviceVectorImpl {
     lazy_sync_host();
     return data_h_;
   }
-  void resize(size_t new_size, int new_device) {
+  void resize(size_t new_size, T v, int new_device) {
     if (new_size == this->size() && new_device == device_)
       return;
-    device_ = new_device;
+    if (new_device != -1)
+      device_ = new_device;
     // if !on_d_, but the data size is 0 and the device is set,
     // resize the data on device instead
     if (!on_d_ && (data_h_.size() > 0 || device_ == -1)) {
-      data_h_.resize(new_size);
+      data_h_.resize(new_size, v);
     } else {
       dh::safe_cuda(cudaSetDevice(device_));
-      data_d_.resize(new_size);
+      data_d_.resize(new_size, v);
       on_d_ = true;
     }
   }
@@ -90,8 +104,20 @@ struct HostDeviceVectorImpl {
 };
 
 template <typename T>
-HostDeviceVector<T>::HostDeviceVector(size_t size, int device) : impl_(nullptr) {
-  impl_ = new HostDeviceVectorImpl<T>(size, device);
+HostDeviceVector<T>::HostDeviceVector(size_t size, T v, int device) : impl_(nullptr) {
+  impl_ = new HostDeviceVectorImpl<T>(size, v, device);
+}
+
+template <typename T>
+HostDeviceVector<T>::HostDeviceVector(std::initializer_list<T> init, int device)
+  : impl_(nullptr) {
+  impl_ = new HostDeviceVectorImpl<T>(init, device);
+}
+
+template <typename T>
+HostDeviceVector<T>::HostDeviceVector(const std::vector<T>& init, int device)
+  : impl_(nullptr) {
+  impl_ = new HostDeviceVectorImpl<T>(init, device);
 }
 
 template <typename T>
@@ -124,8 +150,8 @@ template <typename T>
 std::vector<T>& HostDeviceVector<T>::data_h() { return impl_->data_h(); }
 
 template <typename T>
-void HostDeviceVector<T>::resize(size_t new_size, int new_device) {
-  impl_->resize(new_size, new_device);
+void HostDeviceVector<T>::resize(size_t new_size, T v, int new_device) {
+  impl_->resize(new_size, v, new_device);
 }
 
 // explicit instantiations are required, as HostDeviceVector isn't header-only
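In the CUDA build the new value and device parameters are live: constructing with device >= 0 allocates in data_d_ on that GPU, data_h() routes through lazy_sync_host() before returning the host copy, and resize() now treats new_device == -1 as "keep the current device" instead of unconditionally overwriting it. A usage sketch of those semantics (n is assumed to be defined):

    HostDeviceVector<bst_float> v(n, 0.0f, /*device=*/0);  // n zeros in data_d_ on GPU 0
    v.resize(2 * n, 0.0f);                   // new_device defaults to -1, so it stays on GPU 0
    std::vector<bst_float>& h = v.data_h();  // lazily copies device data to the host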
@@ -5,6 +5,7 @@
 #define XGBOOST_COMMON_HOST_DEVICE_VECTOR_H_
 
 #include <cstdlib>
+#include <initializer_list>
 #include <vector>
 
 // only include thrust-related files if host_device_vector.h
@@ -61,7 +62,9 @@ template <typename T> struct HostDeviceVectorImpl;
 template <typename T>
 class HostDeviceVector {
  public:
-  explicit HostDeviceVector(size_t size = 0, int device = -1);
+  explicit HostDeviceVector(size_t size = 0, T v = T(), int device = -1);
+  HostDeviceVector(std::initializer_list<T> init, int device = -1);
+  explicit HostDeviceVector(const std::vector<T>& init, int device = -1);
   ~HostDeviceVector();
   HostDeviceVector(const HostDeviceVector<T>&) = delete;
   HostDeviceVector(HostDeviceVector<T>&&) = delete;
@@ -70,6 +73,7 @@ class HostDeviceVector {
   size_t size() const;
   int device() const;
   T* ptr_d(int device);
+  T* ptr_h() { return data_h().data(); }
 
   // only define functions returning device_ptr
   // if HostDeviceVector.h is included from a .cu file
@@ -79,17 +83,9 @@ class HostDeviceVector {
 #endif
 
   std::vector<T>& data_h();
-  void resize(size_t new_size, int new_device);
-
-  // helper functions in case a function needs to be templated
-  // to work for both HostDeviceVector and std::vector
-  static std::vector<T>& data_h(HostDeviceVector<T>* v) {
-    return v->data_h();
-  }
-
-  static std::vector<T>& data_h(std::vector<T>* v) {
-    return *v;
-  }
+  // passing in new_device == -1 keeps the device as is
+  void resize(size_t new_size, T v = T(), int new_device = -1);
 
  private:
   HostDeviceVectorImpl<T>* impl_;
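Taken together, the header changes give HostDeviceVector a fill value on construction and resize, two new initializing constructors, and a raw host-pointer accessor, while the static data_h() shims (needed only while callers were templated over both vector types) are gone. A quick tour of the new surface, as a sketch:

    HostDeviceVector<bst_float> a;                  // empty, host-resident (device = -1)
    HostDeviceVector<bst_float> b(8, 0.5f);         // eight copies of 0.5f
    HostDeviceVector<bst_float> c = {1.0f, 2.0f};   // new initializer_list constructor
    std::vector<bst_float> src = {3.0f, 4.0f};
    HostDeviceVector<bst_float> d(src);             // new copy-from-std::vector constructor
    bst_float* p = d.ptr_h();                       // new raw host pointer accessor
    d.resize(16, 0.0f);                             // grow, filling with 0.0f, keeping the device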
@@ -76,8 +76,10 @@ class GBLinear : public GradientBooster {
   void Save(dmlc::Stream* fo) const override {
     model.Save(fo);
   }
-  void DoBoost(DMatrix *p_fmat, std::vector<bst_gpair> *in_gpair,
-               ObjFunction *obj) override {
+  void DoBoost(DMatrix *p_fmat,
+               HostDeviceVector<bst_gpair> *in_gpair,
+               ObjFunction* obj) override {
     monitor.Start("DoBoost");
 
     if (!p_fmat->HaveColAccess(false)) {
@@ -91,14 +93,15 @@ class GBLinear : public GradientBooster {
       this->LazySumWeights(p_fmat);
 
     if (!this->CheckConvergence()) {
-      updater->Update(in_gpair, p_fmat, &model, sum_instance_weight);
+      updater->Update(&in_gpair->data_h(), p_fmat, &model, sum_instance_weight);
     }
     this->UpdatePredictionCache();
 
     monitor.Stop("DoBoost");
   }
 
-  void PredictBatch(DMatrix *p_fmat, std::vector<bst_float> *out_preds,
+  void PredictBatch(DMatrix *p_fmat,
+                    HostDeviceVector<bst_float> *out_preds,
                     unsigned ntree_limit) override {
     monitor.Start("PredictBatch");
     CHECK_EQ(ntree_limit, 0U)
@@ -109,9 +112,9 @@ class GBLinear : public GradientBooster {
     if (it != cache_.end() && it->second.predictions.size() != 0) {
       std::vector<bst_float> &y = it->second.predictions;
       out_preds->resize(y.size());
-      std::copy(y.begin(), y.end(), out_preds->begin());
+      std::copy(y.begin(), y.end(), out_preds->data_h().begin());
    } else {
-      this->PredictBatchInternal(p_fmat, out_preds);
+      this->PredictBatchInternal(p_fmat, &out_preds->data_h());
     }
     monitor.Stop("PredictBatch");
   }
@@ -22,18 +22,6 @@ GradientBooster* GradientBooster::Create(
   return (e->body)(cache_mats, base_margin);
 }
 
-void GradientBooster::DoBoost(DMatrix* p_fmat,
-                              HostDeviceVector<bst_gpair>* in_gpair,
-                              ObjFunction* obj) {
-  DoBoost(p_fmat, &in_gpair->data_h(), obj);
-}
-
-void GradientBooster::PredictBatch(DMatrix* dmat,
-                                   HostDeviceVector<bst_float>* out_preds,
-                                   unsigned ntree_limit) {
-  PredictBatch(dmat, &out_preds->data_h(), ntree_limit);
-}
-
 }  // namespace xgboost
 
 namespace xgboost {
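The base-class forwarders that unwrapped data_h() and dispatched the HostDeviceVector calls to the old std::vector overloads are deleted outright; with the std::vector overloads gone from the GradientBooster interface, each booster now implements the HostDeviceVector signatures directly.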
@@ -180,22 +180,39 @@ class GBTree : public GradientBooster {
            tparam.updater_seq.find("distcol") != std::string::npos;
   }
 
-  void DoBoost(DMatrix* p_fmat,
-               std::vector<bst_gpair>* in_gpair,
-               ObjFunction* obj) override {
-    DoBoostHelper(p_fmat, in_gpair, obj);
-  }
-
   void DoBoost(DMatrix* p_fmat,
                HostDeviceVector<bst_gpair>* in_gpair,
                ObjFunction* obj) override {
-    DoBoostHelper(p_fmat, in_gpair, obj);
-  }
-
-  void PredictBatch(DMatrix* p_fmat,
-                    std::vector<bst_float>* out_preds,
-                    unsigned ntree_limit) override {
-    predictor->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit);
+    std::vector<std::vector<std::unique_ptr<RegTree> > > new_trees;
+    const int ngroup = model_.param.num_output_group;
+    monitor.Start("BoostNewTrees");
+    if (ngroup == 1) {
+      std::vector<std::unique_ptr<RegTree> > ret;
+      BoostNewTrees(in_gpair, p_fmat, 0, &ret);
+      new_trees.push_back(std::move(ret));
+    } else {
+      CHECK_EQ(in_gpair->size() % ngroup, 0U)
+          << "must have exactly ngroup*nrow gpairs";
+      // TODO(canonizer): perform this on GPU if HostDeviceVector has device set.
+      HostDeviceVector<bst_gpair> tmp(in_gpair->size() / ngroup,
+                                      bst_gpair(), in_gpair->device());
+      std::vector<bst_gpair>& gpair_h = in_gpair->data_h();
+      bst_omp_uint nsize = static_cast<bst_omp_uint>(tmp.size());
+      for (int gid = 0; gid < ngroup; ++gid) {
+        std::vector<bst_gpair>& tmp_h = tmp.data_h();
+#pragma omp parallel for schedule(static)
+        for (bst_omp_uint i = 0; i < nsize; ++i) {
+          tmp_h[i] = gpair_h[i * ngroup + gid];
+        }
+        std::vector<std::unique_ptr<RegTree> > ret;
+        BoostNewTrees(&tmp, p_fmat, gid, &ret);
+        new_trees.push_back(std::move(ret));
+      }
+    }
+    monitor.Stop("BoostNewTrees");
+    monitor.Start("CommitModel");
+    this->CommitModel(std::move(new_trees));
+    monitor.Stop("CommitModel");
   }
 
   void PredictBatch(DMatrix* p_fmat,
@@ -251,48 +268,11 @@ class GBTree : public GradientBooster {
     }
   }
 
-  // TVec is either std::vector<bst_gpair> or HostDeviceVector<bst_gpair>
-  template <typename TVec>
-  void DoBoostHelper(DMatrix* p_fmat,
-                     TVec* in_gpair,
-                     ObjFunction* obj) {
-    std::vector<std::vector<std::unique_ptr<RegTree> > > new_trees;
-    const int ngroup = model_.param.num_output_group;
-    monitor.Start("BoostNewTrees");
-    if (ngroup == 1) {
-      std::vector<std::unique_ptr<RegTree> > ret;
-      BoostNewTrees(in_gpair, p_fmat, 0, &ret);
-      new_trees.push_back(std::move(ret));
-    } else {
-      CHECK_EQ(in_gpair->size() % ngroup, 0U)
-          << "must have exactly ngroup*nrow gpairs";
-      std::vector<bst_gpair> tmp(in_gpair->size() / ngroup);
-      auto& gpair_h = HostDeviceVector<bst_gpair>::data_h(in_gpair);
-      for (int gid = 0; gid < ngroup; ++gid) {
-        bst_omp_uint nsize = static_cast<bst_omp_uint>(tmp.size());
-#pragma omp parallel for schedule(static)
-        for (bst_omp_uint i = 0; i < nsize; ++i) {
-          tmp[i] = gpair_h[i * ngroup + gid];
-        }
-        std::vector<std::unique_ptr<RegTree> > ret;
-        BoostNewTrees(&tmp, p_fmat, gid, &ret);
-        new_trees.push_back(std::move(ret));
-      }
-    }
-    monitor.Stop("BoostNewTrees");
-    monitor.Start("CommitModel");
-    this->CommitModel(std::move(new_trees));
-    monitor.Stop("CommitModel");
-  }
-
   // do group specific group
-  // TVec is either const std::vector<bst_gpair> or HostDeviceVector<bst_gpair>
-  template <typename TVec>
-  inline void
-  BoostNewTrees(TVec* gpair,
-                DMatrix *p_fmat,
-                int bst_group,
-                std::vector<std::unique_ptr<RegTree> >* ret) {
+  inline void BoostNewTrees(HostDeviceVector<bst_gpair>* gpair,
+                            DMatrix *p_fmat,
+                            int bst_group,
+                            std::vector<std::unique_ptr<RegTree> >* ret) {
     this->InitUpdater();
     std::vector<RegTree*> new_trees;
     ret->clear();
@@ -315,23 +295,8 @@ class GBTree : public GradientBooster {
       }
     }
     // update the trees
-    for (auto& up : updaters) {
-      UpdateHelper(up.get(), gpair, p_fmat, new_trees);
-    }
-  }
-
-  void UpdateHelper(TreeUpdater* updater,
-                    std::vector<bst_gpair>* gpair,
-                    DMatrix *p_fmat,
-                    const std::vector<RegTree*>& new_trees) {
-    updater->Update(*gpair, p_fmat, new_trees);
-  }
-
-  void UpdateHelper(TreeUpdater* updater,
-                    HostDeviceVector<bst_gpair>* gpair,
-                    DMatrix *p_fmat,
-                    const std::vector<RegTree*>& new_trees) {
-    updater->Update(gpair, p_fmat, new_trees);
+    for (auto& up : updaters)
+      up->Update(gpair, p_fmat, new_trees);
   }
 
   // commit new trees all at once
@@ -389,10 +354,10 @@ class Dart : public GBTree {
 
   // predict the leaf scores with dropout if ntree_limit = 0
   void PredictBatch(DMatrix* p_fmat,
-                    std::vector<bst_float>* out_preds,
+                    HostDeviceVector<bst_float>* out_preds,
                     unsigned ntree_limit) override {
     DropTrees(ntree_limit);
-    PredLoopInternal<Dart>(p_fmat, out_preds, 0, ntree_limit, true);
+    PredLoopInternal<Dart>(p_fmat, &out_preds->data_h(), 0, ntree_limit, true);
   }
 
   void PredictInstance(const SparseBatch::Inst& inst,
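In GBTree the templated DoBoostHelper/UpdateHelper indirection, which existed only to serve both vector types, collapses into a single concrete DoBoost, and BoostNewTrees passes the HostDeviceVector straight through to TreeUpdater::Update. The per-group gradient slice in the multi-class branch is now itself a HostDeviceVector constructed on in_gpair->device(), though the strided copy still runs on the host; the TODO marks that as a future GPU path.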
@@ -362,17 +362,17 @@ class LearnerImpl : public Learner {
     }
     this->LazyInitDMatrix(train);
     monitor.Start("PredictRaw");
-    this->PredictRaw(train, &preds2_);
+    this->PredictRaw(train, &preds_);
     monitor.Stop("PredictRaw");
     monitor.Start("GetGradient");
-    obj_->GetGradient(&preds2_, train->info(), iter, &gpair_);
+    obj_->GetGradient(&preds_, train->info(), iter, &gpair_);
     monitor.Stop("GetGradient");
     gbm_->DoBoost(train, &gpair_, obj_.get());
     monitor.Stop("UpdateOneIter");
   }
 
   void BoostOneIter(int iter, DMatrix* train,
-                    std::vector<bst_gpair>* in_gpair) override {
+                    HostDeviceVector<bst_gpair>* in_gpair) override {
     monitor.Start("BoostOneIter");
     if (tparam.seed_per_iteration || rabit::IsDistributed()) {
       common::GlobalRandom().seed(tparam.seed * kRandSeedMagic + iter);
@@ -395,7 +395,7 @@ class LearnerImpl : public Learner {
       obj_->EvalTransform(&preds_);
       for (auto& ev : metrics_) {
         os << '\t' << data_names[i] << '-' << ev->Name() << ':'
-           << ev->Eval(preds_, data_sets[i]->info(), tparam.dsplit == 2);
+           << ev->Eval(preds_.data_h(), data_sets[i]->info(), tparam.dsplit == 2);
       }
     }
 
@@ -438,19 +438,20 @@ class LearnerImpl : public Learner {
     this->PredictRaw(data, &preds_);
     obj_->EvalTransform(&preds_);
     return std::make_pair(metric,
-                          ev->Eval(preds_, data->info(), tparam.dsplit == 2));
+                          ev->Eval(preds_.data_h(), data->info(), tparam.dsplit == 2));
   }
 
   void Predict(DMatrix* data, bool output_margin,
-               std::vector<bst_float>* out_preds, unsigned ntree_limit,
+               HostDeviceVector<bst_float>* out_preds, unsigned ntree_limit,
                bool pred_leaf, bool pred_contribs, bool approx_contribs,
               bool pred_interactions) const override {
     if (pred_contribs) {
-      gbm_->PredictContribution(data, out_preds, ntree_limit, approx_contribs);
+      gbm_->PredictContribution(data, &out_preds->data_h(), ntree_limit, approx_contribs);
     } else if (pred_interactions) {
-      gbm_->PredictInteractionContributions(data, out_preds, ntree_limit, approx_contribs);
+      gbm_->PredictInteractionContributions(data, &out_preds->data_h(), ntree_limit,
+                                            approx_contribs);
     } else if (pred_leaf) {
-      gbm_->PredictLeaf(data, out_preds, ntree_limit);
+      gbm_->PredictLeaf(data, &out_preds->data_h(), ntree_limit);
     } else {
       this->PredictRaw(data, out_preds, ntree_limit);
       if (!output_margin) {
@@ -546,12 +547,6 @@ class LearnerImpl : public Learner {
    * \param ntree_limit limit number of trees used for boosted tree
    *   predictor, when it equals 0, this means we are using all the trees
    */
-  inline void PredictRaw(DMatrix* data, std::vector<bst_float>* out_preds,
-                         unsigned ntree_limit = 0) const {
-    CHECK(gbm_.get() != nullptr)
-        << "Predict must happen after Load or InitModel";
-    gbm_->PredictBatch(data, out_preds, ntree_limit);
-  }
   inline void PredictRaw(DMatrix* data, HostDeviceVector<bst_float>* out_preds,
                          unsigned ntree_limit = 0) const {
     CHECK(gbm_.get() != nullptr)
@@ -572,8 +567,7 @@ class LearnerImpl : public Learner {
   // name of objective function
   std::string name_obj_;
   // temporal storages for prediction
-  std::vector<bst_float> preds_;
-  HostDeviceVector<bst_float> preds2_;
+  HostDeviceVector<bst_float> preds_;
   // gradient pairs
   HostDeviceVector<bst_gpair> gpair_;
 
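The temporary preds2_ buffer, previously needed to shuttle between the std::vector and HostDeviceVector worlds inside UpdateOneIter, is gone: preds_ itself becomes a HostDeviceVector, and metric evaluation reads preds_.data_h() at the boundary since metrics still operate on host vectors.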
@@ -35,16 +35,18 @@ class SoftmaxMultiClassObj : public ObjFunction {
   void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
     param_.InitAllowUnknown(args);
   }
-  void GetGradient(const std::vector<bst_float>& preds,
+  void GetGradient(HostDeviceVector<bst_float>* preds,
                    const MetaInfo& info,
                    int iter,
-                   std::vector<bst_gpair>* out_gpair) override {
+                   HostDeviceVector<bst_gpair>* out_gpair) override {
     CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty";
-    CHECK(preds.size() == (static_cast<size_t>(param_.num_class) * info.labels.size()))
+    CHECK(preds->size() == (static_cast<size_t>(param_.num_class) * info.labels.size()))
         << "SoftmaxMultiClassObj: label size and pred size does not match";
-    out_gpair->resize(preds.size());
+    std::vector<bst_float>& preds_h = preds->data_h();
+    out_gpair->resize(preds_h.size());
+    std::vector<bst_gpair>& gpair = out_gpair->data_h();
     const int nclass = param_.num_class;
-    const omp_ulong ndata = static_cast<omp_ulong>(preds.size() / nclass);
+    const omp_ulong ndata = static_cast<omp_ulong>(preds_h.size() / nclass);
 
     int label_error = 0;
     #pragma omp parallel
@@ -53,7 +55,7 @@ class SoftmaxMultiClassObj : public ObjFunction {
       #pragma omp for schedule(static)
       for (omp_ulong i = 0; i < ndata; ++i) {
         for (int k = 0; k < nclass; ++k) {
-          rec[k] = preds[i * nclass + k];
+          rec[k] = preds_h[i * nclass + k];
         }
         common::Softmax(&rec);
         int label = static_cast<int>(info.labels[i]);
@@ -65,9 +67,9 @@ class SoftmaxMultiClassObj : public ObjFunction {
           bst_float p = rec[k];
           const bst_float h = 2.0f * p * (1.0f - p) * wt;
           if (label == k) {
-            (*out_gpair)[i * nclass + k] = bst_gpair((p - 1.0f) * wt, h);
+            gpair[i * nclass + k] = bst_gpair((p - 1.0f) * wt, h);
           } else {
-            (*out_gpair)[i * nclass + k] = bst_gpair(p* wt, h);
+            gpair[i * nclass + k] = bst_gpair(p* wt, h);
           }
         }
       }
@@ -77,10 +79,10 @@ class SoftmaxMultiClassObj : public ObjFunction {
         << " num_class=" << nclass
         << " but found " << label_error << " in label.";
   }
-  void PredTransform(std::vector<bst_float>* io_preds) override {
+  void PredTransform(HostDeviceVector<bst_float>* io_preds) override {
     this->Transform(io_preds, output_prob_);
   }
-  void EvalTransform(std::vector<bst_float>* io_preds) override {
+  void EvalTransform(HostDeviceVector<bst_float>* io_preds) override {
     this->Transform(io_preds, true);
   }
   const char* DefaultEvalMetric() const override {
@@ -88,8 +90,8 @@ class SoftmaxMultiClassObj : public ObjFunction {
   }
 
  private:
-  inline void Transform(std::vector<bst_float> *io_preds, bool prob) {
-    std::vector<bst_float> &preds = *io_preds;
+  inline void Transform(HostDeviceVector<bst_float> *io_preds, bool prob) {
+    std::vector<bst_float> &preds = io_preds->data_h();
     std::vector<bst_float> tmp;
     const int nclass = param_.num_class;
     const omp_ulong ndata = static_cast<omp_ulong>(preds.size() / nclass);
@@ -25,17 +25,6 @@ ObjFunction* ObjFunction::Create(const std::string& name) {
   return (e->body)();
 }
 
-void ObjFunction::GetGradient(HostDeviceVector<bst_float>* preds,
-                              const MetaInfo& info,
-                              int iteration,
-                              HostDeviceVector<bst_gpair>* out_gpair) {
-  GetGradient(preds->data_h(), info, iteration, &out_gpair->data_h());
-}
-
-void ObjFunction::PredTransform(HostDeviceVector<bst_float> *io_preds) {
-  PredTransform(&io_preds->data_h());
-}
-
 }  // namespace xgboost
 
 namespace xgboost {
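As with the boosters, the base-class forwarding bodies for GetGradient and PredTransform are removed; the HostDeviceVector signatures are now the pure-virtual (or defaulted) interface itself, so each objective below implements them directly.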
@@ -37,13 +37,14 @@ class LambdaRankObj : public ObjFunction {
   void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
     param_.InitAllowUnknown(args);
   }
-  void GetGradient(const std::vector<bst_float>& preds,
+  void GetGradient(HostDeviceVector<bst_float>* preds,
                    const MetaInfo& info,
                    int iter,
-                   std::vector<bst_gpair>* out_gpair) override {
-    CHECK_EQ(preds.size(), info.labels.size()) << "label size predict size not match";
-    std::vector<bst_gpair>& gpair = *out_gpair;
-    gpair.resize(preds.size());
+                   HostDeviceVector<bst_gpair>* out_gpair) override {
+    CHECK_EQ(preds->size(), info.labels.size()) << "label size predict size not match";
+    auto& preds_h = preds->data_h();
+    out_gpair->resize(preds_h.size());
+    std::vector<bst_gpair>& gpair = out_gpair->data_h();
     // quick consistency when group is not available
     std::vector<unsigned> tgptr(2, 0); tgptr[1] = static_cast<unsigned>(info.labels.size());
     const std::vector<unsigned> &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr;
@@ -63,7 +64,7 @@ class LambdaRankObj : public ObjFunction {
     for (bst_omp_uint k = 0; k < ngroup; ++k) {
       lst.clear(); pairs.clear();
       for (unsigned j = gptr[k]; j < gptr[k+1]; ++j) {
-        lst.push_back(ListEntry(preds[j], info.labels[j], j));
+        lst.push_back(ListEntry(preds_h[j], info.labels[j], j));
         gpair[j] = bst_gpair(0.0f, 0.0f);
       }
       std::sort(lst.begin(), lst.end(), ListEntry::CmpPred);
@ -38,18 +38,20 @@ class RegLossObj : public ObjFunction {
|
|||||||
const std::vector<std::pair<std::string, std::string> > &args) override {
|
const std::vector<std::pair<std::string, std::string> > &args) override {
|
||||||
param_.InitAllowUnknown(args);
|
param_.InitAllowUnknown(args);
|
||||||
}
|
}
|
||||||
void GetGradient(const std::vector<bst_float> &preds, const MetaInfo &info,
|
void GetGradient(HostDeviceVector<bst_float> *preds, const MetaInfo &info,
|
||||||
int iter, std::vector<bst_gpair> *out_gpair) override {
|
int iter, HostDeviceVector<bst_gpair> *out_gpair) override {
|
||||||
CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty";
|
CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty";
|
||||||
CHECK_EQ(preds.size(), info.labels.size())
|
CHECK_EQ(preds->size(), info.labels.size())
|
||||||
<< "labels are not correctly provided"
|
<< "labels are not correctly provided"
|
||||||
<< "preds.size=" << preds.size()
|
<< "preds.size=" << preds->size()
|
||||||
<< ", label.size=" << info.labels.size();
|
<< ", label.size=" << info.labels.size();
|
||||||
|
auto& preds_h = preds->data_h();
|
||||||
|
|
||||||
this->LazyCheckLabels(info.labels);
|
this->LazyCheckLabels(info.labels);
|
||||||
out_gpair->resize(preds.size());
|
out_gpair->resize(preds_h.size());
|
||||||
const omp_ulong n = static_cast<omp_ulong>(preds.size());
|
auto& gpair = out_gpair->data_h();
|
||||||
auto gpair_ptr = out_gpair->data();
|
const omp_ulong n = static_cast<omp_ulong>(preds_h.size());
|
||||||
|
auto gpair_ptr = out_gpair->ptr_h();
|
||||||
avx::Float8 scale(param_.scale_pos_weight);
|
avx::Float8 scale(param_.scale_pos_weight);
|
||||||
|
|
||||||
const omp_ulong remainder = n % 8;
|
const omp_ulong remainder = n % 8;
|
||||||
@ -58,7 +60,7 @@ class RegLossObj : public ObjFunction {
|
|||||||
#pragma omp parallel for schedule(static) num_threads(std::min(8, nthread))
|
#pragma omp parallel for schedule(static) num_threads(std::min(8, nthread))
|
||||||
for (omp_ulong i = 0; i < n - remainder; i += 8) {
|
for (omp_ulong i = 0; i < n - remainder; i += 8) {
|
||||||
avx::Float8 y(&info.labels[i]);
|
avx::Float8 y(&info.labels[i]);
|
||||||
avx::Float8 p = Loss::PredTransform(avx::Float8(&preds[i]));
|
avx::Float8 p = Loss::PredTransform(avx::Float8(&preds_h[i]));
|
||||||
avx::Float8 w = info.weights.empty() ? avx::Float8(1.0f)
|
avx::Float8 w = info.weights.empty() ? avx::Float8(1.0f)
|
||||||
: avx::Float8(&info.weights[i]);
|
: avx::Float8(&info.weights[i]);
|
||||||
// Adjust weight
|
// Adjust weight
|
||||||
@ -69,11 +71,11 @@ class RegLossObj : public ObjFunction {
|
|||||||
}
|
}
|
||||||
for (omp_ulong i = n - remainder; i < n; ++i) {
|
for (omp_ulong i = n - remainder; i < n; ++i) {
|
||||||
auto y = info.labels[i];
|
auto y = info.labels[i];
|
||||||
bst_float p = Loss::PredTransform(preds[i]);
|
bst_float p = Loss::PredTransform(preds_h[i]);
|
||||||
bst_float w = info.GetWeight(i);
|
bst_float w = info.GetWeight(i);
|
||||||
w += y * ((param_.scale_pos_weight * w) - w);
|
w += y * ((param_.scale_pos_weight * w) - w);
|
||||||
(*out_gpair)[i] = bst_gpair(Loss::FirstOrderGradient(p, y) * w,
|
gpair[i] = bst_gpair(Loss::FirstOrderGradient(p, y) * w,
|
||||||
Loss::SecondOrderGradient(p, y) * w);
|
Loss::SecondOrderGradient(p, y) * w);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reset omp max threads
|
// Reset omp max threads
|
||||||
@ -82,8 +84,8 @@ class RegLossObj : public ObjFunction {
|
|||||||
const char *DefaultEvalMetric() const override {
|
const char *DefaultEvalMetric() const override {
|
||||||
return Loss::DefaultEvalMetric();
|
return Loss::DefaultEvalMetric();
|
||||||
}
|
}
|
||||||
void PredTransform(std::vector<bst_float> *io_preds) override {
|
void PredTransform(HostDeviceVector<bst_float> *io_preds) override {
|
||||||
std::vector<bst_float> &preds = *io_preds;
|
std::vector<bst_float> &preds = io_preds->data_h();
|
||||||
const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size());
|
const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size());
|
||||||
#pragma omp parallel for schedule(static)
|
#pragma omp parallel for schedule(static)
|
||||||
for (bst_omp_uint j = 0; j < ndata; ++j) {
|
for (bst_omp_uint j = 0; j < ndata; ++j) {
|
||||||
@@ -143,40 +145,42 @@ class PoissonRegression : public ObjFunction {
     param_.InitAllowUnknown(args);
   }

-  void GetGradient(const std::vector<bst_float> &preds,
+  void GetGradient(HostDeviceVector<bst_float> *preds,
                    const MetaInfo &info,
                    int iter,
-                   std::vector<bst_gpair> *out_gpair) override {
+                   HostDeviceVector<bst_gpair> *out_gpair) override {
     CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty";
-    CHECK_EQ(preds.size(), info.labels.size()) << "labels are not correctly provided";
-    out_gpair->resize(preds.size());
+    CHECK_EQ(preds->size(), info.labels.size()) << "labels are not correctly provided";
+    auto& preds_h = preds->data_h();
+    out_gpair->resize(preds->size());
+    auto& gpair = out_gpair->data_h();
     // check if label in range
     bool label_correct = true;
     // start calculating gradient
-    const omp_ulong ndata = static_cast<omp_ulong>(preds.size()); // NOLINT(*)
+    const omp_ulong ndata = static_cast<omp_ulong>(preds_h.size()); // NOLINT(*)
 #pragma omp parallel for schedule(static)
     for (omp_ulong i = 0; i < ndata; ++i) { // NOLINT(*)
-      bst_float p = preds[i];
+      bst_float p = preds_h[i];
       bst_float w = info.GetWeight(i);
       bst_float y = info.labels[i];
       if (y >= 0.0f) {
-        (*out_gpair)[i] = bst_gpair((std::exp(p) - y) * w,
+        gpair[i] = bst_gpair((std::exp(p) - y) * w,
                              std::exp(p + param_.max_delta_step) * w);
       } else {
         label_correct = false;
       }
     }
     CHECK(label_correct) << "PoissonRegression: label must be nonnegative";
   }
-  void PredTransform(std::vector<bst_float> *io_preds) override {
-    std::vector<bst_float> &preds = *io_preds;
+  void PredTransform(HostDeviceVector<bst_float> *io_preds) override {
+    std::vector<bst_float> &preds = io_preds->data_h();
     const long ndata = static_cast<long>(preds.size()); // NOLINT(*)
 #pragma omp parallel for schedule(static)
     for (long j = 0; j < ndata; ++j) { // NOLINT(*)
       preds[j] = std::exp(preds[j]);
     }
   }
-  void EvalTransform(std::vector<bst_float> *io_preds) override {
+  void EvalTransform(HostDeviceVector<bst_float> *io_preds) override {
     PredTransform(io_preds);
   }
   bst_float ProbToMargin(bst_float base_score) const override {
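Each objective in this file gets the identical three-line rewrite at the top of GetGradient: take a host view of the predictions, size the output, take a host view of the output. A self-contained sketch of that skeleton — mock types, and the real signatures also take MetaInfo and an iteration number:

    #include <cstddef>
    #include <vector>

    using bst_float = float;
    struct bst_gpair { bst_float grad; bst_float hess; };

    template <typename T>
    struct HostVec {  // host-only mock of HostDeviceVector
      std::vector<T> h;
      std::size_t size() const { return h.size(); }
      void resize(std::size_t n) { h.resize(n); }
      std::vector<T>& data_h() { return h; }
    };

    void GetGradientSketch(HostVec<bst_float>* preds, HostVec<bst_gpair>* out_gpair) {
      auto& preds_h = preds->data_h();   // host view of predictions
      out_gpair->resize(preds->size());  // one gradient pair per prediction
      auto& gpair = out_gpair->data_h(); // host view of the output buffer
      for (std::size_t i = 0; i < preds_h.size(); ++i) {
        gpair[i] = bst_gpair{preds_h[i], 1.0f};  // placeholder grad/hess
      }
    }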
@@ -202,21 +206,23 @@ class CoxRegression : public ObjFunction {
  public:
   // declare functions
   void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {}
-  void GetGradient(const std::vector<bst_float> &preds,
+  void GetGradient(HostDeviceVector<bst_float> *preds,
                    const MetaInfo &info,
                    int iter,
-                   std::vector<bst_gpair> *out_gpair) override {
+                   HostDeviceVector<bst_gpair> *out_gpair) override {
     CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty";
-    CHECK_EQ(preds.size(), info.labels.size()) << "labels are not correctly provided";
-    out_gpair->resize(preds.size());
+    CHECK_EQ(preds->size(), info.labels.size()) << "labels are not correctly provided";
+    auto& preds_h = preds->data_h();
+    out_gpair->resize(preds_h.size());
+    auto& gpair = out_gpair->data_h();
     const std::vector<size_t> &label_order = info.LabelAbsSort();

-    const omp_ulong ndata = static_cast<omp_ulong>(preds.size()); // NOLINT(*)
+    const omp_ulong ndata = static_cast<omp_ulong>(preds_h.size()); // NOLINT(*)

     // pre-compute a sum
     double exp_p_sum = 0;  // we use double because we might need the precision with large datasets
     for (omp_ulong i = 0; i < ndata; ++i) {
-      exp_p_sum += std::exp(preds[label_order[i]]);
+      exp_p_sum += std::exp(preds_h[label_order[i]]);
     }

     // start calculating grad and hess
@@ -227,7 +233,7 @@ class CoxRegression : public ObjFunction {
     double accumulated_sum = 0;
     for (omp_ulong i = 0; i < ndata; ++i) { // NOLINT(*)
       const size_t ind = label_order[i];
-      const double p = preds[ind];
+      const double p = preds_h[ind];
       const double exp_p = std::exp(p);
       const double w = info.GetWeight(ind);
       const double y = info.labels[ind];
@@ -251,21 +257,21 @@ class CoxRegression : public ObjFunction {

       const double grad = exp_p*r_k - static_cast<bst_float>(y > 0);
       const double hess = exp_p*r_k - exp_p*exp_p * s_k;
-      out_gpair->at(ind) = bst_gpair(grad * w, hess * w);
+      gpair.at(ind) = bst_gpair(grad * w, hess * w);

       last_abs_y = abs_y;
       last_exp_p = exp_p;
     }
   }
-  void PredTransform(std::vector<bst_float> *io_preds) override {
-    std::vector<bst_float> &preds = *io_preds;
+  void PredTransform(HostDeviceVector<bst_float> *io_preds) override {
+    std::vector<bst_float> &preds = io_preds->data_h();
     const long ndata = static_cast<long>(preds.size()); // NOLINT(*)
 #pragma omp parallel for schedule(static)
     for (long j = 0; j < ndata; ++j) { // NOLINT(*)
       preds[j] = std::exp(preds[j]);
     }
   }
-  void EvalTransform(std::vector<bst_float> *io_preds) override {
+  void EvalTransform(HostDeviceVector<bst_float> *io_preds) override {
     PredTransform(io_preds);
   }
   bst_float ProbToMargin(bst_float base_score) const override {
@@ -288,39 +294,41 @@ class GammaRegression : public ObjFunction {
   void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
   }

-  void GetGradient(const std::vector<bst_float> &preds,
+  void GetGradient(HostDeviceVector<bst_float> *preds,
                    const MetaInfo &info,
                    int iter,
-                   std::vector<bst_gpair> *out_gpair) override {
+                   HostDeviceVector<bst_gpair> *out_gpair) override {
     CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty";
-    CHECK_EQ(preds.size(), info.labels.size()) << "labels are not correctly provided";
-    out_gpair->resize(preds.size());
+    CHECK_EQ(preds->size(), info.labels.size()) << "labels are not correctly provided";
+    auto& preds_h = preds->data_h();
+    out_gpair->resize(preds_h.size());
+    auto& gpair = out_gpair->data_h();
     // check if label in range
     bool label_correct = true;
     // start calculating gradient
-    const omp_ulong ndata = static_cast<omp_ulong>(preds.size()); // NOLINT(*)
+    const omp_ulong ndata = static_cast<omp_ulong>(preds_h.size()); // NOLINT(*)
 #pragma omp parallel for schedule(static)
     for (omp_ulong i = 0; i < ndata; ++i) { // NOLINT(*)
-      bst_float p = preds[i];
+      bst_float p = preds_h[i];
       bst_float w = info.GetWeight(i);
       bst_float y = info.labels[i];
       if (y >= 0.0f) {
-        (*out_gpair)[i] = bst_gpair((1 - y / std::exp(p)) * w, y / std::exp(p) * w);
+        gpair[i] = bst_gpair((1 - y / std::exp(p)) * w, y / std::exp(p) * w);
       } else {
         label_correct = false;
       }
     }
     CHECK(label_correct) << "GammaRegression: label must be positive";
   }
-  void PredTransform(std::vector<bst_float> *io_preds) override {
-    std::vector<bst_float> &preds = *io_preds;
+  void PredTransform(HostDeviceVector<bst_float> *io_preds) override {
+    std::vector<bst_float> &preds = io_preds->data_h();
     const long ndata = static_cast<long>(preds.size()); // NOLINT(*)
 #pragma omp parallel for schedule(static)
     for (long j = 0; j < ndata; ++j) { // NOLINT(*)
       preds[j] = std::exp(preds[j]);
     }
   }
-  void EvalTransform(std::vector<bst_float> *io_preds) override {
+  void EvalTransform(HostDeviceVector<bst_float> *io_preds) override {
     PredTransform(io_preds);
   }
   bst_float ProbToMargin(bst_float base_score) const override {
@@ -353,20 +361,22 @@ class TweedieRegression : public ObjFunction {
     param_.InitAllowUnknown(args);
   }

-  void GetGradient(const std::vector<bst_float> &preds,
+  void GetGradient(HostDeviceVector<bst_float> *preds,
                    const MetaInfo &info,
                    int iter,
-                   std::vector<bst_gpair> *out_gpair) override {
+                   HostDeviceVector<bst_gpair> *out_gpair) override {
     CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty";
-    CHECK_EQ(preds.size(), info.labels.size()) << "labels are not correctly provided";
-    out_gpair->resize(preds.size());
+    CHECK_EQ(preds->size(), info.labels.size()) << "labels are not correctly provided";
+    auto& preds_h = preds->data_h();
+    out_gpair->resize(preds->size());
+    auto& gpair = out_gpair->data_h();
     // check if label in range
     bool label_correct = true;
     // start calculating gradient
-    const omp_ulong ndata = static_cast<omp_ulong>(preds.size()); // NOLINT(*)
+    const omp_ulong ndata = static_cast<omp_ulong>(preds->size()); // NOLINT(*)
 #pragma omp parallel for schedule(static)
     for (omp_ulong i = 0; i < ndata; ++i) { // NOLINT(*)
-      bst_float p = preds[i];
+      bst_float p = preds_h[i];
       bst_float w = info.GetWeight(i);
       bst_float y = info.labels[i];
       float rho = param_.tweedie_variance_power;
@@ -374,15 +384,15 @@ class TweedieRegression : public ObjFunction {
         bst_float grad = -y * std::exp((1 - rho) * p) + std::exp((2 - rho) * p);
         bst_float hess = -y * (1 - rho) * \
           std::exp((1 - rho) * p) + (2 - rho) * std::exp((2 - rho) * p);
-        (*out_gpair)[i] = bst_gpair(grad * w, hess * w);
+        gpair[i] = bst_gpair(grad * w, hess * w);
       } else {
         label_correct = false;
       }
     }
     CHECK(label_correct) << "TweedieRegression: label must be nonnegative";
   }
-  void PredTransform(std::vector<bst_float> *io_preds) override {
-    std::vector<bst_float> &preds = *io_preds;
+  void PredTransform(HostDeviceVector<bst_float> *io_preds) override {
+    std::vector<bst_float> &preds = io_preds->data_h();
     const long ndata = static_cast<long>(preds.size()); // NOLINT(*)
 #pragma omp parallel for schedule(static)
     for (long j = 0; j < ndata; ++j) { // NOLINT(*)
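Taken together, the CPU and GPU call sites in this commit rely on a small HostDeviceVector surface. A header-style sketch of that surface as it reads from the diff — names are taken from the call sites, while bodies and host/device synchronization are elided, so this is a reading aid rather than the real class:

    #include <cstddef>
    #include <vector>

    template <typename T>
    class HostDeviceVectorSketch {
     public:
      std::size_t size() const;                      // element count
      void resize(std::size_t n);                    // host-side resize (CPU paths)
      void resize(std::size_t n, T v, int device);   // resize with fill value on a GPU
      std::vector<T>& data_h();                      // borrow the host copy
      T* ptr_d(int device);                          // raw device pointer
      // tbegin(device)/tend(device): thrust iterators over the device copy
    };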
@@ -103,8 +103,8 @@ class GPURegLossObj : public ObjFunction {
     // free the old data and allocate the new data
     ba_.reset(new bulk_allocator<memory_type::DEVICE>());
     data_.reset(new DeviceData(ba_.get(), 0, n));
-    preds_d_.resize(n, param_.gpu_id);
-    out_gpair_d_.resize(n, param_.gpu_id);
+    preds_d_.resize(n, 0.0f, param_.gpu_id);
+    out_gpair_d_.resize(n, bst_gpair(), param_.gpu_id);
   }

  public:
@@ -114,23 +114,6 @@ class GPURegLossObj : public ObjFunction {
     param_.InitAllowUnknown(args);
     CHECK(param_.n_gpus != 0) << "Must have at least one device";
   }
-  void GetGradient(const std::vector<float> &preds,
-                   const MetaInfo &info,
-                   int iter,
-                   std::vector<bst_gpair> *out_gpair) override {
-    CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty";
-    CHECK_EQ(preds.size(), info.labels.size())
-      << "labels are not correctly provided"
-      << "preds.size=" << preds.size() << ", label.size=" << info.labels.size();
-
-    size_t ndata = preds.size();
-    out_gpair->resize(ndata);
-    LazyResize(ndata);
-    thrust::copy(preds.begin(), preds.end(), preds_d_.tbegin(param_.gpu_id));
-    GetGradientDevice(preds_d_.ptr_d(param_.gpu_id), info, iter,
-                      out_gpair_d_.ptr_d(param_.gpu_id), ndata);
-    thrust::copy_n(out_gpair_d_.tbegin(param_.gpu_id), ndata, out_gpair->begin());
-  }
-
   void GetGradient(HostDeviceVector<float>* preds,
                    const MetaInfo &info,
@@ -141,7 +124,7 @@ class GPURegLossObj : public ObjFunction {
       << "labels are not correctly provided"
       << "preds.size=" << preds->size() << ", label.size=" << info.labels.size();
     size_t ndata = preds->size();
-    out_gpair->resize(ndata, param_.gpu_id);
+    out_gpair->resize(ndata, bst_gpair(), param_.gpu_id);
     LazyResize(ndata);
     GetGradientDevice(preds->ptr_d(param_.gpu_id), info, iter,
                       out_gpair->ptr_d(param_.gpu_id), ndata);
@@ -189,13 +172,6 @@ class GPURegLossObj : public ObjFunction {
     return Loss::DefaultEvalMetric();
   }

-  void PredTransform(std::vector<float> *io_preds) override {
-    LazyResize(io_preds->size());
-    thrust::copy(io_preds->begin(), io_preds->end(), preds_d_.tbegin(param_.gpu_id));
-    PredTransformDevice(preds_d_.ptr_d(param_.gpu_id), io_preds->size());
-    thrust::copy_n(preds_d_.tbegin(param_.gpu_id), io_preds->size(), io_preds->begin());
-  }
-
   void PredTransform(HostDeviceVector<float> *io_preds) override {
     PredTransformDevice(io_preds->ptr_d(param_.gpu_id), io_preds->size());
   }
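With the learner and boosters now passing HostDeviceVector end to end, the std::vector shims above become dead weight and are deleted outright rather than kept as forwarding wrappers. Note also the device-side resize calls gaining an explicit fill value. A host-only mock of the assumed three-argument signature, resize(new_size, fill_value, device_id), just to pin down the argument order:

    #include <cstddef>
    #include <vector>

    struct bst_gpair { float grad = 0.0f; float hess = 0.0f; };

    template <typename T>
    struct DeviceVecSketch {
      std::vector<T> buf;
      int device = -1;
      void resize(std::size_t n, T fill, int device_id) {
        buf.resize(n, fill);  // new elements get an explicit initializer
        device = device_id;   // the real class keeps this buffer on that GPU
      }
    };

    int main() {
      DeviceVecSketch<float> preds_d;
      DeviceVecSketch<bst_gpair> out_gpair_d;
      preds_d.resize(100, 0.0f, 0);             // mirrors preds_d_.resize(n, 0.0f, gpu_id)
      out_gpair_d.resize(100, bst_gpair(), 0);  // mirrors out_gpair_d_.resize(...)
    }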
@@ -104,14 +104,43 @@ class CPUPredictor : public Predictor {
                       tree_begin, ntree_limit);
   }

-  public:
-  void PredictBatch(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
-                    const gbm::GBTreeModel& model, int tree_begin,
-                    unsigned ntree_limit = 0) override {
-    PredictBatch(dmat, &out_preds->data_h(), model, tree_begin, ntree_limit);
-  }
-
-  void PredictBatch(DMatrix* dmat, std::vector<bst_float>* out_preds,
+  bool PredictFromCache(DMatrix* dmat,
+                        HostDeviceVector<bst_float>* out_preds,
+                        const gbm::GBTreeModel& model,
+                        unsigned ntree_limit) {
+    if (ntree_limit == 0 ||
+        ntree_limit * model.param.num_output_group >= model.trees.size()) {
+      auto it = cache_.find(dmat);
+      if (it != cache_.end()) {
+        HostDeviceVector<bst_float>& y = it->second.predictions;
+        if (y.size() != 0) {
+          out_preds->resize(y.size());
+          std::copy(y.data_h().begin(), y.data_h().end(),
+                    out_preds->data_h().begin());
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+
+  void InitOutPredictions(const MetaInfo& info,
+                          HostDeviceVector<bst_float>* out_preds,
+                          const gbm::GBTreeModel& model) const {
+    size_t n = model.param.num_output_group * info.num_row;
+    const std::vector<bst_float>& base_margin = info.base_margin;
+    out_preds->resize(n);
+    std::vector<bst_float>& out_preds_h = out_preds->data_h();
+    if (base_margin.size() != 0) {
+      CHECK_EQ(out_preds->size(), n);
+      std::copy(base_margin.begin(), base_margin.end(), out_preds_h.begin());
+    } else {
+      std::fill(out_preds_h.begin(), out_preds_h.end(), model.base_margin);
+    }
+  }
+
+ public:
+  void PredictBatch(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
                     const gbm::GBTreeModel& model, int tree_begin,
                     unsigned ntree_limit = 0) override {
     if (this->PredictFromCache(dmat, out_preds, model, ntree_limit)) {
@@ -125,12 +154,14 @@ class CPUPredictor : public Predictor {
       ntree_limit = static_cast<unsigned>(model.trees.size());
     }

-    this->PredLoopInternal(dmat, out_preds, model, tree_begin, ntree_limit);
+    this->PredLoopInternal(dmat, &out_preds->data_h(), model,
+                           tree_begin, ntree_limit);
   }

-  void UpdatePredictionCache(const gbm::GBTreeModel& model,
-                             std::vector<std::unique_ptr<TreeUpdater>>* updaters,
-                             int num_new_trees) override {
+  void UpdatePredictionCache(
+      const gbm::GBTreeModel& model,
+      std::vector<std::unique_ptr<TreeUpdater>>* updaters,
+      int num_new_trees) override {
     int old_ntree = model.trees.size() - num_new_trees;
     // update cache entry
     for (auto& kv : cache_) {
@@ -138,7 +169,7 @@ class CPUPredictor : public Predictor {

       if (e.predictions.size() == 0) {
         InitOutPredictions(e.data->info(), &(e.predictions), model);
-        PredLoopInternal(e.data.get(), &(e.predictions), model, 0,
+        PredLoopInternal(e.data.get(), &(e.predictions.data_h()), model, 0,
                          model.trees.size());
       } else if (model.param.num_output_group == 1 && updaters->size() > 0 &&
                  num_new_trees == 1 &&
@@ -146,7 +177,7 @@ class CPUPredictor : public Predictor {
                  &(e.predictions))) {
         {}  // do nothing
       } else {
-        PredLoopInternal(e.data.get(), &(e.predictions), model, old_ntree,
+        PredLoopInternal(e.data.get(), &(e.predictions.data_h()), model, old_ntree,
                          model.trees.size());
       }
     }
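PredictFromCache is the fast path: when ntree_limit covers the whole model and this DMatrix already has a warm cache entry, the cached host vector is copied out and tree traversal is skipped entirely. A host-only sketch of that lookup, with CacheEntrySketch standing in for the real PredictionCacheEntry:

    #include <algorithm>
    #include <unordered_map>
    #include <vector>

    struct CacheEntrySketch { std::vector<float> predictions; };

    bool PredictFromCacheSketch(
        const void* dmat, std::vector<float>* out_preds,
        std::unordered_map<const void*, CacheEntrySketch>& cache) {
      auto it = cache.find(dmat);
      if (it == cache.end() || it->second.predictions.empty()) return false;
      const std::vector<float>& y = it->second.predictions;
      out_preds->resize(y.size());
      std::copy(y.begin(), y.end(), out_preds->begin());  // reuse cached scores
      return true;
    }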
@@ -256,8 +256,6 @@ class GPUPredictor : public xgboost::Predictor {
     HostDeviceVector<bst_float> predictions;
   };

-  std::unordered_map<DMatrix*, DevicePredictionCacheEntry> device_cache_;
-
  private:
   void DevicePredictInternal(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
                              const gbm::GBTreeModel& model, size_t tree_begin,
@@ -337,25 +335,16 @@ class GPUPredictor : public xgboost::Predictor {
  public:
   GPUPredictor() : cpu_predictor(Predictor::Create("cpu_predictor")) {}

-  void PredictBatch(DMatrix* dmat, std::vector<bst_float>* out_preds,
-                    const gbm::GBTreeModel& model, int tree_begin,
-                    unsigned ntree_limit = 0) override {
-    HostDeviceVector<bst_float> out_preds_d;
-    PredictBatch(dmat, &out_preds_d, model, tree_begin, ntree_limit);
-    out_preds->resize(out_preds_d.size());
-    thrust::copy(out_preds_d.tbegin(param.gpu_id),
-                 out_preds_d.tend(param.gpu_id), out_preds->begin());
-  }
-
   void PredictBatch(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
                     const gbm::GBTreeModel& model, int tree_begin,
                     unsigned ntree_limit = 0) override {
-    if (this->PredictFromCacheDevice(dmat, out_preds, model, ntree_limit)) {
+    if (this->PredictFromCache(dmat, out_preds, model, ntree_limit)) {
       return;
     }
-    this->InitOutPredictionsDevice(dmat->info(), out_preds, model);
+    this->InitOutPredictions(dmat->info(), out_preds, model);

     int tree_end = ntree_limit * model.param.num_output_group;

     if (ntree_limit == 0 || ntree_limit > model.trees.size()) {
       tree_end = static_cast<unsigned>(model.trees.size());
     }
@@ -363,13 +352,13 @@ class GPUPredictor : public xgboost::Predictor {
     DevicePredictInternal(dmat, out_preds, model, tree_begin, tree_end);
   }

-  void InitOutPredictionsDevice(const MetaInfo& info,
-                                HostDeviceVector<bst_float>* out_preds,
-                                const gbm::GBTreeModel& model) const {
+ protected:
+  void InitOutPredictions(const MetaInfo& info,
+                          HostDeviceVector<bst_float>* out_preds,
+                          const gbm::GBTreeModel& model) const {
     size_t n = model.param.num_output_group * info.num_row;
     const std::vector<bst_float>& base_margin = info.base_margin;
-    out_preds->resize(n, param.gpu_id);
+    out_preds->resize(n, 0.0f, param.gpu_id);
     if (base_margin.size() != 0) {
       CHECK_EQ(out_preds->size(), n);
       thrust::copy(base_margin.begin(), base_margin.end(), out_preds->tbegin(param.gpu_id));
@@ -380,29 +369,16 @@ class GPUPredictor : public xgboost::Predictor {
     }
   }

   bool PredictFromCache(DMatrix* dmat,
-                        std::vector<bst_float>* out_preds,
+                        HostDeviceVector<bst_float>* out_preds,
                         const gbm::GBTreeModel& model,
                         unsigned ntree_limit) {
-    HostDeviceVector<bst_float> out_preds_d(0, -1);
-    bool result = PredictFromCacheDevice(dmat, &out_preds_d, model, ntree_limit);
-    if (!result) return false;
-    out_preds->resize(out_preds_d.size(), param.gpu_id);
-    thrust::copy(out_preds_d.tbegin(param.gpu_id),
-                 out_preds_d.tend(param.gpu_id), out_preds->begin());
-    return true;
-  }
-
-  bool PredictFromCacheDevice(DMatrix* dmat,
-                              HostDeviceVector<bst_float>* out_preds,
-                              const gbm::GBTreeModel& model,
-                              unsigned ntree_limit) {
     if (ntree_limit == 0 ||
         ntree_limit * model.param.num_output_group >= model.trees.size()) {
-      auto it = device_cache_.find(dmat);
-      if (it != device_cache_.end()) {
+      auto it = cache_.find(dmat);
+      if (it != cache_.end()) {
         HostDeviceVector<bst_float>& y = it->second.predictions;
         if (y.size() != 0) {
-          out_preds->resize(y.size(), param.gpu_id);
+          out_preds->resize(y.size(), 0.0f, param.gpu_id);
           thrust::copy(y.tbegin(param.gpu_id), y.tend(param.gpu_id),
                        out_preds->tbegin(param.gpu_id));
           return true;
@@ -418,15 +394,15 @@ class GPUPredictor : public xgboost::Predictor {
                              int num_new_trees) override {
     auto old_ntree = model.trees.size() - num_new_trees;
     // update cache entry
-    for (auto& kv : device_cache_) {
-      DevicePredictionCacheEntry& e = kv.second;
+    for (auto& kv : cache_) {
+      PredictionCacheEntry& e = kv.second;
       DMatrix* dmat = kv.first;
       HostDeviceVector<bst_float>& predictions = e.predictions;

       if (predictions.size() == 0) {
         // ensure that the device in predictions is correct
-        predictions.resize(0, param.gpu_id);
-        cpu_predictor->PredictBatch(dmat, &predictions.data_h(), model, 0,
+        predictions.resize(0, 0.0f, param.gpu_id);
+        cpu_predictor->PredictBatch(dmat, &predictions, model, 0,
                                     static_cast<bst_uint>(model.trees.size()));
       } else if (model.param.num_output_group == 1 && updaters->size() > 0 &&
                  num_new_trees == 1 &&
@@ -477,8 +453,6 @@ class GPUPredictor : public xgboost::Predictor {
     Predictor::Init(cfg, cache);
     cpu_predictor->Init(cfg, cache);
     param.InitAllowUnknown(cfg);
-    for (const std::shared_ptr<DMatrix>& d : cache)
-      device_cache_[d.get()].data = d;
     max_shared_memory_bytes = dh::max_shared_memory(param.gpu_id);
   }
@@ -11,43 +11,8 @@ namespace xgboost {
 void Predictor::Init(
     const std::vector<std::pair<std::string, std::string>>& cfg,
     const std::vector<std::shared_ptr<DMatrix>>& cache) {
-  for (const std::shared_ptr<DMatrix>& d : cache) {
-    PredictionCacheEntry e;
-    e.data = d;
-    cache_[d.get()] = std::move(e);
-  }
-}
-bool Predictor::PredictFromCache(DMatrix* dmat,
-                                 std::vector<bst_float>* out_preds,
-                                 const gbm::GBTreeModel& model,
-                                 unsigned ntree_limit) {
-  if (ntree_limit == 0 ||
-      ntree_limit * model.param.num_output_group >= model.trees.size()) {
-    auto it = cache_.find(dmat);
-    if (it != cache_.end()) {
-      std::vector<bst_float>& y = it->second.predictions;
-      if (y.size() != 0) {
-        out_preds->resize(y.size());
-        std::copy(y.begin(), y.end(), out_preds->begin());
-        return true;
-      }
-    }
-  }
-
-  return false;
-}
-void Predictor::InitOutPredictions(const MetaInfo& info,
-                                   std::vector<bst_float>* out_preds,
-                                   const gbm::GBTreeModel& model) const {
-  size_t n = model.param.num_output_group * info.num_row;
-  const std::vector<bst_float>& base_margin = info.base_margin;
-  out_preds->resize(n);
-  if (base_margin.size() != 0) {
-    CHECK_EQ(out_preds->size(), n);
-    std::copy(base_margin.begin(), base_margin.end(), out_preds->begin());
-  } else {
-    std::fill(out_preds->begin(), out_preds->end(), model.base_margin);
-  }
+  for (const std::shared_ptr<DMatrix>& d : cache)
+    cache_[d.get()].data = d;
 }
 Predictor* Predictor::Create(std::string name) {
   auto* e = ::dmlc::Registry<PredictorReg>::Get()->Find(name);
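The Init loop shrinks to one line because unordered_map::operator[] value-initializes a missing entry on first access, so only the data member needs assigning. A tiny self-contained illustration, with EntrySketch standing in for PredictionCacheEntry:

    #include <memory>
    #include <unordered_map>
    #include <vector>

    struct EntrySketch { std::shared_ptr<int> data; };

    void InitSketch(const std::vector<std::shared_ptr<int>>& cache,
                    std::unordered_map<int*, EntrySketch>& cache_) {
      for (const auto& d : cache)
        cache_[d.get()].data = d;  // entry default-constructed on demand, then filled
    }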
@@ -22,17 +22,6 @@ TreeUpdater* TreeUpdater::Create(const std::string& name) {
   return (e->body)();
 }

-void TreeUpdater::Update(HostDeviceVector<bst_gpair>* gpair,
-                         DMatrix* data,
-                         const std::vector<RegTree*>& trees) {
-  Update(gpair->data_h(), data, trees);
-}
-
-bool TreeUpdater::UpdatePredictionCache(const DMatrix* data,
-                                        HostDeviceVector<bst_float>* out_preds) {
-  return UpdatePredictionCache(data, &out_preds->data_h());
-}
-
 }  // namespace xgboost

 namespace xgboost {
@@ -26,7 +26,7 @@ class ColMaker: public TreeUpdater {
     param.InitAllowUnknown(args);
   }

-  void Update(const std::vector<bst_gpair> &gpair,
+  void Update(HostDeviceVector<bst_gpair> *gpair,
               DMatrix* dmat,
               const std::vector<RegTree*> &trees) override {
     TStats::CheckInfo(dmat->info());
@@ -37,7 +37,7 @@ class ColMaker: public TreeUpdater {
     // build tree
     for (size_t i = 0; i < trees.size(); ++i) {
       Builder builder(param);
-      builder.Update(gpair, dmat, trees[i]);
+      builder.Update(gpair->data_h(), dmat, trees[i]);
     }
     param.learning_rate = lr;
   }
@@ -806,13 +806,13 @@ class DistColMaker : public ColMaker<TStats, TConstraint> {
     param.InitAllowUnknown(args);
     pruner->Init(args);
   }
-  void Update(const std::vector<bst_gpair> &gpair,
+  void Update(HostDeviceVector<bst_gpair> *gpair,
               DMatrix* dmat,
               const std::vector<RegTree*> &trees) override {
     TStats::CheckInfo(dmat->info());
     CHECK_EQ(trees.size(), 1U) << "DistColMaker: only support one tree at a time";
     // build the tree
-    builder.Update(gpair, dmat, trees[0]);
+    builder.Update(gpair->data_h(), dmat, trees[0]);
     //// prune the tree, note that pruner will sync the tree
     pruner->Update(gpair, dmat, trees);
     // update position after the tree is pruned
@@ -967,7 +967,7 @@ class TreeUpdaterSwitch : public TreeUpdater {
     inner_->Init(args);
   }

-  void Update(const std::vector<bst_gpair>& gpair,
+  void Update(HostDeviceVector<bst_gpair>* gpair,
               DMatrix* data,
               const std::vector<RegTree*>& trees) override {
     CHECK(inner_ != nullptr);
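The CPU updaters mostly take the new type only at the interface and unwrap it immediately, leaving the legacy std::vector code paths untouched. A compilable sketch of that bridging pattern, with host-only stand-ins for the real types:

    #include <vector>

    struct bst_gpair { float grad; float hess; };

    // Host-only stand-in for HostDeviceVector<bst_gpair>.
    template <typename T>
    struct HostVec {
      std::vector<T> h;
      std::vector<T>& data_h() { return h; }
    };

    void LegacyBuilderUpdate(const std::vector<bst_gpair>& gpair) { /* unchanged */ }

    void UpdateSketch(HostVec<bst_gpair>* gpair) {
      LegacyBuilderUpdate(gpair->data_h());  // one unwrap at the boundary
    }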
@@ -55,7 +55,7 @@ class FastHistMaker: public TreeUpdater {
     is_gmat_initialized_ = false;
   }

-  void Update(const std::vector<bst_gpair>& gpair,
+  void Update(HostDeviceVector<bst_gpair>* gpair,
               DMatrix* dmat,
               const std::vector<RegTree*>& trees) override {
     TStats::CheckInfo(dmat->info());
@@ -82,13 +82,14 @@ class FastHistMaker: public TreeUpdater {
       builder_.reset(new Builder(param, fhparam, std::move(pruner_)));
     }
     for (size_t i = 0; i < trees.size(); ++i) {
-      builder_->Update(gmat_, gmatb_, column_matrix_, gpair, dmat, trees[i]);
+      builder_->Update
+        (gmat_, gmatb_, column_matrix_, gpair, dmat, trees[i]);
     }
     param.learning_rate = lr;
   }

   bool UpdatePredictionCache(const DMatrix* data,
-                             std::vector<bst_float>* out_preds) override {
+                             HostDeviceVector<bst_float>* out_preds) override {
     if (!builder_ || param.subsample < 1.0f) {
       return false;
     } else {
@@ -139,7 +140,7 @@ class FastHistMaker: public TreeUpdater {
     virtual void Update(const GHistIndexMatrix& gmat,
                         const GHistIndexBlockMatrix& gmatb,
                         const ColumnMatrix& column_matrix,
-                        const std::vector<bst_gpair>& gpair,
+                        HostDeviceVector<bst_gpair>* gpair,
                         DMatrix* p_fmat,
                         RegTree* p_tree) {
       double gstart = dmlc::GetTime();
@@ -154,8 +155,10 @@ class FastHistMaker: public TreeUpdater {
       double time_evaluate_split = 0;
       double time_apply_split = 0;

+      std::vector<bst_gpair>& gpair_h = gpair->data_h();
+
       tstart = dmlc::GetTime();
-      this->InitData(gmat, gpair, *p_fmat, *p_tree);
+      this->InitData(gmat, gpair_h, *p_fmat, *p_tree);
       std::vector<bst_uint> feat_set = feat_index;
       time_init_data = dmlc::GetTime() - tstart;

@@ -165,11 +168,11 @@ class FastHistMaker: public TreeUpdater {
       for (int nid = 0; nid < p_tree->param.num_roots; ++nid) {
         tstart = dmlc::GetTime();
         hist_.AddHistRow(nid);
-        BuildHist(gpair, row_set_collection_[nid], gmat, gmatb, feat_set, hist_[nid]);
+        BuildHist(gpair_h, row_set_collection_[nid], gmat, gmatb, feat_set, hist_[nid]);
         time_build_hist += dmlc::GetTime() - tstart;

         tstart = dmlc::GetTime();
-        this->InitNewNode(nid, gmat, gpair, *p_fmat, *p_tree);
+        this->InitNewNode(nid, gmat, gpair_h, *p_fmat, *p_tree);
         time_init_new_node += dmlc::GetTime() - tstart;

         tstart = dmlc::GetTime();
@@ -200,17 +203,17 @@ class FastHistMaker: public TreeUpdater {
           hist_.AddHistRow(cleft);
           hist_.AddHistRow(cright);
           if (row_set_collection_[cleft].size() < row_set_collection_[cright].size()) {
-            BuildHist(gpair, row_set_collection_[cleft], gmat, gmatb, feat_set, hist_[cleft]);
+            BuildHist(gpair_h, row_set_collection_[cleft], gmat, gmatb, feat_set, hist_[cleft]);
             SubtractionTrick(hist_[cright], hist_[cleft], hist_[nid]);
           } else {
-            BuildHist(gpair, row_set_collection_[cright], gmat, gmatb, feat_set, hist_[cright]);
+            BuildHist(gpair_h, row_set_collection_[cright], gmat, gmatb, feat_set, hist_[cright]);
             SubtractionTrick(hist_[cleft], hist_[cright], hist_[nid]);
           }
           time_build_hist += dmlc::GetTime() - tstart;

           tstart = dmlc::GetTime();
-          this->InitNewNode(cleft, gmat, gpair, *p_fmat, *p_tree);
-          this->InitNewNode(cright, gmat, gpair, *p_fmat, *p_tree);
+          this->InitNewNode(cleft, gmat, gpair_h, *p_fmat, *p_tree);
+          this->InitNewNode(cright, gmat, gpair_h, *p_fmat, *p_tree);
           time_init_new_node += dmlc::GetTime() - tstart;

           tstart = dmlc::GetTime();
@@ -293,8 +296,8 @@ class FastHistMaker: public TreeUpdater {
     }

     inline bool UpdatePredictionCache(const DMatrix* data,
-                                      std::vector<bst_float>* p_out_preds) {
-      std::vector<bst_float>& out_preds = *p_out_preds;
+                                      HostDeviceVector<bst_float>* p_out_preds) {
+      std::vector<bst_float>& out_preds = p_out_preds->data_h();

       // p_last_fmat_ is a valid pointer as long as UpdatePredictionCache() is called in
       // conjunction with Update().
@@ -512,7 +512,7 @@ class GPUMaker : public TreeUpdater {
     maxLeaves = 1 << param.max_depth;
   }

-  void Update(const std::vector<bst_gpair>& gpair, DMatrix* dmat,
+  void Update(HostDeviceVector<bst_gpair>* gpair, DMatrix* dmat,
               const std::vector<RegTree*>& trees) override {
     GradStats::CheckInfo(dmat->info());
     // rescale learning rate according to size of trees
@@ -530,7 +530,7 @@ class GPUMaker : public TreeUpdater {
     param.learning_rate = lr;
   }
   /// @note: Update should be only after Init!!
-  void UpdateTree(const std::vector<bst_gpair>& gpair, DMatrix* dmat,
+  void UpdateTree(HostDeviceVector<bst_gpair>* gpair, DMatrix* dmat,
                   RegTree* hTree) {
     if (!allocated) {
       setupOneTimeData(dmat);
@@ -687,11 +687,11 @@ class GPUMaker : public TreeUpdater {
     assignColIds<<<nCols, 512>>>(colIds.data(), colOffsets.data());
   }

-  void transferGrads(const std::vector<bst_gpair>& gpair) {
+  void transferGrads(HostDeviceVector<bst_gpair>* gpair) {
     // HACK
-    dh::safe_cuda(cudaMemcpy(gradsInst.data(), &(gpair[0]),
+    dh::safe_cuda(cudaMemcpy(gradsInst.data(), gpair->ptr_d(param.gpu_id),
                              sizeof(bst_gpair) * nRows,
-                             cudaMemcpyHostToDevice));
+                             cudaMemcpyDefault));
     // evaluate the full-grad reduction for the root node
     dh::sumReduction<bst_gpair>(tmp_mem, gradsInst, gradSums, nRows);
   }
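With the gradients now handed over as a device pointer via ptr_d() rather than a host array, the copy direction can no longer be hard-coded; cudaMemcpyDefault lets the CUDA runtime infer it from the pointer addresses, which requires unified virtual addressing — a safe assumption on the hardware these GPU builds target. The essential call as a standalone sketch (error handling omitted; the real code wraps it in dh::safe_cuda):

    #include <cstddef>
    #include <cuda_runtime.h>

    struct bst_gpair { float grad; float hess; };

    // src and dst may each be host or device pointers; the runtime decides.
    void CopyGradsSketch(bst_gpair* dst, const bst_gpair* src, std::size_t n) {
      cudaMemcpy(dst, src, sizeof(bst_gpair) * n, cudaMemcpyDefault);
    }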
@@ -506,27 +506,9 @@ class GPUHistMaker : public TreeUpdater {
     monitor.Init("updater_gpu_hist", param.debug_verbose);
   }

-  void Update(const std::vector<bst_gpair>& gpair, DMatrix* dmat,
-              const std::vector<RegTree*>& trees) override {
-    monitor.Start("Update", dList);
-    // TODO(canonizer): move it into the class if this ever becomes a bottleneck
-    HostDeviceVector<bst_gpair> gpair_d(gpair.size(), param.gpu_id);
-    dh::safe_cuda(cudaSetDevice(param.gpu_id));
-    thrust::copy(gpair.begin(), gpair.end(), gpair_d.tbegin(param.gpu_id));
-    Update(&gpair_d, dmat, trees);
-    monitor.Stop("Update", dList);
-  }
-
   void Update(HostDeviceVector<bst_gpair>* gpair, DMatrix* dmat,
               const std::vector<RegTree*>& trees) override {
     monitor.Start("Update", dList);
-    UpdateHelper(gpair, dmat, trees);
-    monitor.Stop("Update", dList);
-  }
-
-  private:
-  void UpdateHelper(HostDeviceVector<bst_gpair>* gpair, DMatrix* dmat,
-                    const std::vector<RegTree*>& trees) {
     GradStats::CheckInfo(dmat->info());
     // rescale learning rate according to size of trees
     float lr = param.learning_rate;
@@ -541,9 +523,9 @@ class GPUHistMaker : public TreeUpdater {
       LOG(FATAL) << "GPU plugin exception: " << e.what() << std::endl;
     }
     param.learning_rate = lr;
+    monitor.Stop("Update", dList);
   }

-  public:
   void InitDataOnce(DMatrix* dmat) {
     info = &dmat->info();
     monitor.Start("Quantiles", dList);
@@ -876,16 +858,6 @@ class GPUHistMaker : public TreeUpdater {
     omp_set_num_threads(nthread);
   }

-  bool UpdatePredictionCache(const DMatrix* data,
-                             std::vector<bst_float>* p_out_preds) override {
-    return false;
-  }
-
-  bool UpdatePredictionCache(
-      const DMatrix* data, HostDeviceVector<bst_float>* p_out_preds) override {
-    return false;
-  }
-
   struct ExpandEntry {
     int nid;
     int depth;
@@ -21,7 +21,7 @@ DMLC_REGISTRY_FILE_TAG(updater_histmaker);
 template<typename TStats>
 class HistMaker: public BaseMaker {
  public:
-  void Update(const std::vector<bst_gpair> &gpair,
+  void Update(HostDeviceVector<bst_gpair> *gpair,
               DMatrix *p_fmat,
               const std::vector<RegTree*> &trees) override {
     TStats::CheckInfo(p_fmat->info());
@@ -30,7 +30,7 @@ class HistMaker: public BaseMaker {
     param.learning_rate = lr / trees.size();
     // build tree
     for (size_t i = 0; i < trees.size(); ++i) {
-      this->Update(gpair, p_fmat, trees[i]);
+      this->Update(gpair->data_h(), p_fmat, trees[i]);
     }
     param.learning_rate = lr;
   }
@@ -29,7 +29,7 @@ class TreePruner: public TreeUpdater {
     syncher->Init(args);
   }
   // update the tree, do pruning
-  void Update(const std::vector<bst_gpair> &gpair,
+  void Update(HostDeviceVector<bst_gpair> *gpair,
               DMatrix *p_fmat,
               const std::vector<RegTree*> &trees) override {
     // rescale learning rate according to size of trees
@@ -25,10 +25,11 @@ class TreeRefresher: public TreeUpdater {
     param.InitAllowUnknown(args);
   }
   // update the tree, do pruning
-  void Update(const std::vector<bst_gpair> &gpair,
+  void Update(HostDeviceVector<bst_gpair> *gpair,
               DMatrix *p_fmat,
               const std::vector<RegTree*> &trees) override {
     if (trees.size() == 0) return;
+    std::vector<bst_gpair> &gpair_h = gpair->data_h();
     // number of threads
     // thread temporal space
     std::vector<std::vector<TStats> > stemp;
@@ -71,7 +72,7 @@ class TreeRefresher: public TreeUpdater {
         feats.Fill(inst);
         int offset = 0;
         for (size_t j = 0; j < trees.size(); ++j) {
-          AddStats(*trees[j], feats, gpair, info, ridx,
+          AddStats(*trees[j], feats, gpair_h, info, ridx,
                    dmlc::BeginPtr(stemp[tid]) + offset);
           offset += trees[j]->param.num_nodes;
         }
@@ -22,7 +22,7 @@ DMLC_REGISTRY_FILE_TAG(updater_skmaker);

 class SketchMaker: public BaseMaker {
  public:
-  void Update(const std::vector<bst_gpair> &gpair,
+  void Update(HostDeviceVector<bst_gpair> *gpair,
               DMatrix *p_fmat,
               const std::vector<RegTree*> &trees) override {
     // rescale learning rate according to size of trees
@@ -30,7 +30,7 @@ class SketchMaker: public BaseMaker {
     param.learning_rate = lr / trees.size();
     // build tree
     for (size_t i = 0; i < trees.size(); ++i) {
-      this->Update(gpair, p_fmat, trees[i]);
+      this->Update(gpair->data_h(), p_fmat, trees[i]);
     }
     param.learning_rate = lr;
   }
@@ -23,7 +23,7 @@ class TreeSyncher: public TreeUpdater {
  public:
   void Init(const std::vector<std::pair<std::string, std::string> >& args) override {}

-  void Update(const std::vector<bst_gpair> &gpair,
+  void Update(HostDeviceVector<bst_gpair> *gpair,
               DMatrix* dmat,
               const std::vector<RegTree*> &trees) override {
     if (rabit::GetWorldSize() == 1) return;
@@ -38,10 +38,13 @@ void CheckObjFunction(xgboost::ObjFunction * obj,
   info.labels = labels;
   info.weights = weights;

-  std::vector<xgboost::bst_gpair> gpair;
-  obj->GetGradient(preds, info, 1, &gpair);
+  xgboost::HostDeviceVector<xgboost::bst_float> in_preds(preds);

-  ASSERT_EQ(gpair.size(), preds.size());
+  xgboost::HostDeviceVector<xgboost::bst_gpair> out_gpair;
+  obj->GetGradient(&in_preds, info, 1, &out_gpair);
+  std::vector<xgboost::bst_gpair>& gpair = out_gpair.data_h();
+
+  ASSERT_EQ(gpair.size(), in_preds.size());
   for (int i = 0; i < static_cast<int>(gpair.size()); ++i) {
     EXPECT_NEAR(gpair[i].GetGrad(), out_grad[i], 0.01)
       << "Unexpected grad for pred=" << preds[i] << " label=" << labels[i]
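The helper now drives the whole objective round-trip through HostDeviceVector: wrap the inputs, call the interface, read the gradients back through data_h(). A trimmed, self-contained sketch of that flow with mock types; the real helper calls xgboost::ObjFunction::GetGradient with MetaInfo and an iteration number as well:

    #include <cassert>
    #include <cstddef>
    #include <vector>

    using bst_float = float;
    struct bst_gpair { bst_float grad; bst_float hess; };

    template <typename T>
    struct HostVec {  // host-only mock of HostDeviceVector
      std::vector<T> h;
      HostVec() = default;
      explicit HostVec(const std::vector<T>& v) : h(v) {}  // wrap host data
      std::size_t size() const { return h.size(); }
      std::vector<T>& data_h() { return h; }
    };

    void MockGetGradient(HostVec<bst_float>* preds, HostVec<bst_gpair>* out) {
      out->h.resize(preds->size(), bst_gpair{0.0f, 1.0f});
    }

    int main() {
      HostVec<bst_float> in_preds(std::vector<bst_float>{0.0f, 0.5f, 1.0f});
      HostVec<bst_gpair> out_gpair;
      MockGetGradient(&in_preds, &out_gpair);              // call through the new interface
      std::vector<bst_gpair>& gpair = out_gpair.data_h();  // read back on the host
      assert(gpair.size() == in_preds.size());
    }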
@@ -46,10 +46,11 @@ TEST(Objective, LogisticRegressionBasic) {
     << "Expected error when base_score not in range [0,1f] for LogisticRegression";

   // test PredTransform
-  std::vector<xgboost::bst_float> preds = {0, 0.1f, 0.5f, 0.9f, 1};
+  xgboost::HostDeviceVector<xgboost::bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
   std::vector<xgboost::bst_float> out_preds = {0.5f, 0.524f, 0.622f, 0.710f, 0.731f};
-  obj->PredTransform(&preds);
-  for (int i = 0; i < static_cast<int>(preds.size()); ++i) {
+  obj->PredTransform(&io_preds);
+  auto& preds = io_preds.data_h();
+  for (int i = 0; i < static_cast<int>(io_preds.size()); ++i) {
     EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
   }
 }
@@ -94,10 +95,11 @@ TEST(Objective, PoissonRegressionBasic) {
   EXPECT_NEAR(obj->ProbToMargin(0.9f), -0.10f, 0.01f);

   // test PredTransform
-  std::vector<xgboost::bst_float> preds = {0, 0.1f, 0.5f, 0.9f, 1};
+  xgboost::HostDeviceVector<xgboost::bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
   std::vector<xgboost::bst_float> out_preds = {1, 1.10f, 1.64f, 2.45f, 2.71f};
-  obj->PredTransform(&preds);
-  for (int i = 0; i < static_cast<int>(preds.size()); ++i) {
+  obj->PredTransform(&io_preds);
+  auto& preds = io_preds.data_h();
+  for (int i = 0; i < static_cast<int>(io_preds.size()); ++i) {
     EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
   }
 }
@@ -129,10 +131,11 @@ TEST(Objective, GammaRegressionBasic) {
   EXPECT_NEAR(obj->ProbToMargin(0.9f), -0.10f, 0.01f);

   // test PredTransform
-  std::vector<xgboost::bst_float> preds = {0, 0.1f, 0.5f, 0.9f, 1};
+  xgboost::HostDeviceVector<xgboost::bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
   std::vector<xgboost::bst_float> out_preds = {1, 1.10f, 1.64f, 2.45f, 2.71f};
-  obj->PredTransform(&preds);
-  for (int i = 0; i < static_cast<int>(preds.size()); ++i) {
+  obj->PredTransform(&io_preds);
+  auto& preds = io_preds.data_h();
+  for (int i = 0; i < static_cast<int>(io_preds.size()); ++i) {
     EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
   }
 }
@@ -165,10 +168,11 @@ TEST(Objective, TweedieRegressionBasic) {
   EXPECT_NEAR(obj->ProbToMargin(0.9f), 0.89f, 0.01f);

   // test PredTransform
-  std::vector<xgboost::bst_float> preds = {0, 0.1f, 0.5f, 0.9f, 1};
+  xgboost::HostDeviceVector<xgboost::bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
   std::vector<xgboost::bst_float> out_preds = {1, 1.10f, 1.64f, 2.45f, 2.71f};
-  obj->PredTransform(&preds);
-  for (int i = 0; i < static_cast<int>(preds.size()); ++i) {
+  obj->PredTransform(&io_preds);
+  auto& preds = io_preds.data_h();
+  for (int i = 0; i < static_cast<int>(io_preds.size()); ++i) {
     EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
   }
 }
@@ -48,10 +48,11 @@ TEST(Objective, GPULogisticRegressionBasic) {
     << "Expected error when base_score not in range [0,1f] for LogisticRegression";

   // test PredTransform
-  std::vector<xgboost::bst_float> preds = {0, 0.1f, 0.5f, 0.9f, 1};
+  xgboost::HostDeviceVector<xgboost::bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
   std::vector<xgboost::bst_float> out_preds = {0.5f, 0.524f, 0.622f, 0.710f, 0.731f};
-  obj->PredTransform(&preds);
-  for (int i = 0; i < static_cast<int>(preds.size()); ++i) {
+  obj->PredTransform(&io_preds);
+  auto& preds = io_preds.data_h();
+  for (int i = 0; i < static_cast<int>(io_preds.size()); ++i) {
     EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
   }
 }
@@ -24,10 +24,11 @@ TEST(cpu_predictor, Test) {
   auto dmat = CreateDMatrix(n_row, n_col, 0);

   // Test predict batch
-  std::vector<float> out_predictions;
+  HostDeviceVector<float> out_predictions;
   cpu_predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);
+  std::vector<float>& out_predictions_h = out_predictions.data_h();
   for (int i = 0; i < out_predictions.size(); i++) {
-    ASSERT_EQ(out_predictions[i], 1.5);
+    ASSERT_EQ(out_predictions_h[i], 1.5);
   }

   // Test predict instance
@@ -33,13 +33,15 @@ TEST(gpu_predictor, Test) {
   auto dmat = CreateDMatrix(n_row, n_col, 0);

   // Test predict batch
-  std::vector<float> gpu_out_predictions;
-  std::vector<float> cpu_out_predictions;
+  HostDeviceVector<float> gpu_out_predictions;
+  HostDeviceVector<float> cpu_out_predictions;
   gpu_predictor->PredictBatch(dmat.get(), &gpu_out_predictions, model, 0);
   cpu_predictor->PredictBatch(dmat.get(), &cpu_out_predictions, model, 0);
+  std::vector<float>& gpu_out_predictions_h = gpu_out_predictions.data_h();
+  std::vector<float>& cpu_out_predictions_h = cpu_out_predictions.data_h();
   float abs_tolerance = 0.001;
   for (int i = 0; i < gpu_out_predictions.size(); i++) {
-    ASSERT_LT(std::abs(gpu_out_predictions[i] - cpu_out_predictions[i]),
+    ASSERT_LT(std::abs(gpu_out_predictions_h[i] - cpu_out_predictions_h[i]),
               abs_tolerance);
   }
   // Test predict instance