Objective function evaluation on GPU with minimal PCIe transfers (#2935)
* Added GPU objective function and no-copy interface.
  - xgboost::HostDeviceVector<T> syncs automatically between host and device
  - no-copy interfaces have been added
  - default implementations just sync the data to host and call the implementations with std::vector
  - GPU objective function, predictor, histogram updater process data directly on GPU
This commit is contained in:
@@ -21,6 +21,19 @@ GradientBooster* GradientBooster::Create(
|
||||
}
|
||||
return (e->body)(cache_mats, base_margin);
|
||||
}
|
||||
|
||||
// Default HostDeviceVector overload of DoBoost.
// Takes the host-side vector from in_gpair via data_h() and forwards it,
// together with p_fmat and obj, to the DoBoost overload that accepts a
// pointer to that host vector.
void GradientBooster::DoBoost(DMatrix* p_fmat,
|
||||
HostDeviceVector<bst_gpair>* in_gpair,
|
||||
ObjFunction* obj) {
|
||||
DoBoost(p_fmat, &in_gpair->data_h(), obj);
|
||||
}
|
||||
|
||||
// Default HostDeviceVector overload of PredictBatch.
// Takes the host-side vector from out_preds via data_h() and forwards it,
// together with dmat and ntree_limit, to the PredictBatch overload that
// accepts a pointer to that host vector.
void GradientBooster::PredictBatch(DMatrix* dmat,
|
||||
HostDeviceVector<bst_float>* out_preds,
|
||||
unsigned ntree_limit) {
|
||||
PredictBatch(dmat, &out_preds->data_h(), ntree_limit);
|
||||
}
|
||||
|
||||
} // namespace xgboost
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
#include <limits>
|
||||
#include <algorithm>
|
||||
#include "../common/common.h"
|
||||
#include "../common/host_device_vector.h"
|
||||
#include "../common/random.h"
|
||||
#include "gbtree_model.h"
|
||||
#include "../common/timer.h"
|
||||
@@ -182,35 +183,13 @@ class GBTree : public GradientBooster {
|
||||
void DoBoost(DMatrix* p_fmat,
|
||||
std::vector<bst_gpair>* in_gpair,
|
||||
ObjFunction* obj) override {
|
||||
const std::vector<bst_gpair>& gpair = *in_gpair;
|
||||
std::vector<std::vector<std::unique_ptr<RegTree> > > new_trees;
|
||||
const int ngroup = model_.param.num_output_group;
|
||||
monitor.Start("BoostNewTrees");
|
||||
if (ngroup == 1) {
|
||||
std::vector<std::unique_ptr<RegTree> > ret;
|
||||
BoostNewTrees(gpair, p_fmat, 0, &ret);
|
||||
new_trees.push_back(std::move(ret));
|
||||
} else {
|
||||
CHECK_EQ(gpair.size() % ngroup, 0U)
|
||||
<< "must have exactly ngroup*nrow gpairs";
|
||||
std::vector<bst_gpair> tmp(gpair.size() / ngroup);
|
||||
for (int gid = 0; gid < ngroup; ++gid) {
|
||||
bst_omp_uint nsize = static_cast<bst_omp_uint>(tmp.size());
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint i = 0; i < nsize; ++i) {
|
||||
tmp[i] = gpair[i * ngroup + gid];
|
||||
}
|
||||
std::vector<std::unique_ptr<RegTree> > ret;
|
||||
BoostNewTrees(tmp, p_fmat, gid, &ret);
|
||||
new_trees.push_back(std::move(ret));
|
||||
}
|
||||
}
|
||||
monitor.Stop("BoostNewTrees");
|
||||
monitor.Start("CommitModel");
|
||||
for (int gid = 0; gid < ngroup; ++gid) {
|
||||
this->CommitModel(std::move(new_trees[gid]), gid);
|
||||
}
|
||||
monitor.Stop("CommitModel");
|
||||
DoBoostHelper(p_fmat, in_gpair, obj);
|
||||
}
|
||||
|
||||
// HostDeviceVector overload of DoBoost: hands the gradient vector to
// DoBoostHelper without converting it, so the helper is instantiated
// with TVec = HostDeviceVector<bst_gpair>.
void DoBoost(DMatrix* p_fmat,
|
||||
HostDeviceVector<bst_gpair>* in_gpair,
|
||||
ObjFunction* obj) override {
|
||||
DoBoostHelper(p_fmat, in_gpair, obj);
|
||||
}
|
||||
|
||||
void PredictBatch(DMatrix* p_fmat,
|
||||
@@ -219,6 +198,12 @@ class GBTree : public GradientBooster {
|
||||
predictor->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit);
|
||||
}
|
||||
|
||||
// HostDeviceVector overload of PredictBatch: forwards to the predictor's
// PredictBatch with model_, the literal 0 and ntree_limit.
// NOTE(review): the 0 is presumably the index of the first tree to use;
// confirm against the Predictor interface.
void PredictBatch(DMatrix* p_fmat,
|
||||
HostDeviceVector<bst_float>* out_preds,
|
||||
unsigned ntree_limit) override {
|
||||
predictor->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit);
|
||||
}
|
||||
|
||||
void PredictInstance(const SparseBatch::Inst& inst,
|
||||
std::vector<bst_float>* out_preds,
|
||||
unsigned ntree_limit,
|
||||
@@ -257,9 +242,48 @@ class GBTree : public GradientBooster {
|
||||
updaters.push_back(std::move(up));
|
||||
}
|
||||
}
|
||||
|
||||
// TVec is either std::vector<bst_gpair> or HostDeviceVector<bst_gpair>
|
||||
// Shared boosting routine called by both DoBoost overloads.
// Builds the new trees for every output group (timed under
// "BoostNewTrees"), then commits them group by group (timed under
// "CommitModel").
//   p_fmat   - training data, passed through to BoostNewTrees
//   in_gpair - gradient pairs; with more than one group they are read at
//              index i * ngroup + gid
//   obj      - not used inside this helper
template <typename TVec>
|
||||
void DoBoostHelper(DMatrix* p_fmat,
|
||||
TVec* in_gpair,
|
||||
ObjFunction* obj) {
|
||||
std::vector<std::vector<std::unique_ptr<RegTree> > > new_trees;
|
||||
const int ngroup = model_.param.num_output_group;
|
||||
monitor.Start("BoostNewTrees");
|
||||
// single output group: the gradient vector is passed on unchanged
if (ngroup == 1) {
|
||||
std::vector<std::unique_ptr<RegTree> > ret;
|
||||
BoostNewTrees(in_gpair, p_fmat, 0, &ret);
|
||||
new_trees.push_back(std::move(ret));
|
||||
} else {
|
||||
CHECK_EQ(in_gpair->size() % ngroup, 0U)
|
||||
<< "must have exactly ngroup*nrow gpairs";
|
||||
// scratch buffer holding the gradients of one group at a time
std::vector<bst_gpair> tmp(in_gpair->size() / ngroup);
|
||||
// host-side reference to the gradients
// NOTE(review): presumably syncs device data to host when TVec is
// HostDeviceVector; confirm against host_device_vector.h
auto& gpair_h = HostDeviceVector<bst_gpair>::data_h(in_gpair);
|
||||
for (int gid = 0; gid < ngroup; ++gid) {
|
||||
bst_omp_uint nsize = static_cast<bst_omp_uint>(tmp.size());
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint i = 0; i < nsize; ++i) {
|
||||
// gather the entries belonging to group gid from the interleaved layout
tmp[i] = gpair_h[i * ngroup + gid];
|
||||
}
|
||||
std::vector<std::unique_ptr<RegTree> > ret;
|
||||
// tmp is a std::vector, so the multi-group path always calls
// BoostNewTrees with a std::vector<bst_gpair>*
BoostNewTrees(&tmp, p_fmat, gid, &ret);
|
||||
new_trees.push_back(std::move(ret));
|
||||
}
|
||||
}
|
||||
monitor.Stop("BoostNewTrees");
|
||||
monitor.Start("CommitModel");
|
||||
// new_trees holds one entry per group, in group order
for (int gid = 0; gid < ngroup; ++gid) {
|
||||
this->CommitModel(std::move(new_trees[gid]), gid);
|
||||
}
|
||||
monitor.Stop("CommitModel");
|
||||
}
|
||||
|
||||
// boost new trees for one specific output group
|
||||
// TVec is either std::vector<bst_gpair> or HostDeviceVector<bst_gpair>
|
||||
template <typename TVec>
|
||||
inline void
|
||||
BoostNewTrees(const std::vector<bst_gpair> &gpair,
|
||||
BoostNewTrees(TVec* gpair,
|
||||
DMatrix *p_fmat,
|
||||
int bst_group,
|
||||
std::vector<std::unique_ptr<RegTree> >* ret) {
|
||||
@@ -286,9 +310,24 @@ class GBTree : public GradientBooster {
|
||||
}
|
||||
// update the trees
|
||||
for (auto& up : updaters) {
|
||||
up->Update(gpair, p_fmat, new_trees);
|
||||
UpdateHelper(up.get(), gpair, p_fmat, new_trees);
|
||||
}
|
||||
}
|
||||
|
||||
// std::vector overload of UpdateHelper: dereferences gpair and calls the
// updater's Update with the vector itself, plus p_fmat and new_trees.
void UpdateHelper(TreeUpdater* updater,
|
||||
std::vector<bst_gpair>* gpair,
|
||||
DMatrix *p_fmat,
|
||||
const std::vector<RegTree*>& new_trees) {
|
||||
updater->Update(*gpair, p_fmat, new_trees);
|
||||
}
|
||||
|
||||
// HostDeviceVector overload of UpdateHelper: passes the gpair pointer
// straight to the updater's Update, plus p_fmat and new_trees, without
// dereferencing it.
void UpdateHelper(TreeUpdater* updater,
|
||||
HostDeviceVector<bst_gpair>* gpair,
|
||||
DMatrix *p_fmat,
|
||||
const std::vector<RegTree*>& new_trees) {
|
||||
updater->Update(gpair, p_fmat, new_trees);
|
||||
}
|
||||
|
||||
// commit new trees all at once
|
||||
virtual void
|
||||
CommitModel(std::vector<std::unique_ptr<RegTree> >&& new_trees,
|
||||
|
||||
Reference in New Issue
Block a user