Replaced std::vector-based interfaces with HostDeviceVector-based interfaces. (#3116)

* Replaced std::vector-based interfaces with HostDeviceVector-based interfaces.

- replacement was performed in the learner, boosters, predictors,
  updaters, and objective functions
- only interfaces used in training were replaced;
  interfaces like PredictInstance() still use std::vector
- refactoring necessary for replacement of interfaces was also performed,
  such as using HostDeviceVector in prediction cache

* HostDeviceVector-based interfaces for custom objective function example plugin.
This commit is contained in:
Andrew V. Adinetz
2018-02-28 01:00:04 +01:00
committed by Rory Mitchell
parent 11bfa8584d
commit d5992dd881
38 changed files with 371 additions and 519 deletions

View File

@@ -104,14 +104,43 @@ class CPUPredictor : public Predictor {
tree_begin, ntree_limit);
}
public:
void PredictBatch(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model, int tree_begin,
unsigned ntree_limit = 0) override {
PredictBatch(dmat, &out_preds->data_h(), model, tree_begin, ntree_limit);
// Attempts to serve a prediction from the per-DMatrix cache. Succeeds only
// when the requested tree limit covers the whole model (so the cached full
// prediction is valid) and a non-empty cache entry exists for `dmat`.
// Returns true and fills `out_preds` on a hit, false otherwise.
bool PredictFromCache(DMatrix* dmat,
                      HostDeviceVector<bst_float>* out_preds,
                      const gbm::GBTreeModel& model,
                      unsigned ntree_limit) {
  // A nonzero limit smaller than the model's tree count means the cached
  // full-model prediction cannot be reused.
  const bool covers_full_model =
      ntree_limit == 0 ||
      ntree_limit * model.param.num_output_group >= model.trees.size();
  if (!covers_full_model) {
    return false;
  }
  auto entry = cache_.find(dmat);
  if (entry == cache_.end()) {
    return false;
  }
  HostDeviceVector<bst_float>& cached = entry->second.predictions;
  if (cached.size() == 0) {
    // Entry exists but has not been populated yet.
    return false;
  }
  out_preds->resize(cached.size());
  std::copy(cached.data_h().begin(), cached.data_h().end(),
            out_preds->data_h().begin());
  return true;
}
void PredictBatch(DMatrix* dmat, std::vector<bst_float>* out_preds,
void InitOutPredictions(const MetaInfo& info,
HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model) const {
size_t n = model.param.num_output_group * info.num_row;
const std::vector<bst_float>& base_margin = info.base_margin;
out_preds->resize(n);
std::vector<bst_float>& out_preds_h = out_preds->data_h();
if (base_margin.size() != 0) {
CHECK_EQ(out_preds->size(), n);
std::copy(base_margin.begin(), base_margin.end(), out_preds_h.begin());
} else {
std::fill(out_preds_h.begin(), out_preds_h.end(), model.base_margin);
}
}
public:
void PredictBatch(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model, int tree_begin,
unsigned ntree_limit = 0) override {
if (this->PredictFromCache(dmat, out_preds, model, ntree_limit)) {
@@ -125,12 +154,14 @@ class CPUPredictor : public Predictor {
ntree_limit = static_cast<unsigned>(model.trees.size());
}
this->PredLoopInternal(dmat, out_preds, model, tree_begin, ntree_limit);
this->PredLoopInternal(dmat, &out_preds->data_h(), model,
tree_begin, ntree_limit);
}
void UpdatePredictionCache(const gbm::GBTreeModel& model,
std::vector<std::unique_ptr<TreeUpdater>>* updaters,
int num_new_trees) override {
void UpdatePredictionCache(
const gbm::GBTreeModel& model,
std::vector<std::unique_ptr<TreeUpdater>>* updaters,
int num_new_trees) override {
int old_ntree = model.trees.size() - num_new_trees;
// update cache entry
for (auto& kv : cache_) {
@@ -138,7 +169,7 @@ class CPUPredictor : public Predictor {
if (e.predictions.size() == 0) {
InitOutPredictions(e.data->info(), &(e.predictions), model);
PredLoopInternal(e.data.get(), &(e.predictions), model, 0,
PredLoopInternal(e.data.get(), &(e.predictions.data_h()), model, 0,
model.trees.size());
} else if (model.param.num_output_group == 1 && updaters->size() > 0 &&
num_new_trees == 1 &&
@@ -146,7 +177,7 @@ class CPUPredictor : public Predictor {
&(e.predictions))) {
{} // do nothing
} else {
PredLoopInternal(e.data.get(), &(e.predictions), model, old_ntree,
PredLoopInternal(e.data.get(), &(e.predictions.data_h()), model, old_ntree,
model.trees.size());
}
}

View File

@@ -256,8 +256,6 @@ class GPUPredictor : public xgboost::Predictor {
HostDeviceVector<bst_float> predictions;
};
std::unordered_map<DMatrix*, DevicePredictionCacheEntry> device_cache_;
private:
void DevicePredictInternal(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model, size_t tree_begin,
@@ -337,25 +335,16 @@ class GPUPredictor : public xgboost::Predictor {
public:
GPUPredictor() : cpu_predictor(Predictor::Create("cpu_predictor")) {}
void PredictBatch(DMatrix* dmat, std::vector<bst_float>* out_preds,
const gbm::GBTreeModel& model, int tree_begin,
unsigned ntree_limit = 0) override {
HostDeviceVector<bst_float> out_preds_d;
PredictBatch(dmat, &out_preds_d, model, tree_begin, ntree_limit);
out_preds->resize(out_preds_d.size());
thrust::copy(out_preds_d.tbegin(param.gpu_id),
out_preds_d.tend(param.gpu_id), out_preds->begin());
}
void PredictBatch(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model, int tree_begin,
unsigned ntree_limit = 0) override {
if (this->PredictFromCacheDevice(dmat, out_preds, model, ntree_limit)) {
if (this->PredictFromCache(dmat, out_preds, model, ntree_limit)) {
return;
}
this->InitOutPredictionsDevice(dmat->info(), out_preds, model);
this->InitOutPredictions(dmat->info(), out_preds, model);
int tree_end = ntree_limit * model.param.num_output_group;
if (ntree_limit == 0 || ntree_limit > model.trees.size()) {
tree_end = static_cast<unsigned>(model.trees.size());
}
@@ -363,13 +352,13 @@ class GPUPredictor : public xgboost::Predictor {
DevicePredictInternal(dmat, out_preds, model, tree_begin, tree_end);
}
void InitOutPredictionsDevice(const MetaInfo& info,
protected:
void InitOutPredictions(const MetaInfo& info,
HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model) const {
size_t n = model.param.num_output_group * info.num_row;
const std::vector<bst_float>& base_margin = info.base_margin;
out_preds->resize(n, param.gpu_id);
out_preds->resize(n, 0.0f, param.gpu_id);
if (base_margin.size() != 0) {
CHECK_EQ(out_preds->size(), n);
thrust::copy(base_margin.begin(), base_margin.end(), out_preds->tbegin(param.gpu_id));
@@ -380,29 +369,16 @@ class GPUPredictor : public xgboost::Predictor {
}
bool PredictFromCache(DMatrix* dmat,
std::vector<bst_float>* out_preds,
HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model,
unsigned ntree_limit) {
HostDeviceVector<bst_float> out_preds_d(0, -1);
bool result = PredictFromCacheDevice(dmat, &out_preds_d, model, ntree_limit);
if (!result) return false;
out_preds->resize(out_preds_d.size(), param.gpu_id);
thrust::copy(out_preds_d.tbegin(param.gpu_id),
out_preds_d.tend(param.gpu_id), out_preds->begin());
return true;
}
bool PredictFromCacheDevice(DMatrix* dmat,
HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model,
unsigned ntree_limit) {
if (ntree_limit == 0 ||
ntree_limit * model.param.num_output_group >= model.trees.size()) {
auto it = device_cache_.find(dmat);
if (it != device_cache_.end()) {
auto it = cache_.find(dmat);
if (it != cache_.end()) {
HostDeviceVector<bst_float>& y = it->second.predictions;
if (y.size() != 0) {
out_preds->resize(y.size(), param.gpu_id);
out_preds->resize(y.size(), 0.0f, param.gpu_id);
thrust::copy(y.tbegin(param.gpu_id), y.tend(param.gpu_id),
out_preds->tbegin(param.gpu_id));
return true;
@@ -418,15 +394,15 @@ class GPUPredictor : public xgboost::Predictor {
int num_new_trees) override {
auto old_ntree = model.trees.size() - num_new_trees;
// update cache entry
for (auto& kv : device_cache_) {
DevicePredictionCacheEntry& e = kv.second;
for (auto& kv : cache_) {
PredictionCacheEntry& e = kv.second;
DMatrix* dmat = kv.first;
HostDeviceVector<bst_float>& predictions = e.predictions;
if (predictions.size() == 0) {
// ensure that the device in predictions is correct
predictions.resize(0, param.gpu_id);
cpu_predictor->PredictBatch(dmat, &predictions.data_h(), model, 0,
predictions.resize(0, 0.0f, param.gpu_id);
cpu_predictor->PredictBatch(dmat, &predictions, model, 0,
static_cast<bst_uint>(model.trees.size()));
} else if (model.param.num_output_group == 1 && updaters->size() > 0 &&
num_new_trees == 1 &&
@@ -477,8 +453,6 @@ class GPUPredictor : public xgboost::Predictor {
Predictor::Init(cfg, cache);
cpu_predictor->Init(cfg, cache);
param.InitAllowUnknown(cfg);
for (const std::shared_ptr<DMatrix>& d : cache)
device_cache_[d.get()].data = d;
max_shared_memory_bytes = dh::max_shared_memory(param.gpu_id);
}

View File

@@ -11,43 +11,8 @@ namespace xgboost {
void Predictor::Init(
const std::vector<std::pair<std::string, std::string>>& cfg,
const std::vector<std::shared_ptr<DMatrix>>& cache) {
for (const std::shared_ptr<DMatrix>& d : cache) {
PredictionCacheEntry e;
e.data = d;
cache_[d.get()] = std::move(e);
}
}
bool Predictor::PredictFromCache(DMatrix* dmat,
std::vector<bst_float>* out_preds,
const gbm::GBTreeModel& model,
unsigned ntree_limit) {
if (ntree_limit == 0 ||
ntree_limit * model.param.num_output_group >= model.trees.size()) {
auto it = cache_.find(dmat);
if (it != cache_.end()) {
std::vector<bst_float>& y = it->second.predictions;
if (y.size() != 0) {
out_preds->resize(y.size());
std::copy(y.begin(), y.end(), out_preds->begin());
return true;
}
}
}
return false;
}
void Predictor::InitOutPredictions(const MetaInfo& info,
std::vector<bst_float>* out_preds,
const gbm::GBTreeModel& model) const {
size_t n = model.param.num_output_group * info.num_row;
const std::vector<bst_float>& base_margin = info.base_margin;
out_preds->resize(n);
if (base_margin.size() != 0) {
CHECK_EQ(out_preds->size(), n);
std::copy(base_margin.begin(), base_margin.end(), out_preds->begin());
} else {
std::fill(out_preds->begin(), out_preds->end(), model.base_margin);
}
for (const std::shared_ptr<DMatrix>& d : cache)
cache_[d.get()].data = d;
}
Predictor* Predictor::Create(std::string name) {
auto* e = ::dmlc::Registry<PredictorReg>::Get()->Find(name);