[GPU-Plugin] Add GPU accelerated prediction (#2593)
* [GPU-Plugin] Add GPU accelerated prediction * Improve allocation message * Update documentation * Resolve linker error for predictor * Add unit tests
This commit is contained in:
@@ -45,6 +45,7 @@ struct GBTreeTrainParam : public dmlc::Parameter<GBTreeTrainParam> {
|
||||
int process_type;
|
||||
// flag to print out detailed breakdown of runtime
|
||||
int debug_verbose;
|
||||
std::string predictor;
|
||||
// declare parameters
|
||||
DMLC_DECLARE_PARAMETER(GBTreeTrainParam) {
|
||||
DMLC_DECLARE_FIELD(num_parallel_tree)
|
||||
@@ -67,6 +68,9 @@ struct GBTreeTrainParam : public dmlc::Parameter<GBTreeTrainParam> {
|
||||
.describe("flag to print out detailed breakdown of runtime");
|
||||
// add alias
|
||||
DMLC_DECLARE_ALIAS(updater_seq, updater);
|
||||
DMLC_DECLARE_FIELD(predictor)
|
||||
.set_default("cpu_predictor")
|
||||
.describe("Predictor algorithm type");
|
||||
}
|
||||
};
|
||||
|
||||
@@ -130,13 +134,10 @@ struct CacheEntry {
|
||||
// gradient boosted trees
|
||||
class GBTree : public GradientBooster {
|
||||
public:
|
||||
explicit GBTree(bst_float base_margin)
|
||||
: model_(base_margin),
|
||||
predictor(
|
||||
std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor"))) {}
|
||||
explicit GBTree(bst_float base_margin) : model_(base_margin) {}
|
||||
|
||||
void InitCache(const std::vector<std::shared_ptr<DMatrix> > &cache) {
|
||||
predictor->InitCache(cache);
|
||||
cache_ = cache;
|
||||
}
|
||||
|
||||
void Configure(const std::vector<std::pair<std::string, std::string> >& cfg) override {
|
||||
@@ -153,6 +154,10 @@ class GBTree : public GradientBooster {
|
||||
if (tparam.process_type == kUpdate) {
|
||||
model_.InitTreesToUpdate();
|
||||
}
|
||||
|
||||
// configure predictor
|
||||
predictor = std::unique_ptr<Predictor>(Predictor::Create(tparam.predictor));
|
||||
predictor->Init(cfg, cache_);
|
||||
}
|
||||
|
||||
void Load(dmlc::Stream* fi) override {
|
||||
@@ -300,7 +305,8 @@ class GBTree : public GradientBooster {
|
||||
std::vector<std::pair<std::string, std::string> > cfg;
|
||||
// the updaters that can be applied to each of tree
|
||||
std::vector<std::unique_ptr<TreeUpdater>> updaters;
|
||||
|
||||
// Cached matrices
|
||||
std::vector<std::shared_ptr<DMatrix>> cache_;
|
||||
std::unique_ptr<Predictor> predictor;
|
||||
};
|
||||
|
||||
|
||||
@@ -165,9 +165,19 @@ class LearnerImpl : public Learner {
|
||||
<< "grow_fast_histmaker.";
|
||||
cfg_["updater"] = "grow_fast_histmaker";
|
||||
} else if (tparam.tree_method == 4) {
|
||||
cfg_["updater"] = "grow_gpu,prune";
|
||||
if (cfg_.count("updater") == 0) {
|
||||
cfg_["updater"] = "grow_gpu,prune";
|
||||
}
|
||||
if (cfg_.count("predictor") == 0) {
|
||||
cfg_["predictor"] = "gpu_predictor";
|
||||
}
|
||||
} else if (tparam.tree_method == 5) {
|
||||
cfg_["updater"] = "grow_gpu_hist";
|
||||
if (cfg_.count("updater") == 0) {
|
||||
cfg_["updater"] = "grow_gpu_hist";
|
||||
}
|
||||
if (cfg_.count("predictor") == 0) {
|
||||
cfg_["predictor"] = "gpu_predictor";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -9,6 +9,8 @@
|
||||
namespace xgboost {
|
||||
namespace predictor {
|
||||
|
||||
DMLC_REGISTRY_FILE_TAG(cpu_predictor);
|
||||
|
||||
class CPUPredictor : public Predictor {
|
||||
protected:
|
||||
static bst_float PredValue(const RowBatch::Inst& inst,
|
||||
@@ -28,19 +30,6 @@ class CPUPredictor : public Predictor {
|
||||
return psum;
|
||||
}
|
||||
|
||||
void InitOutPredictions(const MetaInfo& info,
|
||||
std::vector<bst_float>* out_preds,
|
||||
const gbm::GBTreeModel& model) const {
|
||||
size_t n = model.param.num_output_group * info.num_row;
|
||||
const std::vector<bst_float>& base_margin = info.base_margin;
|
||||
out_preds->resize(n);
|
||||
if (base_margin.size() != 0) {
|
||||
CHECK_EQ(out_preds->size(), n);
|
||||
std::copy(base_margin.begin(), base_margin.end(), out_preds->begin());
|
||||
} else {
|
||||
std::fill(out_preds->begin(), out_preds->end(), model.base_margin);
|
||||
}
|
||||
}
|
||||
// init thread buffers
|
||||
inline void InitThreadTemp(int nthread, int num_feature) {
|
||||
int prev_thread_temp_size = thread_temp.size();
|
||||
@@ -106,33 +95,6 @@ class CPUPredictor : public Predictor {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* \fn bool PredictFromCache(DMatrix* dmat, std::vector<bst_float>*
|
||||
* out_preds, const gbm::GBTreeModel& model, unsigned ntree_limit = 0)
|
||||
*
|
||||
* \brief Attempt to predict from cache.
|
||||
*
|
||||
* \return True if it succeeds, false if it fails.
|
||||
*/
|
||||
bool PredictFromCache(DMatrix* dmat, std::vector<bst_float>* out_preds,
|
||||
const gbm::GBTreeModel& model,
|
||||
unsigned ntree_limit = 0) {
|
||||
if (ntree_limit == 0 ||
|
||||
ntree_limit * model.param.num_output_group >= model.trees.size()) {
|
||||
auto it = cache_.find(dmat);
|
||||
if (it != cache_.end()) {
|
||||
std::vector<bst_float>& y = it->second.predictions;
|
||||
if (y.size() != 0) {
|
||||
out_preds->resize(y.size());
|
||||
std::copy(y.begin(), y.end(), out_preds->begin());
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void PredLoopInternal(DMatrix* dmat, std::vector<bst_float>* out_preds,
|
||||
const gbm::GBTreeModel& model, int tree_begin,
|
||||
unsigned ntree_limit) {
|
||||
|
||||
@@ -8,13 +8,47 @@ namespace dmlc {
|
||||
DMLC_REGISTRY_ENABLE(::xgboost::PredictorReg);
|
||||
} // namespace dmlc
|
||||
namespace xgboost {
|
||||
void Predictor::InitCache(const std::vector<std::shared_ptr<DMatrix> >& cache) {
|
||||
void Predictor::Init(
|
||||
const std::vector<std::pair<std::string, std::string>>& cfg,
|
||||
const std::vector<std::shared_ptr<DMatrix>>& cache) {
|
||||
for (const std::shared_ptr<DMatrix>& d : cache) {
|
||||
PredictionCacheEntry e;
|
||||
e.data = d;
|
||||
cache_[d.get()] = std::move(e);
|
||||
}
|
||||
}
|
||||
bool Predictor::PredictFromCache(DMatrix* dmat,
|
||||
std::vector<bst_float>* out_preds,
|
||||
const gbm::GBTreeModel& model,
|
||||
unsigned ntree_limit) {
|
||||
if (ntree_limit == 0 ||
|
||||
ntree_limit * model.param.num_output_group >= model.trees.size()) {
|
||||
auto it = cache_.find(dmat);
|
||||
if (it != cache_.end()) {
|
||||
std::vector<bst_float>& y = it->second.predictions;
|
||||
if (y.size() != 0) {
|
||||
out_preds->resize(y.size());
|
||||
std::copy(y.begin(), y.end(), out_preds->begin());
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
void Predictor::InitOutPredictions(const MetaInfo& info,
|
||||
std::vector<bst_float>* out_preds,
|
||||
const gbm::GBTreeModel& model) const {
|
||||
size_t n = model.param.num_output_group * info.num_row;
|
||||
const std::vector<bst_float>& base_margin = info.base_margin;
|
||||
out_preds->resize(n);
|
||||
if (base_margin.size() != 0) {
|
||||
CHECK_EQ(out_preds->size(), n);
|
||||
std::copy(base_margin.begin(), base_margin.end(), out_preds->begin());
|
||||
} else {
|
||||
std::fill(out_preds->begin(), out_preds->end(), model.base_margin);
|
||||
}
|
||||
}
|
||||
Predictor* Predictor::Create(std::string name) {
|
||||
auto* e = ::dmlc::Registry<PredictorReg>::Get()->Find(name);
|
||||
if (e == nullptr) {
|
||||
@@ -23,3 +57,13 @@ Predictor* Predictor::Create(std::string name) {
|
||||
return (e->body)();
|
||||
}
|
||||
} // namespace xgboost
|
||||
|
||||
namespace xgboost {
|
||||
namespace predictor {
|
||||
// List of files that will be force linked in static links.
|
||||
#ifdef XGBOOST_USE_CUDA
|
||||
DMLC_REGISTRY_LINK_TAG(gpu_predictor);
|
||||
#endif
|
||||
DMLC_REGISTRY_LINK_TAG(cpu_predictor);
|
||||
} // namespace predictor
|
||||
} // namespace xgboost
|
||||
|
||||
Reference in New Issue
Block a user