Simplify inplace-predict. (#7910)

Pass the `X` as part of Proxy DMatrix instead of an independent `dmlc::any`.
This commit is contained in:
Jiaming Yuan
2022-05-18 17:52:00 +08:00
committed by GitHub
parent 19775ffe15
commit 765097d514
17 changed files with 317 additions and 297 deletions

View File

@@ -795,88 +795,75 @@ class Dart : public GBTree {
this->PredictBatchImpl(p_fmat, p_out_preds, training, layer_begin, layer_end);
}
void InplacePredict(dmlc::any const &x, std::shared_ptr<DMatrix> p_m,
float missing, PredictionCacheEntry *out_preds,
uint32_t layer_begin, unsigned layer_end) const override {
void InplacePredict(std::shared_ptr<DMatrix> p_fmat, float missing,
PredictionCacheEntry* p_out_preds, uint32_t layer_begin,
unsigned layer_end) const override {
uint32_t tree_begin, tree_end;
std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, layer_begin, layer_end);
std::vector<Predictor const *> predictors{
auto n_groups = model_.learner_model_param->num_output_group;
std::vector<Predictor const*> predictors {
cpu_predictor_.get(),
#if defined(XGBOOST_USE_CUDA)
gpu_predictor_.get()
#endif // defined(XGBOOST_USE_CUDA)
};
Predictor const * predictor {nullptr};
MetaInfo info;
Predictor const* predictor{nullptr};
StringView msg{"Unsupported data type for inplace predict."};
int32_t device = GenericParameter::kCpuId;
PredictionCacheEntry predts;
// Inplace predict is not used for training, so no need to drop tree.
for (size_t i = tree_begin; i < tree_end; ++i) {
if (ctx_->gpu_id != Context::kCpuId) {
predts.predictions.SetDevice(ctx_->gpu_id);
}
predts.predictions.Resize(p_fmat->Info().num_row_ * n_groups, 0);
auto predict_impl = [&](size_t i) {
predts.predictions.Fill(0);
if (tparam_.predictor == PredictorType::kAuto) {
// Try both predictor implementations
bool success = false;
for (auto const &p : predictors) {
if (p && p->InplacePredict(x, nullptr, model_, missing, &predts, i,
i + 1)) {
for (auto const& p : predictors) {
if (p && p->InplacePredict(p_fmat, model_, missing, &predts, i, i + 1)) {
success = true;
predictor = p;
#if defined(XGBOOST_USE_CUDA)
device = predts.predictions.DeviceIdx();
#endif // defined(XGBOOST_USE_CUDA)
break;
}
}
CHECK(success) << msg;
} else {
// No base margin from meta info for each tree
predictor = this->GetPredictor().get();
bool success = predictor->InplacePredict(x, nullptr, model_, missing,
&predts, i, i + 1);
device = predts.predictions.DeviceIdx();
bool success = predictor->InplacePredict(p_fmat, model_, missing, &predts, i, i + 1);
CHECK(success) << msg << std::endl
<< "Current Predictor: "
<< (tparam_.predictor == PredictorType::kCPUPredictor
? "cpu_predictor"
: "gpu_predictor");
<< (tparam_.predictor == PredictorType::kCPUPredictor ? "cpu_predictor"
: "gpu_predictor");
}
};
auto w = this->weight_drop_.at(i);
size_t n_groups = model_.learner_model_param->num_output_group;
auto n_rows = predts.predictions.Size() / n_groups;
// Inplace predict is not used for training, so no need to drop tree.
for (size_t i = tree_begin; i < tree_end; ++i) {
predict_impl(i);
if (i == tree_begin) {
// base margin is added here.
if (p_m) {
p_m->Info().num_row_ = n_rows;
predictor->InitOutPredictions(p_m->Info(), &out_preds->predictions,
model_);
} else {
info.num_row_ = n_rows;
predictor->InitOutPredictions(info, &out_preds->predictions, model_);
}
predictor->InitOutPredictions(p_fmat->Info(), &p_out_preds->predictions, model_);
}
// Multiple the tree weight
CHECK_EQ(predts.predictions.Size(), out_preds->predictions.Size());
auto w = this->weight_drop_.at(i);
auto group = model_.tree_info.at(i);
CHECK_EQ(predts.predictions.Size(), p_out_preds->predictions.Size());
if (device == GenericParameter::kCpuId) {
auto &h_predts = predts.predictions.HostVector();
auto &h_out_predts = out_preds->predictions.HostVector();
size_t n_rows = p_fmat->Info().num_row_;
if (predts.predictions.DeviceIdx() != Context::kCpuId) {
p_out_preds->predictions.SetDevice(predts.predictions.DeviceIdx());
GPUDartInplacePredictInc(p_out_preds->predictions.DeviceSpan(),
predts.predictions.DeviceSpan(), w, n_rows,
model_.learner_model_param->base_score, n_groups, group);
} else {
auto& h_predts = predts.predictions.HostVector();
auto& h_out_predts = p_out_preds->predictions.HostVector();
common::ParallelFor(n_rows, ctx_->Threads(), [&](auto ridx) {
const size_t offset = ridx * n_groups + group;
// Need to remove the base margin from individual tree.
h_out_predts[offset] += (h_predts[offset] - model_.learner_model_param->base_score) * w;
});
} else {
out_preds->predictions.SetDevice(device);
predts.predictions.SetDevice(device);
GPUDartInplacePredictInc(out_preds->predictions.DeviceSpan(),
predts.predictions.DeviceSpan(), w, n_rows,
model_.learner_model_param->base_score,
n_groups, group);
}
}
}

View File

@@ -261,8 +261,7 @@ class GBTree : public GradientBooster {
void PredictBatch(DMatrix *p_fmat, PredictionCacheEntry *out_preds,
bool training, unsigned layer_begin, unsigned layer_end) override;
void InplacePredict(dmlc::any const &x, std::shared_ptr<DMatrix> p_m,
float missing, PredictionCacheEntry *out_preds,
void InplacePredict(std::shared_ptr<DMatrix> p_m, float missing, PredictionCacheEntry* out_preds,
uint32_t layer_begin, unsigned layer_end) const override {
CHECK(configured_);
uint32_t tree_begin, tree_end;
@@ -278,15 +277,14 @@ class GBTree : public GradientBooster {
if (tparam_.predictor == PredictorType::kAuto) {
// Try both predictor implementations
for (auto const &p : predictors) {
if (p && p->InplacePredict(x, p_m, model_, missing, out_preds,
tree_begin, tree_end)) {
if (p && p->InplacePredict(p_m, model_, missing, out_preds, tree_begin, tree_end)) {
return;
}
}
LOG(FATAL) << msg;
} else {
bool success = this->GetPredictor()->InplacePredict(
x, p_m, model_, missing, out_preds, tree_begin, tree_end);
bool success = this->GetPredictor()->InplacePredict(p_m, model_, missing, out_preds,
tree_begin, tree_end);
CHECK(success) << msg << std::endl
<< "Current Predictor: "
<< (tparam_.predictor == PredictorType::kCPUPredictor