[breaking] Add prediction fucntion for DMatrix and use inplace predict for dask. (#6668)

* Add a new API function for predicting on `DMatrix`.  This function aligns
with rest of the `XGBoosterPredictFrom*` functions on semantic of function
arguments.
* Purge `ntree_limit` from libxgboost, use iteration instead.
* [dask] Use `inplace_predict` by default for dask sklearn models.
* [dask] Run prediction shape inference on worker instead of client.

The breaking change is in the Python sklearn `apply` function, I made it to be
consistent with other prediction functions where `best_iteration` is used by
default.
This commit is contained in:
Jiaming Yuan
2021-02-08 18:26:32 +08:00
committed by GitHub
parent dbb5208a0a
commit 4656b09d5d
29 changed files with 1134 additions and 604 deletions

View File

@@ -619,20 +619,58 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle,
CHECK_HANDLE();
auto *learner = static_cast<Learner*>(handle);
auto& entry = learner->GetThreadLocal().prediction_entry;
learner->Predict(
*static_cast<std::shared_ptr<DMatrix>*>(dmat),
(option_mask & 1) != 0,
&entry.predictions, ntree_limit,
static_cast<bool>(training),
(option_mask & 2) != 0,
(option_mask & 4) != 0,
(option_mask & 8) != 0,
(option_mask & 16) != 0);
auto iteration_end = GetIterationFromTreeLimit(ntree_limit, learner);
learner->Predict(*static_cast<std::shared_ptr<DMatrix> *>(dmat),
(option_mask & 1) != 0, &entry.predictions, 0, iteration_end,
static_cast<bool>(training), (option_mask & 2) != 0,
(option_mask & 4) != 0, (option_mask & 8) != 0,
(option_mask & 16) != 0);
*out_result = dmlc::BeginPtr(entry.predictions.ConstHostVector());
*len = static_cast<xgboost::bst_ulong>(entry.predictions.Size());
API_END();
}
XGB_DLL int XGBoosterPredictFromDMatrix(BoosterHandle handle,
DMatrixHandle dmat,
char const* c_json_config,
xgboost::bst_ulong const **out_shape,
xgboost::bst_ulong *out_dim,
bst_float const **out_result) {
API_BEGIN();
if (handle == nullptr) {
LOG(FATAL) << "Booster has not been intialized or has already been disposed.";
}
if (dmat == nullptr) {
LOG(FATAL) << "DMatrix has not been intialized or has already been disposed.";
}
auto config = Json::Load(StringView{c_json_config});
auto *learner = static_cast<Learner*>(handle);
auto& entry = learner->GetThreadLocal().prediction_entry;
auto p_m = *static_cast<std::shared_ptr<DMatrix> *>(dmat);
auto type = PredictionType(get<Integer const>(config["type"]));
auto iteration_begin = get<Integer const>(config["iteration_begin"]);
auto iteration_end = get<Integer const>(config["iteration_end"]);
learner->Predict(
*static_cast<std::shared_ptr<DMatrix> *>(dmat),
type == PredictionType::kMargin, &entry.predictions, iteration_begin,
iteration_end, get<Boolean const>(config["training"]),
type == PredictionType::kLeaf, type == PredictionType::kContribution,
type == PredictionType::kApproxContribution,
type == PredictionType::kInteraction);
*out_result = dmlc::BeginPtr(entry.predictions.ConstHostVector());
auto &shape = learner->GetThreadLocal().prediction_shape;
auto chunksize = p_m->Info().num_row_ == 0 ? 0 : entry.predictions.Size() / p_m->Info().num_row_;
auto rounds = iteration_end - iteration_begin;
rounds = rounds == 0 ? learner->BoostedRounds() : rounds;
// Determine shape
bool strict_shape = get<Boolean const>(config["strict_shape"]);
CalcPredictShape(strict_shape, type, p_m->Info().num_row_,
p_m->Info().num_col_, chunksize, learner->Groups(), rounds,
&shape, out_dim);
*out_shape = dmlc::BeginPtr(shape);
API_END();
}
template <typename T>
void InplacePredictImpl(std::shared_ptr<T> x, std::shared_ptr<DMatrix> p_m,
@@ -705,7 +743,7 @@ XGB_DLL int XGBoosterPredictFromCSR(BoosterHandle handle, char const *indptr,
}
#if !defined(XGBOOST_USE_CUDA)
XGB_DLL int XGBoosterPredictFromArrayInterface(
XGB_DLL int XGBoosterPredictFromCUDAArray(
BoosterHandle handle, char const *c_json_strs, char const *c_json_config,
DMatrixHandle m, xgboost::bst_ulong const **out_shape, xgboost::bst_ulong *out_dim,
const float **out_result) {
@@ -715,7 +753,7 @@ XGB_DLL int XGBoosterPredictFromArrayInterface(
API_END();
}
XGB_DLL int XGBoosterPredictFromArrayInterfaceColumns(
XGB_DLL int XGBoosterPredictFromCUDAColumnar(
BoosterHandle handle, char const *c_json_strs, char const *c_json_config,
DMatrixHandle m, xgboost::bst_ulong const **out_shape, xgboost::bst_ulong *out_dim,
const float **out_result) {