[breaking] Add prediction function for DMatrix and use inplace predict for dask. (#6668)

* Add a new API function for predicting on `DMatrix`.  This function aligns
with the rest of the `XGBoosterPredictFrom*` functions on the semantics of
function arguments.
* Purge `ntree_limit` from libxgboost, use iteration instead.
* [dask] Use `inplace_predict` by default for dask sklearn models.
* [dask] Run prediction shape inference on worker instead of client.

The breaking change is in the Python sklearn `apply` function: it is now
consistent with the other prediction functions, where `best_iteration` is used
by default.
This commit is contained in:
Jiaming Yuan
2021-02-08 18:26:32 +08:00
committed by GitHub
parent dbb5208a0a
commit 4656b09d5d
29 changed files with 1134 additions and 604 deletions

View File

@@ -704,8 +704,9 @@ XGB_DLL int XGBoosterEvalOneIter(BoosterHandle handle,
const char *evnames[],
bst_ulong len,
const char **out_result);
/*!
* \brief make prediction based on dmat
* \brief make prediction based on dmat (deprecated, use `XGBoosterPredictFromDMatrix` instead)
* \param handle handle
* \param dmat data matrix
* \param option_mask bit-mask of options taken in prediction, possible values
@@ -734,6 +735,165 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle,
int training,
bst_ulong *out_len,
const float **out_result);
/*!
* \brief Make prediction from DMatrix, replacing `XGBoosterPredict`.
*
* \param handle Booster handle
* \param dmat DMatrix handle
* \param c_json_config String encoded predict configuration in JSON format.
*
* "type": [0, 5]
* 0: normal prediction
* 1: output margin
* 2: predict contribution
* 3: predict approximated contribution
* 4: predict feature interaction
* 5: predict leaf
* "training": bool
* Whether the prediction function is used as part of a training loop. **Not used
* for inplace prediction**.
*
* Prediction can be run in 2 scenarios:
* 1. Given data matrix X, obtain prediction y_pred from the model.
* 2. Obtain the prediction for computing gradients. For example, DART booster performs dropout
* during training, and the prediction result will be different from the one obtained by normal
* inference step due to dropped trees.
* Set training=false for the first scenario. Set training=true for the second
* scenario. The second scenario applies when you are defining a custom objective
* function.
* "iteration_begin": int
* Beginning iteration of prediction.
* "iteration_end": int
* End iteration of prediction. When set to 0, this becomes the size of the tree model.
* "strict_shape": bool
* Whether the output should be reshaped with stricter rules. If set to true,
* normal/margin/contrib/interaction predict will output a consistent shape
* regardless of whether a multi-class model is used, and leaf prediction will output a 4-dim
* array representing: (n_samples, n_iterations, n_classes, n_trees_in_forest)
*
* Run a normal prediction with strict output shape, 2 dim for softprob, 1 dim for others.
* \code
* {
* "type": 0,
* "training": false,
* "iteration_begin": 0,
* "iteration_end": 0,
* "strict_shape": true
* }
* \endcode
*
* \param out_shape Shape of output prediction (copy before use).
* \param out_dim Dimension of output prediction.
* \param out_result Buffer storing prediction value (copy before use).
*
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterPredictFromDMatrix(BoosterHandle handle,
DMatrixHandle dmat,
char const* c_json_config,
bst_ulong const **out_shape,
bst_ulong *out_dim,
float const **out_result);
/*!
* \brief Inplace prediction from CPU dense matrix.
*
* \param handle Booster handle.
* \param values JSON encoded __array_interface__ to values.
* \param c_json_config See `XGBoosterPredictFromDMatrix` for more info.
*
* Additional fields for inplace prediction are:
* "missing": float
*
* \param m An optional (NULL if not available) proxy DMatrix instance
* storing meta info.
*
* \param out_shape See `XGBoosterPredictFromDMatrix` for more info.
* \param out_dim See `XGBoosterPredictFromDMatrix` for more info.
* \param out_result See `XGBoosterPredictFromDMatrix` for more info.
*
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterPredictFromDense(BoosterHandle handle,
char const *values,
char const *c_json_config,
DMatrixHandle m,
bst_ulong const **out_shape,
bst_ulong *out_dim,
const float **out_result);
/*!
* \brief Inplace prediction from CPU CSR matrix.
*
* \param handle Booster handle.
* \param indptr JSON encoded __array_interface__ to row pointer in CSR.
* \param indices JSON encoded __array_interface__ to column indices in CSR.
* \param values JSON encoded __array_interface__ to values in CSR.
* \param ncol Number of features in data.
* \param c_json_config See `XGBoosterPredictFromDMatrix` for more info.
* Additional fields for inplace prediction are:
* "missing": float
*
* \param m An optional (NULL if not available) proxy DMatrix instance
* storing meta info.
*
* \param out_shape See `XGBoosterPredictFromDMatrix` for more info.
* \param out_dim See `XGBoosterPredictFromDMatrix` for more info.
* \param out_result See `XGBoosterPredictFromDMatrix` for more info.
*
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterPredictFromCSR(BoosterHandle handle, char const *indptr,
char const *indices, char const *values,
bst_ulong ncol,
char const *c_json_config, DMatrixHandle m,
bst_ulong const **out_shape,
bst_ulong *out_dim,
const float **out_result);
/*!
* \brief Inplace prediction from CUDA Dense matrix (cupy in Python).
*
* \param handle Booster handle
* \param values JSON encoded __cuda_array_interface__ to values.
* \param c_json_config See `XGBoosterPredictFromDMatrix` for more info.
* Additional fields for inplace prediction are:
* "missing": float
*
* \param m An optional (NULL if not available) proxy DMatrix instance
* storing meta info.
* \param out_shape See `XGBoosterPredictFromDMatrix` for more info.
* \param out_dim See `XGBoosterPredictFromDMatrix` for more info.
* \param out_result See `XGBoosterPredictFromDMatrix` for more info.
*
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterPredictFromCudaArray(
BoosterHandle handle, char const *values, char const *c_json_config,
DMatrixHandle m, bst_ulong const **out_shape, bst_ulong *out_dim,
const float **out_result);
/*!
* \brief Inplace prediction from CUDA dense dataframe (cuDF in Python).
*
* \param handle Booster handle
* \param values List of __cuda_array_interface__ for all columns encoded in JSON list.
* \param c_json_config See `XGBoosterPredictFromDMatrix` for more info.
* Additional fields for inplace prediction are:
* "missing": float
*
* \param m An optional (NULL if not available) proxy DMatrix instance
* storing meta info.
* \param out_shape See `XGBoosterPredictFromDMatrix` for more info.
* \param out_dim See `XGBoosterPredictFromDMatrix` for more info.
* \param out_result See `XGBoosterPredictFromDMatrix` for more info.
*
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterPredictFromCudaColumnar(
BoosterHandle handle, char const *values, char const *c_json_config,
DMatrixHandle m, bst_ulong const **out_shape, bst_ulong *out_dim,
const float **out_result);
/*
* ========================== Begin Serialization APIs =========================

View File

@@ -63,7 +63,7 @@ class GradientBooster : public Model, public Configurable {
/*!
* \brief Slice a model using boosting index. The slice m:n indicates taking all trees
* that were fit during the boosting rounds m, (m+1), (m+2), ..., (n-1).
* \param layer_begin Begining of boosted tree layer used for prediction.
* \param layer_begin Beginning of boosted tree layer used for prediction.
* \param layer_end End of booster layer. 0 means do not limit trees.
* \param out Output gradient booster
*/
@@ -99,15 +99,14 @@ class GradientBooster : public Model, public Configurable {
* \param out_preds output vector to hold the predictions
* \param training Whether the prediction value is used for training. For dart booster
* drop out is performed during training.
* \param ntree_limit limit the number of trees used in prediction,
* when it equals 0, this means we do not limit
* number of trees, this parameter is only valid
* for gbtree, but not for gblinear
* \param layer_begin Beginning of boosted tree layer used for prediction.
* \param layer_end End of booster layer. 0 means do not limit trees.
*/
virtual void PredictBatch(DMatrix* dmat,
PredictionCacheEntry* out_preds,
bool training,
unsigned ntree_limit = 0) = 0;
unsigned layer_begin,
unsigned layer_end) = 0;
/*!
* \brief Inplace prediction.
@@ -115,7 +114,7 @@ class GradientBooster : public Model, public Configurable {
* \param x A type erased data adapter.
* \param missing Missing value in the data.
* \param [in,out] out_preds The output preds.
* \param layer_begin (Optional) Begining of boosted tree layer used for prediction.
* \param layer_begin (Optional) Beginning of boosted tree layer used for prediction.
* \param layer_end (Optional) End of booster layer. 0 means do not limit trees.
*/
virtual void InplacePredict(dmlc::any const &, std::shared_ptr<DMatrix>, float,
@@ -132,44 +131,45 @@ class GradientBooster : public Model, public Configurable {
*
* \param inst the instance you want to predict
* \param out_preds output vector to hold the predictions
* \param ntree_limit limit the number of trees used in prediction
* \param layer_begin Beginning of boosted tree layer used for prediction.
* \param layer_end End of booster layer. 0 means do not limit trees.
* \sa Predict
*/
virtual void PredictInstance(const SparsePage::Inst& inst,
std::vector<bst_float>* out_preds,
unsigned ntree_limit = 0) = 0;
unsigned layer_begin, unsigned layer_end) = 0;
/*!
* \brief predict the leaf index of each tree, the output will be nsample * ntree vector
* this is only valid in gbtree predictor
* \param dmat feature matrix
* \param out_preds output vector to hold the predictions
* \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means
* we do not limit number of trees, this parameter is only valid for gbtree, but not for gblinear
* \param layer_begin Beginning of boosted tree layer used for prediction.
* \param layer_end End of booster layer. 0 means do not limit trees.
*/
virtual void PredictLeaf(DMatrix* dmat,
HostDeviceVector<bst_float>* out_preds,
unsigned ntree_limit = 0) = 0;
virtual void PredictLeaf(DMatrix *dmat,
HostDeviceVector<bst_float> *out_preds,
unsigned layer_begin, unsigned layer_end) = 0;
/*!
* \brief feature contributions to individual predictions; the output will be a vector
* of length (nfeats + 1) * num_output_group * nsample, arranged in that order
* \param dmat feature matrix
* \param out_contribs output vector to hold the contributions
* \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means
* we do not limit number of trees
* \param layer_begin Beginning of boosted tree layer used for prediction.
* \param layer_end End of booster layer. 0 means do not limit trees.
* \param approximate use a faster (inconsistent) approximation of SHAP values
* \param condition condition on the condition_feature (0=no, -1=cond off, 1=cond on).
* \param condition_feature feature to condition on (i.e. fix) during calculations
*/
virtual void PredictContribution(DMatrix* dmat,
HostDeviceVector<bst_float>* out_contribs,
unsigned ntree_limit = 0,
unsigned layer_begin, unsigned layer_end,
bool approximate = false, int condition = 0,
unsigned condition_feature = 0) = 0;
virtual void PredictInteractionContributions(DMatrix* dmat,
HostDeviceVector<bst_float>* out_contribs,
unsigned ntree_limit, bool approximate) = 0;
virtual void PredictInteractionContributions(
DMatrix *dmat, HostDeviceVector<bst_float> *out_contribs,
unsigned layer_begin, unsigned layer_end, bool approximate) = 0;
/*!
* \brief dump the model in the requested format

View File

@@ -113,8 +113,8 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
* \param data input data
* \param output_margin whether to only predict margin value instead of transformed prediction
* \param out_preds output vector that stores the prediction
* \param ntree_limit limit number of trees used for boosted tree
* predictor, when it equals 0, this means we are using all the trees
* \param layer_begin Beginning of boosted tree layer used for prediction.
* \param layer_end End of booster layer. 0 means do not limit trees.
* \param training Whether the prediction result is used for training
* \param pred_leaf whether to only predict the leaf index of each tree in a boosted tree predictor
* \param pred_contribs whether to only predict the feature contributions
@@ -124,7 +124,8 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
virtual void Predict(std::shared_ptr<DMatrix> data,
bool output_margin,
HostDeviceVector<bst_float> *out_preds,
unsigned ntree_limit = 0,
unsigned layer_begin,
unsigned layer_end,
bool training = false,
bool pred_leaf = false,
bool pred_contribs = false,
@@ -140,7 +141,7 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
* \param type Prediction type.
* \param missing Missing value in the data.
* \param [in,out] out_preds Pointer to output prediction vector.
* \param layer_begin Begining of boosted tree layer used for prediction.
* \param layer_begin Beginning of boosted tree layer used for prediction.
* \param layer_end End of booster layer. 0 means do not limit trees.
*/
virtual void InplacePredict(dmlc::any const &x,

View File

@@ -127,12 +127,11 @@ class Predictor {
* \param [in,out] out_preds The output preds.
* \param model The model to predict from.
* \param tree_begin The tree begin index.
* \param ntree_limit (Optional) The ntree limit. 0 means do not
* limit trees.
* \param tree_end The tree end index.
*/
virtual void PredictBatch(DMatrix* dmat, PredictionCacheEntry* out_preds,
const gbm::GBTreeModel& model, int tree_begin,
uint32_t const ntree_limit = 0) const = 0;
const gbm::GBTreeModel& model, uint32_t tree_begin,
uint32_t tree_end = 0) const = 0;
/**
* \brief Inplace prediction.
@@ -140,7 +139,7 @@ class Predictor {
* \param model The model to predict from.
* \param missing Missing value in the data.
* \param [in,out] out_preds The output preds.
* \param tree_begin (Optional) Begining of boosted trees used for prediction.
* \param tree_begin (Optional) Beginning of boosted trees used for prediction.
* \param tree_end (Optional) End of booster trees. 0 means do not limit trees.
*
* \return True if the data can be handled by current predictor, false otherwise.
@@ -159,13 +158,13 @@ class Predictor {
* \param inst The instance to predict.
* \param [in,out] out_preds The output preds.
* \param model The model to predict from
* \param ntree_limit (Optional) The ntree limit.
* \param tree_end (Optional) The tree end index.
*/
virtual void PredictInstance(const SparsePage::Inst& inst,
std::vector<bst_float>* out_preds,
const gbm::GBTreeModel& model,
unsigned ntree_limit = 0) const = 0;
unsigned tree_end = 0) const = 0;
/**
* \brief predict the leaf index of each tree, the output will be nsample *
@@ -174,18 +173,14 @@ class Predictor {
* \param [in,out] dmat The input feature matrix.
* \param [in,out] out_preds The output preds.
* \param model Model to make predictions from.
* \param ntree_limit (Optional) The ntree limit.
* \param tree_end (Optional) The tree end index.
*/
virtual void PredictLeaf(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model,
unsigned ntree_limit = 0) const = 0;
unsigned tree_end = 0) const = 0;
/**
* \fn virtual void Predictor::PredictContribution( DMatrix* dmat,
* std::vector<bst_float>* out_contribs, const gbm::GBTreeModel& model,
* unsigned ntree_limit = 0) = 0;
*
* \brief feature contributions to individual predictions; the output will be
* a vector of length (nfeats + 1) * num_output_group * nsample, arranged in
* that order.
@@ -193,7 +188,7 @@ class Predictor {
* \param [in,out] dmat The input feature matrix.
* \param [in,out] out_contribs The output feature contribs.
* \param model Model to make predictions from.
* \param ntree_limit (Optional) The ntree limit.
* \param tree_end The tree end index.
* \param tree_weights (Optional) Weights to multiply each tree by.
* \param approximate Use fast approximate algorithm.
* \param condition Condition on the condition_feature (0=no, -1=cond off, 1=cond on).
@@ -203,7 +198,7 @@ class Predictor {
virtual void PredictContribution(DMatrix* dmat,
HostDeviceVector<bst_float>* out_contribs,
const gbm::GBTreeModel& model,
unsigned ntree_limit = 0,
unsigned tree_end = 0,
std::vector<bst_float>* tree_weights = nullptr,
bool approximate = false,
int condition = 0,
@@ -212,7 +207,7 @@ class Predictor {
virtual void PredictInteractionContributions(DMatrix* dmat,
HostDeviceVector<bst_float>* out_contribs,
const gbm::GBTreeModel& model,
unsigned ntree_limit = 0,
unsigned tree_end = 0,
std::vector<bst_float>* tree_weights = nullptr,
bool approximate = false) const = 0;