[breaking] Add prediction function for DMatrix and use inplace predict for dask. (#6668)

* Add a new API function for predicting on `DMatrix`. This function aligns with the rest of the `XGBoosterPredictFrom*` functions in the semantics of its arguments. * Purge `ntree_limit` from libxgboost; use iteration instead. * [dask] Use `inplace_predict` by default for dask sklearn models. * [dask] Run prediction shape inference on the worker instead of the client. The breaking change is in the Python sklearn `apply` function, which is now consistent with the other prediction functions in that `best_iteration` is used by default.
2021-02-08 18:26:32 +08:00
parent dbb5208a0a
commit 4656b09d5d
29 changed files with 1134 additions and 604 deletions
--- a/include/xgboost/c_api.h
+++ b/include/xgboost/c_api.h
@@ -704,8 +704,9 @@ XGB_DLL int XGBoosterEvalOneIter(BoosterHandle handle,
                                 const char *evnames[],
                                 bst_ulong len,
                                 const char **out_result);
+
 /*!
- * \brief make prediction based on dmat
+ * \brief make prediction based on dmat (deprecated, use `XGBoosterPredictFromDMatrix` instead)
 * \param handle handle
 * \param dmat data matrix
 * \param option_mask bit-mask of options taken in prediction, possible values
@@ -734,6 +735,165 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle,
                             int training,
                             bst_ulong *out_len,
                             const float **out_result);
+/*!
+ * \brief Make prediction from DMatrix, replacing `XGBoosterPredict`.
+ *
+ * \param handle Booster handle
+ * \param dmat   DMatrix handle
+ * \param c_json_config String encoded predict configuration in JSON format.
+ *
+ *    "type": [0, 5]
+ *      0: normal prediction
+ *      1: output margin
+ *      2: predict contribution
+ *      3: predict approximated contribution
+ *      4: predict feature interaction
+ *      5: predict leaf
+ *    "training": bool
+ *      Whether the prediction function is used as part of a training loop.  **Not used
+ *      for inplace prediction**.
+ *
+ *      Prediction can be run in 2 scenarios:
+ *        1. Given data matrix X, obtain prediction y_pred from the model.
+ *        2. Obtain the prediction for computing gradients. For example, DART booster performs dropout
+ *           during training, and the prediction result will be different from the one obtained by normal
+ *           inference step due to dropped trees.
+ *      Set training=false for the first scenario. Set training=true for the second
+ *      scenario.  The second scenario applies when you are defining a custom objective
+ *      function.
+ *    "iteration_begin": int
+ *      Beginning iteration of prediction.
+ *    "iteration_end": int
+ *      End iteration of prediction.  When set to 0, this becomes the size of the tree model.
+ *    "strict_shape": bool
+ *      Whether we should reshape the output with stricter rules.  If set to true,
+ *      normal/margin/contrib/interaction predict will output consistent shape
+ *      disregarding the use of multi-class model, and leaf prediction will output 4-dim
+ *      array representing: (n_samples, n_iterations, n_classes, n_trees_in_forest)
+ *
+ *   Run a normal prediction with strict output shape: 2 dims for softprob, 1 dim for others.
+ *   \code
+ *      {
+ *         "type": 0,
+ *         "training": false,
+ *         "iteration_begin": 0,
+ *         "iteration_end": 0,
+ *         "strict_shape": true
+ *      }
+ *   \endcode
+ *
+ * \param out_shape Shape of output prediction (copy before use).
+ * \param out_dim   Dimension of output prediction.
+ * \param out_result Buffer storing prediction value (copy before use).
+ *
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGBoosterPredictFromDMatrix(BoosterHandle handle,
+                                        DMatrixHandle dmat,
+                                        char const* c_json_config,
+                                        bst_ulong const **out_shape,
+                                        bst_ulong *out_dim,
+                                        float const **out_result);
+/*!
+ * \brief Inplace prediction from CPU dense matrix.
+ *
+ * \param handle        Booster handle.
+ * \param values        JSON encoded __array_interface__ to values.
+ * \param c_json_config See `XGBoosterPredictFromDMatrix` for more info.
+ *
+ *   Additional fields for inplace prediction are:
+ *     "missing": float
+ *
+ * \param m             An optional (NULL if not available) proxy DMatrix instance
+ *                      storing meta info.
+ *
+ * \param out_shape     See `XGBoosterPredictFromDMatrix` for more info.
+ * \param out_dim       See `XGBoosterPredictFromDMatrix` for more info.
+ * \param out_result    See `XGBoosterPredictFromDMatrix` for more info.
+ *
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGBoosterPredictFromDense(BoosterHandle handle,
+                                      char const *values,
+                                      char const *c_json_config,
+                                      DMatrixHandle m,
+                                      bst_ulong const **out_shape,
+                                      bst_ulong *out_dim,
+                                      const float **out_result);
+
+/*!
+ * \brief Inplace prediction from CPU CSR matrix.
+ *
+ * \param handle        Booster handle.
+ * \param indptr        JSON encoded __array_interface__ to row pointer in CSR.
+ * \param indices       JSON encoded __array_interface__ to column indices in CSR.
+ * \param values        JSON encoded __array_interface__ to values in CSR.
+ * \param ncol          Number of features in data.
+ * \param c_json_config See `XGBoosterPredictFromDMatrix` for more info.
+ *   Additional fields for inplace prediction are:
+ *     "missing": float
+ *
+ * \param m             An optional (NULL if not available) proxy DMatrix instance
+ *                      storing meta info.
+ *
+ * \param out_shape     See `XGBoosterPredictFromDMatrix` for more info.
+ * \param out_dim       See `XGBoosterPredictFromDMatrix` for more info.
+ * \param out_result    See `XGBoosterPredictFromDMatrix` for more info.
+ *
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGBoosterPredictFromCSR(BoosterHandle handle, char const *indptr,
+                                    char const *indices, char const *values,
+                                    bst_ulong ncol,
+                                    char const *c_json_config, DMatrixHandle m,
+                                    bst_ulong const **out_shape,
+                                    bst_ulong *out_dim,
+                                    const float **out_result);
+
+/*!
+ * \brief Inplace prediction from CUDA Dense matrix (cupy in Python).
+ *
+ * \param handle        Booster handle
+ * \param values        JSON encoded __cuda_array_interface__ to values.
+ * \param c_json_config See `XGBoosterPredictFromDMatrix` for more info.
+ *   Additional fields for inplace prediction are:
+ *     "missing": float
+ *
+ * \param m             An optional (NULL if not available) proxy DMatrix instance
+ *                      storing meta info.
+ * \param out_shape     See `XGBoosterPredictFromDMatrix` for more info.
+ * \param out_dim       See `XGBoosterPredictFromDMatrix` for more info.
+ * \param out_result    See `XGBoosterPredictFromDMatrix` for more info.
+ *
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGBoosterPredictFromCudaArray(
+    BoosterHandle handle, char const *values, char const *c_json_config,
+    DMatrixHandle m, bst_ulong const **out_shape, bst_ulong *out_dim,
+    const float **out_result);
+
+/*!
+ * \brief Inplace prediction from CUDA dense dataframe (cuDF in Python).
+ *
+ * \param handle        Booster handle
+ * \param values        List of __cuda_array_interface__ for all columns encoded in JSON list.
+ * \param c_json_config See `XGBoosterPredictFromDMatrix` for more info.
+ *   Additional fields for inplace prediction are:
+ *     "missing": float
+ *
+ * \param m             An optional (NULL if not available) proxy DMatrix instance
+ *                      storing meta info.
+ * \param out_shape     See `XGBoosterPredictFromDMatrix` for more info.
+ * \param out_dim       See `XGBoosterPredictFromDMatrix` for more info.
+ * \param out_result    See `XGBoosterPredictFromDMatrix` for more info.
+ *
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGBoosterPredictFromCudaColumnar(
+    BoosterHandle handle, char const *values, char const *c_json_config,
+    DMatrixHandle m, bst_ulong const **out_shape, bst_ulong *out_dim,
+    const float **out_result);
+

 /*
 * ========================== Begin Serialization APIs =========================
--- a/include/xgboost/gbm.h
+++ b/include/xgboost/gbm.h
@@ -63,7 +63,7 @@ class GradientBooster : public Model, public Configurable {
  /*!
   * \brief Slice a model using boosting index. The slice m:n indicates taking all trees
   *        that were fit during the boosting rounds m, (m+1), (m+2), ..., (n-1).
-   * \param layer_begin Begining of boosted tree layer used for prediction.
+   * \param layer_begin Beginning of boosted tree layer used for prediction.
   * \param layer_end   End of booster layer. 0 means do not limit trees.
   * \param out         Output gradient booster
   */
@@ -99,15 +99,14 @@ class GradientBooster : public Model, public Configurable {
   * \param out_preds output vector to hold the predictions
   * \param training Whether the prediction value is used for training.  For dart booster
   *                 drop out is performed during training.
-   * \param ntree_limit limit the number of trees used in prediction,
-   *                    when it equals 0, this means we do not limit
-   *                    number of trees, this parameter is only valid
-   *                    for gbtree, but not for gblinear
+   * \param layer_begin Beginning of boosted tree layer used for prediction.
+   * \param layer_end   End of booster layer. 0 means do not limit trees.
   */
  virtual void PredictBatch(DMatrix* dmat,
                            PredictionCacheEntry* out_preds,
                            bool training,
-                            unsigned ntree_limit = 0) = 0;
+                            unsigned layer_begin,
+                            unsigned layer_end) = 0;

  /*!
   * \brief Inplace prediction.
@@ -115,7 +114,7 @@ class GradientBooster : public Model, public Configurable {
   * \param           x                      A type erased data adapter.
   * \param           missing                Missing value in the data.
   * \param [in,out]  out_preds              The output preds.
-   * \param           layer_begin (Optional) Begining of boosted tree layer used for prediction.
+   * \param           layer_begin (Optional) Beginning of boosted tree layer used for prediction.
   * \param           layer_end   (Optional) End of booster layer. 0 means do not limit trees.
   */
  virtual void InplacePredict(dmlc::any const &, std::shared_ptr<DMatrix>, float,
@@ -132,44 +131,45 @@ class GradientBooster : public Model, public Configurable {
   *
   * \param inst the instance you want to predict
   * \param out_preds output vector to hold the predictions
-   * \param ntree_limit limit the number of trees used in prediction
+   * \param layer_begin Beginning of boosted tree layer used for prediction.
+   * \param layer_end   End of booster layer. 0 means do not limit trees.
   * \sa Predict
   */
  virtual void PredictInstance(const SparsePage::Inst& inst,
                               std::vector<bst_float>* out_preds,
-                               unsigned ntree_limit = 0) = 0;
+                               unsigned layer_begin, unsigned layer_end) = 0;
  /*!
   * \brief predict the leaf index of each tree, the output will be nsample * ntree vector
   *        this is only valid in gbtree predictor
   * \param dmat feature matrix
   * \param out_preds output vector to hold the predictions
-   * \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means
-   *    we do not limit number of trees, this parameter is only valid for gbtree, but not for gblinear
+   * \param layer_begin Beginning of boosted tree layer used for prediction.
+   * \param layer_end   End of booster layer. 0 means do not limit trees.
   */
-  virtual void PredictLeaf(DMatrix* dmat,
-                           HostDeviceVector<bst_float>* out_preds,
-                           unsigned ntree_limit = 0) = 0;
+  virtual void PredictLeaf(DMatrix *dmat,
+                           HostDeviceVector<bst_float> *out_preds,
+                           unsigned layer_begin, unsigned layer_end) = 0;

  /*!
   * \brief feature contributions to individual predictions; the output will be a vector
   *         of length (nfeats + 1) * num_output_group * nsample, arranged in that order
   * \param dmat feature matrix
   * \param out_contribs output vector to hold the contributions
-   * \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means
-   *    we do not limit number of trees
+   * \param layer_begin Beginning of boosted tree layer used for prediction.
+   * \param layer_end   End of booster layer. 0 means do not limit trees.
   * \param approximate use a faster (inconsistent) approximation of SHAP values
   * \param condition condition on the condition_feature (0=no, -1=cond off, 1=cond on).
   * \param condition_feature feature to condition on (i.e. fix) during calculations
   */
  virtual void PredictContribution(DMatrix* dmat,
                                   HostDeviceVector<bst_float>* out_contribs,
-                                   unsigned ntree_limit = 0,
+                                   unsigned layer_begin, unsigned layer_end,
                                   bool approximate = false, int condition = 0,
                                   unsigned condition_feature = 0) = 0;

-  virtual void PredictInteractionContributions(DMatrix* dmat,
-                                               HostDeviceVector<bst_float>* out_contribs,
-                                               unsigned ntree_limit, bool approximate) = 0;
+  virtual void PredictInteractionContributions(
+      DMatrix *dmat, HostDeviceVector<bst_float> *out_contribs,
+      unsigned layer_begin, unsigned layer_end, bool approximate) = 0;

  /*!
   * \brief dump the model in the requested format
--- a/include/xgboost/learner.h
+++ b/include/xgboost/learner.h
@@ -113,8 +113,8 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
   * \param data input data
   * \param output_margin whether to only predict margin value instead of transformed prediction
   * \param out_preds output vector that stores the prediction
-   * \param ntree_limit limit number of trees used for boosted tree
-   *   predictor, when it equals 0, this means we are using all the trees
+   * \param layer_begin Beginning of boosted tree layer used for prediction.
+   * \param layer_end   End of booster layer. 0 means do not limit trees.
   * \param training Whether the prediction result is used for training
   * \param pred_leaf whether to only predict the leaf index of each tree in a boosted tree predictor
   * \param pred_contribs whether to only predict the feature contributions
@@ -124,7 +124,8 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
  virtual void Predict(std::shared_ptr<DMatrix> data,
                       bool output_margin,
                       HostDeviceVector<bst_float> *out_preds,
-                       unsigned ntree_limit = 0,
+                       unsigned layer_begin,
+                       unsigned layer_end,
                       bool training = false,
                       bool pred_leaf = false,
                       bool pred_contribs = false,
@@ -140,7 +141,7 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
   * \param          type        Prediction type.
   * \param          missing     Missing value in the data.
   * \param [in,out] out_preds   Pointer to output prediction vector.
-   * \param          layer_begin Begining of boosted tree layer used for prediction.
+   * \param          layer_begin Beginning of boosted tree layer used for prediction.
   * \param          layer_end   End of booster layer. 0 means do not limit trees.
   */
  virtual void InplacePredict(dmlc::any const &x,
--- a/include/xgboost/predictor.h
+++ b/include/xgboost/predictor.h
@@ -127,12 +127,11 @@ class Predictor {
   * \param [in,out]  out_preds   The output preds.
   * \param           model       The model to predict from.
   * \param           tree_begin  The tree begin index.
-   * \param           ntree_limit (Optional) The ntree limit. 0 means do not
-   *                              limit trees.
+   * \param           tree_end    The tree end index.
   */
  virtual void PredictBatch(DMatrix* dmat, PredictionCacheEntry* out_preds,
-                            const gbm::GBTreeModel& model, int tree_begin,
-                            uint32_t const ntree_limit = 0) const = 0;
+                            const gbm::GBTreeModel& model, uint32_t tree_begin,
+                            uint32_t tree_end = 0) const = 0;

  /**
   * \brief Inplace prediction.
@@ -140,7 +139,7 @@ class Predictor {
   * \param           model                  The model to predict from.
   * \param           missing                Missing value in the data.
   * \param [in,out]  out_preds              The output preds.
-   * \param           tree_begin (Optional) Begining of boosted trees used for prediction.
+   * \param           tree_begin (Optional) Beginning of boosted trees used for prediction.
   * \param           tree_end   (Optional) End of booster trees. 0 means do not limit trees.
   *
   * \return True if the data can be handled by current predictor, false otherwise.
@@ -159,13 +158,13 @@ class Predictor {
   * \param           inst        The instance to predict.
   * \param [in,out]  out_preds   The output preds.
   * \param           model       The model to predict from
-   * \param           ntree_limit (Optional) The ntree limit.
+   * \param           tree_end    (Optional) The tree end index.
   */

  virtual void PredictInstance(const SparsePage::Inst& inst,
                               std::vector<bst_float>* out_preds,
                               const gbm::GBTreeModel& model,
-                               unsigned ntree_limit = 0) const = 0;
+                               unsigned tree_end = 0) const = 0;

  /**
   * \brief predict the leaf index of each tree, the output will be nsample *
@@ -174,18 +173,14 @@ class Predictor {
   * \param [in,out]  dmat        The input feature matrix.
   * \param [in,out]  out_preds   The output preds.
   * \param           model       Model to make predictions from.
-   * \param           ntree_limit (Optional) The ntree limit.
+   * \param           tree_end    (Optional) The tree end index.
   */

  virtual void PredictLeaf(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
                           const gbm::GBTreeModel& model,
-                           unsigned ntree_limit = 0) const = 0;
+                           unsigned tree_end = 0) const = 0;

  /**
-   * \fn  virtual void Predictor::PredictContribution( DMatrix* dmat,
-   * std::vector<bst_float>* out_contribs, const gbm::GBTreeModel& model,
-   * unsigned ntree_limit = 0) = 0;
-   *
   * \brief feature contributions to individual predictions; the output will be
   * a vector of length (nfeats + 1) * num_output_group * nsample, arranged in
   * that order.
@@ -193,7 +188,7 @@ class Predictor {
   * \param [in,out]  dmat               The input feature matrix.
   * \param [in,out]  out_contribs       The output feature contribs.
   * \param           model              Model to make predictions from.
-   * \param           ntree_limit        (Optional) The ntree limit.
+   * \param           tree_end           The tree end index.
   * \param           tree_weights       (Optional) Weights to multiply each tree by.
   * \param           approximate        Use fast approximate algorithm.
   * \param           condition          Condition on the condition_feature (0=no, -1=cond off, 1=cond on).
@@ -203,7 +198,7 @@ class Predictor {
  virtual void PredictContribution(DMatrix* dmat,
                                   HostDeviceVector<bst_float>* out_contribs,
                                   const gbm::GBTreeModel& model,
-                                   unsigned ntree_limit = 0,
+                                   unsigned tree_end = 0,
                                   std::vector<bst_float>* tree_weights = nullptr,
                                   bool approximate = false,
                                   int condition = 0,
@@ -212,7 +207,7 @@ class Predictor {
  virtual void PredictInteractionContributions(DMatrix* dmat,
                                               HostDeviceVector<bst_float>* out_contribs,
                                               const gbm::GBTreeModel& model,
-                                               unsigned ntree_limit = 0,
+                                               unsigned tree_end = 0,
                                               std::vector<bst_float>* tree_weights = nullptr,
                                               bool approximate = false) const = 0;