Use matrix for gradient. (#9508)

- Use `linalg::Matrix` for storing gradients.
- New API for the custom objective.
- Custom objective for multi-class/multi-target is now required to return the correct shape.
- Custom objectives for Python can accept arrays with any strides (row-major or column-major).
This commit is contained in:
Jiaming Yuan
2023-08-24 05:29:52 +08:00
committed by GitHub
parent 6103dca0bb
commit 972730cde0
77 changed files with 1052 additions and 651 deletions

View File

@@ -274,8 +274,8 @@ class GradientPairInt64 {
GradientPairInt64(GradientPairInt64 const &g) = default;
GradientPairInt64 &operator=(GradientPairInt64 const &g) = default;
XGBOOST_DEVICE [[nodiscard]] T GetQuantisedGrad() const { return grad_; }
XGBOOST_DEVICE [[nodiscard]] T GetQuantisedHess() const { return hess_; }
[[nodiscard]] XGBOOST_DEVICE T GetQuantisedGrad() const { return grad_; }
[[nodiscard]] XGBOOST_DEVICE T GetQuantisedHess() const { return hess_; }
XGBOOST_DEVICE GradientPairInt64 &operator+=(const GradientPairInt64 &rhs) {
grad_ += rhs.grad_;

View File

@@ -789,16 +789,14 @@ XGB_DLL int XGDMatrixGetUIntInfo(const DMatrixHandle handle,
* \param out The address to hold number of rows.
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixNumRow(DMatrixHandle handle,
bst_ulong *out);
XGB_DLL int XGDMatrixNumRow(DMatrixHandle handle, bst_ulong *out);
/*!
* \brief get number of columns
* \param handle the handle to the DMatrix
* \param out The output of number of columns
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixNumCol(DMatrixHandle handle,
bst_ulong *out);
XGB_DLL int XGDMatrixNumCol(DMatrixHandle handle, bst_ulong *out);
/*!
* \brief Get number of valid values from DMatrix.
@@ -945,21 +943,30 @@ XGB_DLL int XGBoosterUpdateOneIter(BoosterHandle handle, int iter, DMatrixHandle
* @example c-api-demo.c
*/
/*!
* \brief update the model, by directly specify gradient and second order gradient,
* this can be used to replace UpdateOneIter, to support customized loss function
* \param handle handle
* \param dtrain training data
* \param grad gradient statistics
* \param hess second order gradient statistics
* \param len length of grad/hess array
* \return 0 when success, -1 when failure happens
/**
* @deprecated since 2.1.0
*/
XGB_DLL int XGBoosterBoostOneIter(BoosterHandle handle,
DMatrixHandle dtrain,
float *grad,
float *hess,
bst_ulong len);
XGB_DLL int XGBoosterBoostOneIter(BoosterHandle handle, DMatrixHandle dtrain, float *grad,
float *hess, bst_ulong len);
/**
* @brief Update a model with gradient and Hessian. This is used for training with a
* custom objective function.
*
* @since 2.0.0
*
* @param handle handle
* @param dtrain The training data.
* @param iter The current iteration round. When training continuation is used, the count
* should restart.
* @param grad Json encoded __(cuda)_array_interface__ for gradient.
* @param hess Json encoded __(cuda)_array_interface__ for Hessian.
*
* @return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterTrainOneIter(BoosterHandle handle, DMatrixHandle dtrain, int iter,
char const *grad, char const *hess);
/*!
* \brief get evaluation statistics for xgboost
* \param handle handle

View File

@@ -70,22 +70,25 @@ class GradientBooster : public Model, public Configurable {
GradientBooster* /*out*/, bool* /*out_of_bound*/) const {
LOG(FATAL) << "Slice is not supported by the current booster.";
}
/*! \brief Return number of boosted rounds.
/**
* @brief Return number of boosted rounds.
*/
virtual int32_t BoostedRounds() const = 0;
[[nodiscard]] virtual std::int32_t BoostedRounds() const = 0;
/**
* \brief Whether the model has already been trained. When tree booster is chosen, then
* returns true when there are existing trees.
*/
virtual bool ModelFitted() const = 0;
/*!
* \brief perform update to the model(boosting)
* \param p_fmat feature matrix that provide access to features
* \param in_gpair address of the gradient pair statistics of the data
* \param prediction The output prediction cache entry that needs to be updated.
* the booster may change content of gpair
[[nodiscard]] virtual bool ModelFitted() const = 0;
/**
* @brief perform update to the model(boosting)
*
* @param p_fmat feature matrix that provide access to features
* @param in_gpair address of the gradient pair statistics of the data
* @param prediction The output prediction cache entry that needs to be updated.
* the booster may change content of gpair
* @param obj The objective function used for boosting.
*/
virtual void DoBoost(DMatrix* p_fmat, HostDeviceVector<GradientPair>* in_gpair,
virtual void DoBoost(DMatrix* p_fmat, linalg::Matrix<GradientPair>* in_gpair,
PredictionCacheEntry*, ObjFunction const* obj) = 0;
/**
@@ -165,18 +168,17 @@ class GradientBooster : public Model, public Configurable {
* \param format the format to dump the model in
* \return a vector of dump for boosters.
*/
virtual std::vector<std::string> DumpModel(const FeatureMap& fmap,
bool with_stats,
std::string format) const = 0;
[[nodiscard]] virtual std::vector<std::string> DumpModel(const FeatureMap& fmap, bool with_stats,
std::string format) const = 0;
virtual void FeatureScore(std::string const& importance_type,
common::Span<int32_t const> trees,
std::vector<bst_feature_t>* features,
std::vector<float>* scores) const = 0;
/*!
* \brief Whether the current booster uses GPU.
/**
* @brief Whether the current booster uses GPU.
*/
virtual bool UseGPU() const = 0;
[[nodiscard]] virtual bool UseGPU() const = 0;
/*!
* \brief create a gradient booster from given name
* \param name name of gradient booster

View File

@@ -76,17 +76,18 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
* \param iter current iteration number
* \param train reference to the data matrix.
*/
virtual void UpdateOneIter(int iter, std::shared_ptr<DMatrix> train) = 0;
/*!
* \brief Do customized gradient boosting with in_gpair.
* in_gair can be mutated after this call.
* \param iter current iteration number
* \param train reference to the data matrix.
* \param in_gpair The input gradient statistics.
virtual void UpdateOneIter(std::int32_t iter, std::shared_ptr<DMatrix> train) = 0;
/**
* @brief Do customized gradient boosting with in_gpair.
*
* @note in_gpair can be mutated after this call.
*
* @param iter current iteration number
* @param train reference to the data matrix.
* @param in_gpair The input gradient statistics.
*/
virtual void BoostOneIter(int iter,
std::shared_ptr<DMatrix> train,
HostDeviceVector<GradientPair>* in_gpair) = 0;
virtual void BoostOneIter(std::int32_t iter, std::shared_ptr<DMatrix> train,
linalg::Matrix<GradientPair>* in_gpair) = 0;
/*!
* \brief evaluate the model for specific iteration using the configured metrics.
* \param iter iteration number

View File

@@ -292,7 +292,7 @@ enum Order : std::uint8_t {
template <typename T, int32_t kDim>
class TensorView {
public:
using ShapeT = size_t[kDim];
using ShapeT = std::size_t[kDim];
using StrideT = ShapeT;
private:
@@ -400,10 +400,14 @@ class TensorView {
* \param shape shape of the tensor
* \param device Device ordinal
*/
template <typename I, int32_t D>
template <typename I, std::int32_t D>
LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], std::int32_t device)
: TensorView{data, shape, device, Order::kC} {}
template <typename I, std::int32_t D>
LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], DeviceOrd device)
: TensorView{data, shape, device.ordinal, Order::kC} {}
template <typename I, int32_t D>
LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], std::int32_t device, Order order)
: data_{data}, ptr_{data_.data()}, device_{device} {
@@ -446,6 +450,10 @@ class TensorView {
});
this->CalcSize();
}
template <typename I, std::int32_t D>
LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], I const (&stride)[D],
DeviceOrd device)
: TensorView{data, shape, stride, device.ordinal} {}
template <
typename U,
@@ -741,7 +749,7 @@ auto ArrayInterfaceStr(TensorView<T, D> const &t) {
template <typename T, int32_t kDim = 5>
class Tensor {
public:
using ShapeT = size_t[kDim];
using ShapeT = std::size_t[kDim];
using StrideT = ShapeT;
private:
@@ -775,6 +783,9 @@ class Tensor {
template <typename I, int32_t D>
explicit Tensor(I const (&shape)[D], std::int32_t device, Order order = kC)
: Tensor{common::Span<I const, D>{shape}, device, order} {}
template <typename I, int32_t D>
explicit Tensor(I const (&shape)[D], DeviceOrd device, Order order = kC)
: Tensor{common::Span<I const, D>{shape}, device.ordinal, order} {}
template <typename I, size_t D>
explicit Tensor(common::Span<I const, D> shape, std::int32_t device, Order order = kC)
@@ -814,6 +825,10 @@ class Tensor {
// shape
this->Initialize(shape, device);
}
template <typename I, int32_t D>
explicit Tensor(std::initializer_list<T> data, I const (&shape)[D], DeviceOrd device,
Order order = kC)
: Tensor{data, shape, device.ordinal, order} {}
/**
* \brief Index operator. Not thread safe, should not be used in performance critical
* region. For more efficient indexing, consider getting a view first.
@@ -832,9 +847,9 @@ class Tensor {
}
/**
* \brief Get a \ref TensorView for this tensor.
* @brief Get a @ref TensorView for this tensor.
*/
TensorView<T, kDim> View(int32_t device) {
TensorView<T, kDim> View(std::int32_t device) {
if (device >= 0) {
data_.SetDevice(device);
auto span = data_.DeviceSpan();
@@ -844,7 +859,7 @@ class Tensor {
return {span, shape_, device, order_};
}
}
TensorView<T const, kDim> View(int32_t device) const {
TensorView<T const, kDim> View(std::int32_t device) const {
if (device >= 0) {
data_.SetDevice(device);
auto span = data_.ConstDeviceSpan();
@@ -854,6 +869,26 @@ class Tensor {
return {span, shape_, device, order_};
}
}
auto View(DeviceOrd device) {
if (device.IsCUDA()) {
data_.SetDevice(device);
auto span = data_.DeviceSpan();
return TensorView<T, kDim>{span, shape_, device.ordinal, order_};
} else {
auto span = data_.HostSpan();
return TensorView<T, kDim>{span, shape_, device.ordinal, order_};
}
}
auto View(DeviceOrd device) const {
if (device.IsCUDA()) {
data_.SetDevice(device);
auto span = data_.ConstDeviceSpan();
return TensorView<T const, kDim>{span, shape_, device.ordinal, order_};
} else {
auto span = data_.ConstHostSpan();
return TensorView<T const, kDim>{span, shape_, device.ordinal, order_};
}
}
auto HostView() const { return this->View(-1); }
auto HostView() { return this->View(-1); }
@@ -931,6 +966,7 @@ class Tensor {
* \brief Set device ordinal for this tensor.
*/
void SetDevice(int32_t device) const { data_.SetDevice(device); }
void SetDevice(DeviceOrd device) const { data_.SetDevice(device); }
[[nodiscard]] int32_t DeviceIdx() const { return data_.DeviceIdx(); }
};

View File

@@ -49,9 +49,8 @@ class LinearUpdater : public Configurable {
* \param model Model to be updated.
* \param sum_instance_weight The sum instance weights, used to normalise l1/l2 penalty.
*/
virtual void Update(HostDeviceVector<GradientPair>* in_gpair, DMatrix* data,
gbm::GBLinearModel* model,
double sum_instance_weight) = 0;
virtual void Update(linalg::Matrix<GradientPair>* in_gpair, DMatrix* data,
gbm::GBLinearModel* model, double sum_instance_weight) = 0;
/*!
* \brief Create a linear updater given name

View File

@@ -41,17 +41,16 @@ class ObjFunction : public Configurable {
* \param args arguments to the objective function.
*/
virtual void Configure(const std::vector<std::pair<std::string, std::string> >& args) = 0;
/*!
* \brief Get gradient over each of predictions, given existing information.
* \param preds prediction of current round
* \param info information about labels, weights, groups in rank
* \param iteration current iteration number.
* \param out_gpair output of get gradient, saves gradient and second order gradient in
/**
* @brief Get gradient over each of predictions, given existing information.
*
* @param preds prediction of current round
* @param info information about labels, weights, groups in rank
* @param iteration current iteration number.
* @param out_gpair output of get gradient, saves gradient and second order gradient in
*/
virtual void GetGradient(const HostDeviceVector<bst_float>& preds,
const MetaInfo& info,
int iteration,
HostDeviceVector<GradientPair>* out_gpair) = 0;
virtual void GetGradient(const HostDeviceVector<bst_float>& preds, const MetaInfo& info,
std::int32_t iter, linalg::Matrix<GradientPair>* out_gpair) = 0;
/*! \return the default evaluation metric for the objective */
virtual const char* DefaultEvalMetric() const = 0;
@@ -81,9 +80,7 @@ class ObjFunction : public Configurable {
* used by gradient boosting
* \return transformed value
*/
virtual bst_float ProbToMargin(bst_float base_score) const {
return base_score;
}
[[nodiscard]] virtual bst_float ProbToMargin(bst_float base_score) const { return base_score; }
/**
* \brief Make initialize estimation of prediction.
*
@@ -94,14 +91,14 @@ class ObjFunction : public Configurable {
/*!
* \brief Return task of this objective.
*/
virtual struct ObjInfo Task() const = 0;
[[nodiscard]] virtual struct ObjInfo Task() const = 0;
/**
* \brief Return number of targets for input matrix. Right now XGBoost supports only
* @brief Return number of targets for input matrix. Right now XGBoost supports only
* multi-target regression.
*/
virtual bst_target_t Targets(MetaInfo const& info) const {
[[nodiscard]] virtual bst_target_t Targets(MetaInfo const& info) const {
if (info.labels.Shape(1) > 1) {
LOG(FATAL) << "multioutput is not supported by current objective function";
LOG(FATAL) << "multioutput is not supported by the current objective function";
}
return 1;
}

View File

@@ -71,7 +71,7 @@ class TreeUpdater : public Configurable {
* but maybe different random seeds, usually one tree is passed in at a time,
* there can be multiple trees when we train random forest style model
*/
virtual void Update(tree::TrainParam const* param, HostDeviceVector<GradientPair>* gpair,
virtual void Update(tree::TrainParam const* param, linalg::Matrix<GradientPair>* gpair,
DMatrix* data, common::Span<HostDeviceVector<bst_node_t>> out_position,
const std::vector<RegTree*>& out_trees) = 0;