[POC] Experimental support for l1 error. (#7812)

Support adaptive trees, a feature available in both sklearn and LightGBM. After a tree is constructed, its leaf values are recomputed from the residuals between the labels and the current predictions.

For l1 error, the optimal leaf value is the median (50th percentile) of the residuals assigned to the leaf, as illustrated in the sketch below.
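
As a minimal illustration (not code from this commit; MedianOfResiduals is a hypothetical helper), the per-leaf value that minimizes the sum of absolute errors is the median of the residuals assigned to that leaf:

    #include <algorithm>
    #include <vector>

    // Hypothetical helper, for illustration only: returns the 50th percentile of the
    // residuals assigned to one leaf, which minimizes that leaf's contribution to l1 error.
    float MedianOfResiduals(std::vector<float> residuals) {
      // Assumes a non-empty input; even-sized inputs take the upper median.
      auto mid = residuals.begin() + residuals.size() / 2;
      std::nth_element(residuals.begin(), mid, residuals.end());
      return *mid;
    }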

This is marked as experimental support for the following reasons:
- The value is not well defined for distributed training, where a leaf may receive no samples on some local workers. Right now I simply use the original leaf value when computing the average with other workers, which might cause significant error (see the sketch after this list).
- Some follow-ups are required: support for the exact tree method, the pruner, and optimization of the quantile function. Also, we need to calculate the initial estimation.
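
To make the distributed caveat concrete, here is a hypothetical sketch (the function name, signature, and fallback rule are illustrative, not taken from this commit) of averaging a recomputed leaf value across workers while substituting the original leaf value for workers whose leaf is empty:

    #include <cstddef>
    #include <vector>

    // Illustrative only: each worker contributes its locally recomputed leaf value; a worker
    // whose leaf received no samples falls back to the original leaf value, which is what can
    // bias the averaged result mentioned above.
    float AverageLeafAcrossWorkers(std::vector<float> const& local_values,
                                   std::vector<bool> const& has_samples,
                                   float original_value) {
      float sum = 0.0f;
      for (std::size_t i = 0; i < local_values.size(); ++i) {
        sum += has_samples[i] ? local_values[i] : original_value;
      }
      return sum / static_cast<float>(local_values.size());
    }
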
Author:    Jiaming Yuan
Date:      2022-04-26 21:41:55 +08:00
Committed: GitHub
Commit:    fdf533f2b9
Parent:    ad06172c6b

64 changed files with 1727 additions and 336 deletions


@@ -90,9 +90,8 @@ class GradientBooster : public Model, public Configurable {
    * \param prediction The output prediction cache entry that needs to be updated.
    *        the booster may change content of gpair
    */
-  virtual void DoBoost(DMatrix* p_fmat,
-                       HostDeviceVector<GradientPair>* in_gpair,
-                       PredictionCacheEntry*) = 0;
+  virtual void DoBoost(DMatrix* p_fmat, HostDeviceVector<GradientPair>* in_gpair,
+                       PredictionCacheEntry*, ObjFunction const* obj) = 0;
   /*!
    * \brief generate predictions for given feature matrix
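
The signature change above threads the objective into the boosting step. Below is a hedged sketch of how the new parameter can be used on the caller side; the helper is hypothetical, not the actual gbtree implementation, and it assumes the XGBoost headers for these types plus ObjFunction::Task() returning the ObjInfo type extended later in this diff:

    // Hypothetical glue, for illustration only: after the trees for this iteration are
    // built, the objective is consulted to decide whether leaf values must be recomputed.
    void MaybeUpdateTreeLeaf(ObjFunction const* obj, DMatrix* p_fmat,
                             HostDeviceVector<bst_node_t> const& position,
                             PredictionCacheEntry const& predt, RegTree* p_tree) {
      if (obj != nullptr && obj->Task().UpdateTreeLeaf()) {
        obj->UpdateTreeLeaf(position, p_fmat->Info(), predt.predictions, p_tree);
      }
    }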


@@ -670,9 +670,13 @@ class Tensor {
    * See \ref TensorView for parameters of this constructor.
    */
   template <typename I, int32_t D>
-  explicit Tensor(I const (&shape)[D], int32_t device) {
+  explicit Tensor(I const (&shape)[D], int32_t device)
+      : Tensor{common::Span<I const, D>{shape}, device} {}
+
+  template <typename I, size_t D>
+  explicit Tensor(common::Span<I const, D> shape, int32_t device) {
     // No device unroll as this is a host only function.
-    std::copy(shape, shape + D, shape_);
+    std::copy(shape.data(), shape.data() + D, shape_);
     for (auto i = D; i < kDim; ++i) {
       shape_[i] = 1;
     }
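
For illustration only (shapes and device id are arbitrary; assumes the xgboost/linalg.h header shown above), both forms below end up in the new Span-based constructor:

    #include <cstdint>

    #include "xgboost/linalg.h"

    void MakeTensors() {
      // Array-literal shape: delegates to the Span overload added above.
      xgboost::linalg::Tensor<float, 2> from_array{{3, 4}, /*device=*/-1};
      // Explicit Span shape.
      std::int64_t const shape[] = {3, 4};
      xgboost::linalg::Tensor<float, 2> from_span{
          xgboost::common::Span<std::int64_t const, 2>{shape}, /*device=*/-1};
    }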


@@ -1,5 +1,5 @@
 /*!
- * Copyright 2014-2019 by Contributors
+ * Copyright 2014-2022 by Contributors
  * \file objective.h
  * \brief interface of objective function used by xgboost.
  * \author Tianqi Chen, Kailong Chen
@@ -22,6 +22,8 @@
 namespace xgboost {
+class RegTree;
 /*! \brief interface of objective function */
 class ObjFunction : public Configurable {
  protected:
@@ -88,6 +90,22 @@ class ObjFunction : public Configurable {
     return 1;
   }
+  /**
+   * \brief Update the leaf values after a tree is built. Needed for objectives with 0
+   *        hessian.
+   *
+   *   Note that the leaf update is not well defined for distributed training as XGBoost
+   *   computes only an average of quantile between workers. This breaks when some leaf
+   *   have no sample assigned in a local worker.
+   *
+   * \param position The leaf index for each rows.
+   * \param info MetaInfo providing labels and weights.
+   * \param prediction Model prediction after transformation.
+   * \param p_tree Tree that needs to be updated.
+   */
+  virtual void UpdateTreeLeaf(HostDeviceVector<bst_node_t> const& position, MetaInfo const& info,
+                              HostDeviceVector<float> const& prediction, RegTree* p_tree) const {}
+
   /*!
    * \brief Create an objective function according to name.
    * \param tparam Generic parameters.
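
Below is a minimal sketch of what an override of this hook could look like for an l1-style objective, written as a member of a hypothetical objective class (assuming <algorithm>, <map>, <vector>, and that MetaInfo::labels exposes a host tensor view). It is conceptual only and differs from the actual implementation in this commit, which also has to handle weights, GPU data, and distributed synchronization:

    // Conceptual sketch only: group residuals by the leaf each row landed in, then set
    // each leaf to the median of its residuals.
    void UpdateTreeLeaf(HostDeviceVector<bst_node_t> const& position, MetaInfo const& info,
                        HostDeviceVector<float> const& prediction, RegTree* p_tree) const override {
      std::map<bst_node_t, std::vector<float>> residuals;
      auto const& pos = position.ConstHostVector();
      auto const& predt = prediction.ConstHostVector();
      auto labels = info.labels.HostView();
      for (std::size_t i = 0; i < pos.size(); ++i) {
        if (pos[i] < 0) {
          continue;  // row was removed during sampling (index is stored negated)
        }
        residuals[pos[i]].push_back(labels(i, 0) - predt[i]);
      }
      for (auto& kv : residuals) {
        auto& v = kv.second;
        auto mid = v.begin() + v.size() / 2;
        std::nth_element(v.begin(), mid, v.end());
        (*p_tree)[kv.first].SetLeaf(*mid);  // median of residuals for this leaf
      }
    }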


@@ -33,13 +33,18 @@ struct ObjInfo {
   } task;
   // Does the objective have constant hessian value?
   bool const_hess{false};
+  bool zero_hess{false};
 
-  explicit ObjInfo(Task t) : task{t} {}
-  ObjInfo(Task t, bool khess) : task{t}, const_hess{khess} {}
+  ObjInfo(Task t) : task{t} {}  // NOLINT
+  ObjInfo(Task t, bool khess, bool zhess) : task{t}, const_hess{khess}, zero_hess(zhess) {}
 
   XGBOOST_DEVICE bool UseOneHot() const {
     return (task != ObjInfo::kRegression && task != ObjInfo::kBinary);
   }
+  /**
+   * \brief Use adaptive tree if the objective doesn't have valid hessian value.
+   */
+  XGBOOST_DEVICE bool UpdateTreeLeaf() const { return zero_hess; }
 };
 }  // namespace xgboost
 #endif  // XGBOOST_TASK_H_
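
Hypothetical usage of the extended ObjInfo (the exact flags the l1 objective reports are not shown in this excerpt, so treat the values below as an assumption): an objective whose hessian is identically zero reports it through its task info, and callers branch on UpdateTreeLeaf():

    #include "xgboost/task.h"

    // Illustrative only: an l1-style objective would report a constant, zero hessian.
    xgboost::ObjInfo MakeL1TaskInfo() {
      return xgboost::ObjInfo{xgboost::ObjInfo::kRegression, /*khess=*/true, /*zhess=*/true};
    }

    // Adaptive leaf recomputation is needed exactly when the hessian is identically zero.
    bool NeedsAdaptiveTree(xgboost::ObjInfo const& info) { return info.UpdateTreeLeaf(); }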


@@ -49,18 +49,25 @@ class TreeUpdater : public Configurable {
    * existing trees.
    */
   virtual bool CanModifyTree() const { return false; }
+  /*!
+   * \brief Wether the out_position in `Update` is valid. This determines whether adaptive
+   *        tree can be used.
+   */
+  virtual bool HasNodePosition() const { return false; }
   /*!
    * \brief perform update to the tree models
    * \param gpair the gradient pair statistics of the data
    * \param data The data matrix passed to the updater.
-   * \param trees references the trees to be updated, updater will change the content of trees
+   * \param out_position The leaf index for each row. The index is negated if that row is
+   *                     removed during sampling. So the 3th node is ~3.
+   * \param out_trees references the trees to be updated, updater will change the content of trees
    *   note: all the trees in the vector are updated, with the same statistics,
    *   but maybe different random seeds, usually one tree is passed in at a time,
    *   there can be multiple trees when we train random forest style model
    */
-  virtual void Update(HostDeviceVector<GradientPair>* gpair,
-                      DMatrix* data,
-                      const std::vector<RegTree*>& trees) = 0;
+  virtual void Update(HostDeviceVector<GradientPair>* gpair, DMatrix* data,
+                      common::Span<HostDeviceVector<bst_node_t>> out_position,
+                      const std::vector<RegTree*>& out_trees) = 0;
   /*!
    * \brief determines whether updater has enough knowledge about a given dataset
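
A hypothetical caller-side sketch of the new contract (not the actual gbtree code from this commit; it assumes the XGBoost headers for the types used): one position vector is allocated per tree, the updater fills them when HasNodePosition() is true, and the positions are then handed to the objective:

    #include <cstddef>
    #include <vector>

    // Illustrative glue only, wiring the Update() output into the objective's leaf update.
    void BuildAndAdaptTrees(xgboost::TreeUpdater* updater, xgboost::ObjFunction const* obj,
                            xgboost::DMatrix* p_fmat,
                            xgboost::HostDeviceVector<xgboost::GradientPair>* gpair,
                            xgboost::HostDeviceVector<float> const& predictions,
                            std::vector<xgboost::RegTree*> const& trees) {
      std::vector<xgboost::HostDeviceVector<xgboost::bst_node_t>> positions(trees.size());
      updater->Update(gpair, p_fmat,
                      xgboost::common::Span<xgboost::HostDeviceVector<xgboost::bst_node_t>>{
                          positions.data(), positions.size()},
                      trees);
      if (updater->HasNodePosition() && obj->Task().UpdateTreeLeaf()) {
        for (std::size_t i = 0; i < trees.size(); ++i) {
          obj->UpdateTreeLeaf(positions[i], p_fmat->Info(), predictions, trees[i]);
        }
      }
    }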