[POC] Experimental support for l1 error. (#7812)

Support adaptive tree, a feature supported by both sklearn and lightgbm. The tree leaf is recomputed based on the residual between labels and predictions after construction. For l1 error, the optimal value is the median (50th percentile). This is marked as experimental support for the following reasons: - The value is not well defined for distributed training, where we might have empty leaves for local workers. Right now I just use the original leaf value for computing the average with other workers, which might cause significant errors. - Some follow-ups are required, for exact, pruner, and optimization for quantile function. Also, we need to calculate the initial estimation.
2022-04-26 21:41:55 +08:00
parent ad06172c6b
commit fdf533f2b9
64 changed files with 1727 additions and 336 deletions
--- a/src/objective/adaptive.cc
+++ b/src/objective/adaptive.cc
@@ -0,0 +1,126 @@
+/*!
+ * Copyright 2022 by XGBoost Contributors
+ */
+#include "adaptive.h"
+
+#include <limits>
+#include <vector>
+
+#include "../common/common.h"
+#include "../common/stats.h"
+#include "../common/threading_utils.h"
+#include "xgboost/tree_model.h"
+
+namespace xgboost {
+namespace obj {
+namespace detail {
+/*!
+ * \brief Group rows by the tree node they are assigned to (host version).
+ *
+ * \param tree     Tree whose leaves are enumerated.
+ * \param position Node index assigned to each row; negative entries are skipped.
+ * \param p_nptr   Output segment offsets into `p_ridx`, one segment per entry of `p_nidx`.
+ *                 Left untouched when no row has a non-negative position.
+ * \param p_nidx   Output node index of each segment.
+ * \param p_ridx   Output row indices ordered by their node index.
+ */
+void EncodeTreeLeafHost(RegTree const& tree, std::vector<bst_node_t> const& position,
+                        std::vector<size_t>* p_nptr, std::vector<bst_node_t>* p_nidx,
+                        std::vector<size_t>* p_ridx) {
+  auto& node_ptr = *p_nptr;
+  auto& node_idx = *p_nidx;
+  auto& row_idx = *p_ridx;
+
+  // Order the rows by node index, then gather the node indices in that order.
+  row_idx = common::ArgSort<size_t>(position);
+  std::vector<bst_node_t> sorted_pos(position);
+  size_t const n_rows = position.size();
+  for (size_t i = 0; i < n_rows; ++i) {
+    sorted_pos[i] = position[row_idx[i]];
+  }
+
+  // Offset of the first entry that carries a non-negative node index.
+  auto first_valid = std::find_if(sorted_pos.cbegin(), sorted_pos.cend(),
+                                  [](bst_node_t node) { return node >= 0; });
+  auto begin_pos = std::distance(sorted_pos.cbegin(), first_valid);
+  CHECK_LE(begin_pos, sorted_pos.size());
+
+  // Enumerate every leaf of the tree.
+  std::vector<bst_node_t> leaves;
+  tree.WalkTree([&](bst_node_t node) {
+    if (tree[node].IsLeaf()) {
+      leaves.push_back(node);
+    }
+    return true;
+  });
+
+  // No row has a usable position: report the leaves only, without segments.
+  if (begin_pos == sorted_pos.size()) {
+    node_idx = leaves;
+    return;
+  }
+
+  auto valid_beg = sorted_pos.begin() + begin_pos;
+  common::RunLengthEncode(valid_beg, sorted_pos.end(), &node_ptr);
+  CHECK_GT(node_ptr.size(), 0);
+  // The encoding started at `begin_pos`, shift the offsets so they index into `row_idx`.
+  for (auto& ptr : node_ptr) {
+    ptr += begin_pos;
+  }
+
+  size_t n_leaf = node_ptr.size() - 1;
+  // Collapse each run to a single node index; must agree with the run-length encoding.
+  auto unique_end = std::unique(valid_beg, sorted_pos.end());
+  auto n_unique = unique_end - valid_beg;
+  CHECK_EQ(n_unique, n_leaf);
+  node_idx.assign(valid_beg, unique_end);
+
+  // Some leaves received no row, add them as empty segments.
+  if (n_leaf != leaves.size()) {
+    FillMissingLeaf(leaves, &node_idx, &node_ptr);
+  }
+}
+
+/*!
+ * \brief Recompute the leaf values of a tree on the host.
+ *
+ * For every leaf, the `alpha` quantile of the residual (label - prediction) over the rows
+ * assigned to that leaf is computed, then handed to `UpdateLeafValues` which writes the
+ * values into the tree.
+ *
+ * \param ctx      Context providing the number of threads.
+ * \param position Node index assigned to each row.
+ * \param info     Labels and optional sample weights.
+ * \param predt    Current prediction for each row.
+ * \param alpha    Quantile to compute.
+ * \param p_tree   Tree to be updated.
+ */
+void UpdateTreeLeafHost(Context const* ctx, std::vector<bst_node_t> const& position,
+                        MetaInfo const& info, HostDeviceVector<float> const& predt, float alpha,
+                        RegTree* p_tree) {
+  auto& tree = *p_tree;
+
+  std::vector<bst_node_t> nidx;
+  std::vector<size_t> nptr;
+  std::vector<size_t> ridx;
+  EncodeTreeLeafHost(*p_tree, position, &nptr, &nidx, &ridx);
+  size_t n_leaf = nidx.size();
+  if (nptr.empty()) {
+    // No row segment was produced, pass an empty quantile vector.
+    std::vector<float> quantiles;
+    UpdateLeafValues(&quantiles, nidx, p_tree);
+    return;
+  }
+
+  CHECK(!position.empty());
+  std::vector<float> quantiles(n_leaf, 0);
+
+  auto const& h_node_idx = nidx;
+  auto const& h_node_ptr = nptr;
+  CHECK_LE(h_node_ptr.back(), info.num_row_);
+
+  // These views do not depend on the leaf, build them once instead of once per leaf
+  // inside the parallel region.
+  // multi-target not yet supported.
+  auto h_labels = info.labels.HostView().Slice(linalg::All(), 0);
+  auto const& h_predt = predt.ConstHostVector();
+  auto h_weights = linalg::MakeVec(&info.weights_);
+
+  // loop over each leaf
+  common::ParallelFor(quantiles.size(), ctx->Threads(), [&](size_t k) {
+    auto nidx = h_node_idx[k];
+    CHECK(tree[nidx].IsLeaf());
+    CHECK_LT(k + 1, h_node_ptr.size());
+    size_t n = h_node_ptr[k + 1] - h_node_ptr[k];
+    // Rows that belong to the k-th leaf.
+    auto h_row_set = common::Span<size_t const>{ridx}.subspan(h_node_ptr[k], n);
+
+    // Residual of the i-th row of this leaf.
+    auto iter = common::MakeIndexTransformIter([&](size_t i) -> float {
+      auto row_idx = h_row_set[i];
+      return h_labels(row_idx) - h_predt[row_idx];
+    });
+    // Weight of the i-th row of this leaf.
+    auto w_it = common::MakeIndexTransformIter([&](size_t i) -> float {
+      auto row_idx = h_row_set[i];
+      return h_weights(row_idx);
+    });
+
+    float q{0};
+    if (info.weights_.Empty()) {
+      q = common::Quantile(alpha, iter, iter + h_row_set.size());
+    } else {
+      q = common::WeightedQuantile(alpha, iter, iter + h_row_set.size(), w_it);
+    }
+    // A NaN quantile is only acceptable for a leaf without any row.
+    if (std::isnan(q)) {
+      CHECK(h_row_set.empty());
+    }
+    quantiles.at(k) = q;
+  });
+
+  UpdateLeafValues(&quantiles, nidx, p_tree);
+}
+}  // namespace detail
+}  // namespace obj
+}  // namespace xgboost
--- a/src/objective/adaptive.cu
+++ b/src/objective/adaptive.cu
@@ -0,0 +1,182 @@
+/*!
+ * Copyright 2022 by XGBoost Contributors
+ */
+#include <thrust/sort.h>
+
+#include <cub/cub.cuh>
+
+#include "../common/device_helpers.cuh"
+#include "../common/stats.cuh"
+#include "adaptive.h"
+
+namespace xgboost {
+namespace obj {
+namespace detail {
+/*!
+ * \brief Group rows by the tree node they are assigned to (device version).
+ *
+ * \param ctx      Context providing the device ordinal.
+ * \param position Node index assigned to each row; negative entries are skipped.
+ * \param p_ridx   Output row indices ordered by their node index.
+ * \param p_nptr   Output segment offsets into `p_ridx`. Left untouched when no row has a
+ *                 non-negative position.
+ * \param p_nidx   Output node index of each segment.
+ * \param tree     Tree whose leaves are enumerated.
+ */
+void EncodeTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> position,
+                          dh::device_vector<size_t>* p_ridx, HostDeviceVector<size_t>* p_nptr,
+                          HostDeviceVector<bst_node_t>* p_nidx, RegTree const& tree) {
+  // copy position to buffer
+  dh::safe_cuda(cudaSetDevice(ctx->gpu_id));
+  size_t n_samples = position.size();
+  dh::XGBDeviceAllocator<char> alloc;
+  dh::device_vector<bst_node_t> sorted_position(position.size());
+  dh::safe_cuda(cudaMemcpyAsync(sorted_position.data().get(), position.data(),
+                                position.size_bytes(), cudaMemcpyDeviceToDevice));
+
+  p_ridx->resize(position.size());
+  dh::Iota(dh::ToSpan(*p_ridx));
+  // sort row index according to node index
+  thrust::stable_sort_by_key(thrust::cuda::par(alloc), sorted_position.begin(),
+                             sorted_position.begin() + n_samples, p_ridx->begin());
+  dh::XGBCachingDeviceAllocator<char> caching;
+  // Offset of the first sorted entry that carries a non-negative node index.
+  auto beg_pos =
+      thrust::find_if(thrust::cuda::par(caching), sorted_position.cbegin(), sorted_position.cend(),
+                      [] XGBOOST_DEVICE(bst_node_t nidx) { return nidx >= 0; }) -
+      sorted_position.cbegin();
+  // No row has a usable position: report the leaves only, `p_nptr` is not written.
+  if (beg_pos == sorted_position.size()) {
+    auto& leaf = p_nidx->HostVector();
+    tree.WalkTree([&](bst_node_t nidx) {
+      if (tree[nidx].IsLeaf()) {
+        leaf.push_back(nidx);
+      }
+      return true;
+    });
+    return;
+  }
+
+  size_t n_leaf = tree.GetNumLeaves();
+  size_t max_n_unique = n_leaf;
+
+  // The first `max_n_unique` elements hold the run lengths, the last one the number of runs.
+  dh::caching_device_vector<size_t> counts_out(max_n_unique + 1, 0);
+  auto d_counts_out = dh::ToSpan(counts_out).subspan(0, max_n_unique);
+  auto d_num_runs_out = dh::ToSpan(counts_out).subspan(max_n_unique, 1);
+  dh::caching_device_vector<bst_node_t> unique_out(max_n_unique, 0);
+  auto d_unique_out = dh::ToSpan(unique_out);
+
+  size_t nbytes;
+  auto begin_it = sorted_position.begin() + beg_pos;
+  // Two calls with identical arguments: the first one passes a null buffer, the second
+  // one a buffer of the `nbytes` reported by the first.
+  cub::DeviceRunLengthEncode::Encode(nullptr, nbytes, begin_it, unique_out.data().get(),
+                                     counts_out.data().get(), d_num_runs_out.data(),
+                                     n_samples - beg_pos);
+  dh::TemporaryArray<char> temp(nbytes);
+  cub::DeviceRunLengthEncode::Encode(temp.data().get(), nbytes, begin_it, unique_out.data().get(),
+                                     counts_out.data().get(), d_num_runs_out.data(),
+                                     n_samples - beg_pos);
+
+  // Copy the number of runs and the first unique node index back to the host on a
+  // separate stream; the values are read only after the stream is synchronized below.
+  dh::PinnedMemory pinned_pool;
+  auto pinned = pinned_pool.GetSpan<char>(sizeof(size_t) + sizeof(bst_node_t));
+  dh::CUDAStream copy_stream;
+  size_t* h_num_runs = reinterpret_cast<size_t*>(pinned.subspan(0, sizeof(size_t)).data());
+  // flag for whether there's ignored position
+  // NOTE(review): `h_first_unique` is copied back but never read in this function.
+  bst_node_t* h_first_unique =
+      reinterpret_cast<bst_node_t*>(pinned.subspan(sizeof(size_t), sizeof(bst_node_t)).data());
+  dh::safe_cuda(cudaMemcpyAsync(h_num_runs, d_num_runs_out.data(), sizeof(size_t),
+                                cudaMemcpyDeviceToHost, copy_stream.View()));
+  dh::safe_cuda(cudaMemcpyAsync(h_first_unique, d_unique_out.data(), sizeof(bst_node_t),
+                                cudaMemcpyDeviceToHost, copy_stream.View()));
+
+  /**
+   * copy node index (leaf index)
+   */
+  auto& nidx = *p_nidx;
+  auto& nptr = *p_nptr;
+  nidx.SetDevice(ctx->gpu_id);
+  nidx.Resize(n_leaf);
+  auto d_node_idx = nidx.DeviceSpan();
+
+  nptr.SetDevice(ctx->gpu_id);
+  nptr.Resize(n_leaf + 1, 0);
+  auto d_node_ptr = nptr.DeviceSpan();
+
+  // Write the run lengths into `nptr[1..]` and the start offset into `nptr[0]`; the
+  // inclusive scan afterwards turns the lengths into segment offsets.
+  dh::LaunchN(n_leaf, [=] XGBOOST_DEVICE(size_t i) {
+    if (i >= d_num_runs_out[0]) {
+      // d_num_runs_out <= max_n_unique
+      // this omits all the leaf that are empty. A leaf can be empty when there's
+      // missing data, which can be caused by sparse input and distributed training.
+      return;
+    }
+    d_node_idx[i] = d_unique_out[i];
+    d_node_ptr[i + 1] = d_counts_out[i];
+    if (i == 0) {
+      d_node_ptr[0] = beg_pos;
+    }
+  });
+  thrust::inclusive_scan(thrust::cuda::par(caching), dh::tbegin(d_node_ptr), dh::tend(d_node_ptr),
+                         dh::tbegin(d_node_ptr));
+  copy_stream.View().Sync();
+  CHECK_GT(*h_num_runs, 0);
+  CHECK_LE(*h_num_runs, n_leaf);
+
+  if (*h_num_runs < n_leaf) {
+    // shrink to omit the sampled nodes.
+    nptr.Resize(*h_num_runs + 1);
+    nidx.Resize(*h_num_runs);
+
+    std::vector<bst_node_t> leaves;
+    tree.WalkTree([&](bst_node_t nidx) {
+      if (tree[nidx].IsLeaf()) {
+        leaves.push_back(nidx);
+      }
+      return true;
+    });
+    CHECK_EQ(leaves.size(), n_leaf);
+    // Fill all the leaves that don't have any sample. This is hacky and inefficient. An
+    // alternative is to leave the objective to handle missing leaf, which is more messy
+    // as we need to take other distributed workers into account.
+    auto& h_nidx = nidx.HostVector();
+    auto& h_nptr = nptr.HostVector();
+    FillMissingLeaf(leaves, &h_nidx, &h_nptr);
+    // NOTE(review): return values are discarded; presumably these calls move the data
+    // modified on the host back to the device - confirm.
+    nidx.DevicePointer();
+    nptr.DevicePointer();
+  }
+  CHECK_EQ(nidx.Size(), n_leaf);
+  CHECK_EQ(nptr.Size(), n_leaf + 1);
+}
+
+/*!
+ * \brief Recompute the leaf values of a tree on the device.
+ *
+ * For every leaf, the `alpha` quantile of the residual (label - prediction) over the rows
+ * assigned to that leaf is computed, then handed to `UpdateLeafValues` which writes the
+ * values into the tree.
+ *
+ * \param ctx      Context providing the device ordinal.
+ * \param position Node index assigned to each row (device memory).
+ * \param info     Labels and optional sample weights.
+ * \param predt    Current prediction for each row.
+ * \param alpha    Quantile to compute.
+ * \param p_tree   Tree to be updated.
+ */
+void UpdateTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> position,
+                          MetaInfo const& info, HostDeviceVector<float> const& predt, float alpha,
+                          RegTree* p_tree) {
+  dh::safe_cuda(cudaSetDevice(ctx->gpu_id));
+  dh::device_vector<size_t> ridx;
+  HostDeviceVector<size_t> nptr;
+  HostDeviceVector<bst_node_t> nidx;
+
+  EncodeTreeLeafDevice(ctx, position, &ridx, &nptr, &nidx, *p_tree);
+
+  if (nptr.Empty()) {
+    // No row segment was produced, pass an empty quantile vector and stop here, same as
+    // the host implementation. Falling through would run the segmented quantile on an
+    // empty segment pointer and update the leaves a second time.
+    std::vector<float> quantiles;
+    UpdateLeafValues(&quantiles, nidx.ConstHostVector(), p_tree);
+    return;
+  }
+
+  HostDeviceVector<float> quantiles;
+  predt.SetDevice(ctx->gpu_id);
+  auto d_predt = predt.ConstDeviceSpan();
+  auto d_labels = info.labels.View(ctx->gpu_id);
+
+  auto d_row_index = dh::ToSpan(ridx);
+  auto seg_beg = nptr.DevicePointer();
+  auto seg_end = seg_beg + nptr.Size();
+  // Residual of the i-th row in the order given by the sorted row index.
+  auto val_beg = dh::MakeTransformIterator<float>(thrust::make_counting_iterator(0ul),
+                                                  [=] XGBOOST_DEVICE(size_t i) {
+                                                    auto predt = d_predt[d_row_index[i]];
+                                                    auto y = d_labels(d_row_index[i]);
+                                                    return y - predt;
+                                                  });
+  auto val_end = val_beg + d_labels.Size();
+  CHECK_EQ(nidx.Size() + 1, nptr.Size());
+  if (info.weights_.Empty()) {
+    common::SegmentedQuantile(ctx, alpha, seg_beg, seg_end, val_beg, val_end, &quantiles);
+  } else {
+    info.weights_.SetDevice(ctx->gpu_id);
+    auto d_weights = info.weights_.ConstDeviceSpan();
+    CHECK_EQ(d_weights.size(), d_row_index.size());
+    // Weights permuted into the same order as the residuals.
+    auto w_it = thrust::make_permutation_iterator(dh::tcbegin(d_weights), dh::tcbegin(d_row_index));
+    common::SegmentedWeightedQuantile(ctx, alpha, seg_beg, seg_end, val_beg, val_end, w_it,
+                                      w_it + d_weights.size(), &quantiles);
+  }
+
+  UpdateLeafValues(&quantiles.HostVector(), nidx.ConstHostVector(), p_tree);
+}
+}  // namespace detail
+}  // namespace obj
+}  // namespace xgboost
--- a/src/objective/adaptive.h
+++ b/src/objective/adaptive.h
@@ -0,0 +1,83 @@
+/*!
+ * Copyright 2022 by XGBoost Contributors
+ */
+#pragma once
+
+#include <algorithm>
+#include <limits>
+#include <vector>
+
+#include "rabit/rabit.h"
+#include "xgboost/generic_parameters.h"
+#include "xgboost/host_device_vector.h"
+#include "xgboost/tree_model.h"
+
+namespace xgboost {
+namespace obj {
+namespace detail {
+/*!
+ * \brief Insert the leaves that have no entry in the encoded node index.
+ *
+ * Each leaf from `maybe_missing` that is not found in `p_nidx` is inserted at its sorted
+ * position, and the segment offset at that position is duplicated in `p_nptr`, which
+ * gives the new leaf an empty segment.
+ *
+ * \param maybe_missing Leaves that should all be present in `p_nidx` afterwards.
+ * \param p_nidx        Node indices, searched with binary search.
+ * \param p_nptr        Segment offsets matching `p_nidx`.
+ */
+inline void FillMissingLeaf(std::vector<bst_node_t> const& maybe_missing,
+                            std::vector<bst_node_t>* p_nidx, std::vector<size_t>* p_nptr) {
+  auto& node_idx = *p_nidx;
+  auto& node_ptr = *p_nptr;
+
+  for (auto candidate : maybe_missing) {
+    bool const present = std::binary_search(node_idx.cbegin(), node_idx.cend(), candidate);
+    if (!present) {
+      auto upper = std::upper_bound(node_idx.cbegin(), node_idx.cend(), candidate);
+      auto offset = upper - node_idx.cbegin();
+      node_idx.insert(node_idx.cbegin() + offset, candidate);
+      // Repeating the existing offset yields a segment of length zero.
+      node_ptr.insert(node_ptr.cbegin() + offset, node_ptr[offset]);
+    }
+  }
+}
+
+/*!
+ * \brief Combine the per-worker leaf quantiles and write them into the tree.
+ *
+ * NaN quantiles are treated as "not available on this worker". The quantiles are averaged
+ * over the workers that have a valid value; a leaf for which no worker has a valid value
+ * keeps its original leaf value.
+ *
+ * \param p_quantiles Quantile of each leaf listed in `nidx`; may be empty, in which case
+ *                    every leaf is treated as not available. Overwritten with the final
+ *                    leaf values.
+ * \param nidx        Node index of each leaf, taken by reference to avoid copying the
+ *                    vector on every call.
+ * \param p_tree      Tree to be updated.
+ */
+inline void UpdateLeafValues(std::vector<float>* p_quantiles,
+                             std::vector<bst_node_t> const& nidx, RegTree* p_tree) {
+  auto& tree = *p_tree;
+  auto& quantiles = *p_quantiles;
+  auto const& h_node_idx = nidx;
+
+  size_t n_leaf{h_node_idx.size()};
+  rabit::Allreduce<rabit::op::Max>(&n_leaf, 1);
+  CHECK(quantiles.empty() || quantiles.size() == n_leaf);
+  if (quantiles.empty()) {
+    quantiles.resize(n_leaf, std::numeric_limits<float>::quiet_NaN());
+  }
+
+  // number of workers that have valid quantiles
+  std::vector<int32_t> n_valids(quantiles.size());
+  std::transform(quantiles.cbegin(), quantiles.cend(), n_valids.begin(),
+                 [](float q) { return static_cast<int32_t>(!std::isnan(q)); });
+  rabit::Allreduce<rabit::op::Sum>(n_valids.data(), n_valids.size());
+  // convert to 0 for all reduce
+  std::replace_if(
+      quantiles.begin(), quantiles.end(), [](float q) { return std::isnan(q); }, 0.f);
+  // use the mean value
+  rabit::Allreduce<rabit::op::Sum>(quantiles.data(), quantiles.size());
+  for (size_t i = 0; i < n_leaf; ++i) {
+    if (n_valids[i] > 0) {
+      quantiles[i] /= static_cast<float>(n_valids[i]);
+    } else {
+      // Use original leaf value if no worker can provide the quantile.
+      quantiles[i] = tree[h_node_idx[i]].LeafValue();
+    }
+  }
+
+  // Write the combined values into the leaves.
+  for (size_t i = 0; i < h_node_idx.size(); ++i) {
+    auto leaf = h_node_idx[i];
+    auto q = quantiles[i];
+    CHECK(tree[leaf].IsLeaf());
+    tree[leaf].SetLeaf(q);
+  }
+}
+
+/*!
+ * \brief Recompute the leaf values of a tree on the device. Each leaf is set from the
+ *        `alpha` quantile of the residual (label - prediction) of the rows assigned to it.
+ *
+ * \param ctx      Context providing the device ordinal.
+ * \param position Node index assigned to each row (device memory).
+ * \param info     Labels and optional sample weights.
+ * \param predt    Current prediction for each row.
+ * \param alpha    Quantile to compute.
+ * \param p_tree   Tree to be updated.
+ */
+void UpdateTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> position,
+                          MetaInfo const& info, HostDeviceVector<float> const& predt, float alpha,
+                          RegTree* p_tree);
+
+/*!
+ * \brief Recompute the leaf values of a tree on the host. Each leaf is set from the
+ *        `alpha` quantile of the residual (label - prediction) of the rows assigned to it.
+ *
+ * \param ctx      Context providing the number of threads.
+ * \param position Node index assigned to each row.
+ * \param info     Labels and optional sample weights.
+ * \param predt    Current prediction for each row.
+ * \param alpha    Quantile to compute.
+ * \param p_tree   Tree to be updated.
+ */
+void UpdateTreeLeafHost(Context const* ctx, std::vector<bst_node_t> const& position,
+                        MetaInfo const& info, HostDeviceVector<float> const& predt, float alpha,
+                        RegTree* p_tree);
+}  // namespace detail
+}  // namespace obj
+}  // namespace xgboost
--- a/src/objective/aft_obj.cu
+++ b/src/objective/aft_obj.cu
@@ -34,11 +34,11 @@ DMLC_REGISTRY_FILE_TAG(aft_obj_gpu);

 class AFTObj : public ObjFunction {
 public:
-  void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
+  void Configure(Args const& args) override {
    param_.UpdateAllowUnknown(args);
  }

-  ObjInfo Task() const override { return {ObjInfo::kSurvival, false}; }
+  ObjInfo Task() const override { return ObjInfo::kSurvival; }

  template <typename Distribution>
  void GetGradientImpl(const HostDeviceVector<bst_float> &preds,
--- a/src/objective/hinge.cu
+++ b/src/objective/hinge.cu
@@ -24,10 +24,8 @@ class HingeObj : public ObjFunction {
 public:
  HingeObj() = default;

-  void Configure(
-      const std::vector<std::pair<std::string, std::string> > &args) override {}
-
-  ObjInfo Task() const override { return {ObjInfo::kRegression, false}; }
+  void Configure(Args const&) override {}
+  ObjInfo Task() const override { return ObjInfo::kRegression; }

  void GetGradient(const HostDeviceVector<bst_float> &preds,
                   const MetaInfo &info,
--- a/src/objective/multiclass_obj.cu
+++ b/src/objective/multiclass_obj.cu
@@ -46,7 +46,7 @@ class SoftmaxMultiClassObj : public ObjFunction {
    param_.UpdateAllowUnknown(args);
  }

-  ObjInfo Task() const override { return {ObjInfo::kClassification, false}; }
+  ObjInfo Task() const override { return ObjInfo::kClassification; }

  void GetGradient(const HostDeviceVector<bst_float>& preds,
                   const MetaInfo& info,
--- a/src/objective/rank_obj.cu
+++ b/src/objective/rank_obj.cu
@@ -1,5 +1,5 @@
 /*!
- * Copyright 2015-2019 XGBoost contributors
+ * Copyright 2015-2022 XGBoost contributors
 */
 #include <dmlc/omp.h>
 #include <dmlc/timer.h>
@@ -750,11 +750,8 @@ class SortedLabelList : dh::SegmentSorter<float> {
 template <typename LambdaWeightComputerT>
 class LambdaRankObj : public ObjFunction {
 public:
-  void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
-    param_.UpdateAllowUnknown(args);
-  }
-
-  ObjInfo Task() const override { return {ObjInfo::kRanking, false}; }
+  void Configure(Args const &args) override { param_.UpdateAllowUnknown(args); }
+  ObjInfo Task() const override { return ObjInfo::kRanking; }

  void GetGradient(const HostDeviceVector<bst_float>& preds,
                   const MetaInfo& info,
--- a/src/objective/regression_loss.h
+++ b/src/objective/regression_loss.h
@@ -1,5 +1,5 @@
 /*!
- * Copyright 2017-2019 XGBoost contributors
+ * Copyright 2017-2022 XGBoost contributors
 */
 #ifndef XGBOOST_OBJECTIVE_REGRESSION_LOSS_H_
 #define XGBOOST_OBJECTIVE_REGRESSION_LOSS_H_
@@ -38,7 +38,7 @@ struct LinearSquareLoss {
  static const char* DefaultEvalMetric() { return "rmse"; }

  static const char* Name() { return "reg:squarederror"; }
-  static ObjInfo Info() { return {ObjInfo::kRegression, true}; }
+  static ObjInfo Info() { return {ObjInfo::kRegression, true, false}; }
 };

 struct SquaredLogError {
@@ -65,7 +65,7 @@ struct SquaredLogError {

  static const char* Name() { return "reg:squaredlogerror"; }

-  static ObjInfo Info() { return {ObjInfo::kRegression, false}; }
+  static ObjInfo Info() { return ObjInfo::kRegression; }
 };

 // logistic loss for probability regression task
@@ -102,14 +102,14 @@ struct LogisticRegression {

  static const char* Name() { return "reg:logistic"; }

-  static ObjInfo Info() { return {ObjInfo::kRegression, false}; }
+  static ObjInfo Info() { return ObjInfo::kRegression; }
 };

 // logistic loss for binary classification task
 struct LogisticClassification : public LogisticRegression {
  static const char* DefaultEvalMetric() { return "logloss"; }
  static const char* Name() { return "binary:logistic"; }
-  static ObjInfo Info() { return {ObjInfo::kBinary, false}; }
+  static ObjInfo Info() { return ObjInfo::kBinary; }
 };

 // logistic loss, but predict un-transformed margin
@@ -146,7 +146,7 @@ struct LogisticRaw : public LogisticRegression {

  static const char* Name() { return "binary:logitraw"; }

-  static ObjInfo Info() { return {ObjInfo::kRegression, false}; }
+  static ObjInfo Info() { return ObjInfo::kRegression; }
 };

 }  // namespace obj
--- a/src/objective/regression_obj.cu
+++ b/src/objective/regression_obj.cu
@@ -4,10 +4,10 @@
 * \brief Definition of single-value regression and classification objectives.
 * \author Tianqi Chen, Kailong Chen
 */
-
 #include <dmlc/omp.h>
 #include <xgboost/logging.h>
 #include <xgboost/objective.h>
+#include <xgboost/tree_model.h>

 #include <cmath>
 #include <memory>
@@ -19,12 +19,18 @@
 #include "../common/threading_utils.h"
 #include "../common/transform.h"
 #include "./regression_loss.h"
+#include "adaptive.h"
+#include "xgboost/base.h"
+#include "xgboost/data.h"
+#include "xgboost/generic_parameters.h"
 #include "xgboost/host_device_vector.h"
 #include "xgboost/json.h"
+#include "xgboost/linalg.h"
 #include "xgboost/parameter.h"
 #include "xgboost/span.h"

 #if defined(XGBOOST_USE_CUDA)
+#include "../common/device_helpers.cuh"
 #include "../common/linalg_op.cuh"
 #endif  // defined(XGBOOST_USE_CUDA)

@@ -67,9 +73,7 @@ class RegLossObj : public ObjFunction {
    param_.UpdateAllowUnknown(args);
  }

-  struct ObjInfo Task() const override {
-    return Loss::Info();
-  }
+  ObjInfo Task() const override { return Loss::Info(); }

  uint32_t Targets(MetaInfo const& info) const override {
    // Multi-target regression.
@@ -209,7 +213,7 @@ class PseudoHuberRegression : public ObjFunction {

 public:
  void Configure(Args const& args) override { param_.UpdateAllowUnknown(args); }
-  struct ObjInfo Task() const override { return {ObjInfo::kRegression, false}; }
+  ObjInfo Task() const override { return ObjInfo::kRegression; }
  uint32_t Targets(MetaInfo const& info) const override {
    return std::max(static_cast<size_t>(1), info.labels.Shape(1));
  }
@@ -286,9 +290,7 @@ class PoissonRegression : public ObjFunction {
    param_.UpdateAllowUnknown(args);
  }

-  struct ObjInfo Task() const override {
-    return {ObjInfo::kRegression, false};
-  }
+  ObjInfo Task() const override { return ObjInfo::kRegression; }

  void GetGradient(const HostDeviceVector<bst_float>& preds,
                   const MetaInfo &info, int,
@@ -378,12 +380,8 @@ XGBOOST_REGISTER_OBJECTIVE(PoissonRegression, "count:poisson")
 // cox regression for survival data (negative values mean they are censored)
 class CoxRegression : public ObjFunction {
 public:
-  void Configure(
-      const std::vector<std::pair<std::string, std::string> >&) override {}
-
-  struct ObjInfo Task() const override {
-    return {ObjInfo::kRegression, false};
-  }
+  void Configure(Args const&) override {}
+  ObjInfo Task() const override { return ObjInfo::kRegression; }

  void GetGradient(const HostDeviceVector<bst_float>& preds,
                   const MetaInfo &info, int,
@@ -479,12 +477,8 @@ XGBOOST_REGISTER_OBJECTIVE(CoxRegression, "survival:cox")
 // gamma regression
 class GammaRegression : public ObjFunction {
 public:
-  void Configure(
-      const std::vector<std::pair<std::string, std::string> >&) override {}
-
-  struct ObjInfo Task() const override {
-    return {ObjInfo::kRegression, false};
-  }
+  void Configure(Args const&) override {}
+  ObjInfo Task() const override { return ObjInfo::kRegression; }

  void GetGradient(const HostDeviceVector<bst_float> &preds,
                   const MetaInfo &info, int,
@@ -582,9 +576,7 @@ class TweedieRegression : public ObjFunction {
    metric_ = os.str();
  }

-  struct ObjInfo Task() const override {
-    return {ObjInfo::kRegression, false};
-  }
+  ObjInfo Task() const override { return ObjInfo::kRegression; }

  void GetGradient(const HostDeviceVector<bst_float>& preds,
                   const MetaInfo &info, int,
@@ -675,5 +667,65 @@ XGBOOST_REGISTER_OBJECTIVE(TweedieRegression, "reg:tweedie")
 .describe("Tweedie regression for insurance data.")
 .set_body([]() { return new TweedieRegression(); });

+/*!
+ * \brief Objective for the absolute error (l1). The gradient is the weighted sign of the
+ *        error; after a tree is built, its leaves are recomputed from the median of the
+ *        residual.
+ */
+class MeanAbsoluteError : public ObjFunction {
+ public:
+  void Configure(Args const&) override {}
+  ObjInfo Task() const override { return {ObjInfo::kRegression, true, true}; }
+
+  /*!
+   * \brief Compute gradient pairs: gradient = sign(prediction - label) * weight,
+   *        hessian = weight.
+   *
+   * \param preds     Current predictions.
+   * \param info      Labels and optional sample weights.
+   * \param out_gpair Output gradient pairs, one per label element.
+   */
+  void GetGradient(HostDeviceVector<bst_float> const& preds, const MetaInfo& info, int /*iter*/,
+                   HostDeviceVector<GradientPair>* out_gpair) override {
+    CheckRegInputs(info, preds);
+    auto labels = info.labels.View(ctx_->gpu_id);
+
+    out_gpair->SetDevice(ctx_->gpu_id);
+    out_gpair->Resize(info.labels.Size());
+    auto gpair = linalg::MakeVec(out_gpair);
+
+    preds.SetDevice(ctx_->gpu_id);
+    auto predt = linalg::MakeVec(&preds);
+    info.weights_.SetDevice(ctx_->gpu_id);
+    common::OptionalWeights weight{ctx_->IsCPU() ? info.weights_.ConstHostSpan()
+                                                 : info.weights_.ConstDeviceSpan()};
+
+    linalg::ElementWiseKernel(ctx_, labels, [=] XGBOOST_DEVICE(size_t i, float const y) mutable {
+      auto sign = [](auto x) {
+        return (x > static_cast<decltype(x)>(0)) - (x < static_cast<decltype(x)>(0));
+      };
+      // Weights are per sample (row), while `i` runs over every label element. Index the
+      // weight by the row for both gradient and hessian.
+      auto sample_id = std::get<0>(linalg::UnravelIndex(i, labels.Shape()));
+      auto w = weight[sample_id];
+      auto grad = sign(predt(i) - y) * w;
+      auto hess = w;
+      gpair(i) = GradientPair{grad, hess};
+    });
+  }
+
+  /*!
+   * \brief Recompute the leaf values of a newly built tree from the 0.5 quantile
+   *        (median) of the residual, on the host or the device depending on the context.
+   *
+   * \param position   Node index assigned to each row.
+   * \param info       Labels and optional sample weights.
+   * \param prediction Current prediction for each row.
+   * \param p_tree     Tree to be updated.
+   */
+  void UpdateTreeLeaf(HostDeviceVector<bst_node_t> const& position, MetaInfo const& info,
+                      HostDeviceVector<float> const& prediction, RegTree* p_tree) const override {
+    if (ctx_->IsCPU()) {
+      auto const& h_position = position.ConstHostVector();
+      detail::UpdateTreeLeafHost(ctx_, h_position, info, prediction, 0.5, p_tree);
+    } else {
+#if defined(XGBOOST_USE_CUDA)
+      position.SetDevice(ctx_->gpu_id);
+      auto d_position = position.ConstDeviceSpan();
+      detail::UpdateTreeLeafDevice(ctx_, d_position, info, prediction, 0.5, p_tree);
+#else
+      common::AssertGPUSupport();
+#endif  //  defined(XGBOOST_USE_CUDA)
+    }
+  }
+
+  const char* DefaultEvalMetric() const override { return "mae"; }
+
+  void SaveConfig(Json* p_out) const override {
+    auto& out = *p_out;
+    out["name"] = String("reg:absoluteerror");
+  }
+
+  // This objective has no parameter to load.
+  void LoadConfig(Json const& /*in*/) override {}
+};
+
+// Make the objective available under the name "reg:absoluteerror".
+XGBOOST_REGISTER_OBJECTIVE(MeanAbsoluteError, "reg:absoluteerror")
+    .describe("Mean absolute error.")
+    .set_body([]() { return new MeanAbsoluteError(); });
 }  // namespace obj
 }  // namespace xgboost