From 85c3334c2b97a41eb062696f4c0963b81c3cc3b8 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 8 Mar 2023 13:15:39 +0800
Subject: [PATCH 1/9] Bump hadoop-common from 3.2.4 to 3.3.4 in /jvm-packages
 (#8882)

Bumps hadoop-common from 3.2.4 to 3.3.4.

---
updated-dependencies:
- dependency-name: org.apache.hadoop:hadoop-common
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 jvm-packages/xgboost4j-flink/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jvm-packages/xgboost4j-flink/pom.xml b/jvm-packages/xgboost4j-flink/pom.xml
index 27925ff47..e48feb876 100644
--- a/jvm-packages/xgboost4j-flink/pom.xml
+++ b/jvm-packages/xgboost4j-flink/pom.xml
@@ -51,7 +51,7 @@
     <groupId>org.apache.hadoop</groupId>
     <artifactId>hadoop-common</artifactId>
-    <version>3.2.4</version>
+    <version>3.3.4</version>

From 8c16da8863a74dfa27381046a9849299364ab319 Mon Sep 17 00:00:00 2001
From: Jiaming Yuan
Date: Wed, 8 Mar 2023 19:00:10 +0800
Subject: [PATCH 2/9] [doc] Add note for rabit port. [skip ci] (#8879)

---
 doc/tutorials/dask.rst | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/doc/tutorials/dask.rst b/doc/tutorials/dask.rst
index 87b2bf996..e35c0b24c 100644
--- a/doc/tutorials/dask.rst
+++ b/doc/tutorials/dask.rst
@@ -498,11 +498,15 @@ dask config is used:

     with Client(scheduler_file="sched.json") as client:
         reg = dxgb.DaskXGBRegressor()

-    # or we can specify the port too
+    # We can specify the port for XGBoost as well
     with dask.config.set({"xgboost.scheduler_address": "192.0.0.100:12345"}):
         reg = dxgb.DaskXGBRegressor()

+Please note that XGBoost requires a port different from the one used by Dask. By default,
+on a Unix-like system XGBoost binds to port 0 so that the OS picks an available port. This
+may fail if a user is running in a restricted docker environment; in that case, please
+open additional ports in the container and specify the address as in the above snippet.

 ************
 IPv6 Support

From e8a69013e6e0310b9d8808f61a30ff4e1ddecc5d Mon Sep 17 00:00:00 2001
From: Krzysztof Dyba <35004826+kadyb@users.noreply.github.com>
Date: Wed, 8 Mar 2023 22:58:39 +0100
Subject: [PATCH 3/9] [R] update `predict` docs (#8886)

---
 R-package/R/xgb.Booster.R            | 4 ++++
 R-package/man/predict.xgb.Booster.Rd | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/R-package/R/xgb.Booster.R b/R-package/R/xgb.Booster.R
index 6fa07c27f..080067039 100644
--- a/R-package/R/xgb.Booster.R
+++ b/R-package/R/xgb.Booster.R
@@ -214,6 +214,10 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) {
 #' Since it quadratically depends on the number of features, it is recommended to perform selection
 #' of the most important features first. See below about the format of the returned results.
 #'
+#' The \code{predict()} method uses as many threads as defined in the \code{xgb.Booster} object (all by default).
+#' If you want to change their number, assign a new number to \code{nthread} using \code{\link{xgb.parameters<-}}.
+#' Note also that converting a matrix to \code{\link{xgb.DMatrix}} uses multiple threads.
+#'
 #' @return
 #' The return type is different depending on whether \code{strict_shape} is set to \code{TRUE}. By default,
 #' for regression or binary classification, it returns a vector of length \code{nrows(newdata)}. 
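Note: in the R package, `xgb.parameters<-` ultimately calls the `XGBoosterSetParam` C API,
so the same prediction thread control is available from every language binding. A minimal
C++ sketch of the equivalent call sequence (the tiny matrix and literal values are
illustrative only, and error-code checks are omitted):

    #include <xgboost/c_api.h>

    #include <cmath>   // std::nanf
    #include <vector>

    int main() {
      // Tiny 4x2 dense input; NaN marks missing entries.
      std::vector<float> data{1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f};
      DMatrixHandle dmat;
      XGDMatrixCreateFromMat(data.data(), 4, 2, std::nanf(""), &dmat);

      BoosterHandle booster;
      XGBoosterCreate(&dmat, 1, &booster);

      // Equivalent of `xgb.parameters(bst) <- list(nthread = 1)` in R:
      // subsequent predictions run with a single thread.
      XGBoosterSetParam(booster, "nthread", "1");

      bst_ulong const* out_shape{nullptr};
      bst_ulong out_dim{0};
      float const* out_result{nullptr};
      char const config[] =
          "{\"type\": 0, \"training\": false, \"iteration_begin\": 0, "
          "\"iteration_end\": 0, \"strict_shape\": false}";
      XGBoosterPredictFromDMatrix(booster, dmat, config, &out_shape, &out_dim, &out_result);

      XGBoosterFree(booster);
      XGDMatrixFree(dmat);
      return 0;
    }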
diff --git a/R-package/man/predict.xgb.Booster.Rd b/R-package/man/predict.xgb.Booster.Rd
index 067cbf207..87f06d451 100644
--- a/R-package/man/predict.xgb.Booster.Rd
+++ b/R-package/man/predict.xgb.Booster.Rd
@@ -122,6 +122,10 @@ With \code{predinteraction = TRUE}, SHAP values of contributions of interaction
 are computed. Note that this operation might be rather expensive in terms of compute and memory.
 Since it quadratically depends on the number of features, it is recommended to perform selection
 of the most important features first. See below about the format of the returned results.
+
+The \code{predict()} method uses as many threads as defined in the \code{xgb.Booster} object (all by default).
+If you want to change their number, assign a new number to \code{nthread} using \code{\link{xgb.parameters<-}}.
+Note also that converting a matrix to \code{\link{xgb.DMatrix}} uses multiple threads.
 }
 \examples{
 ## binary classification:

From 46dfcc7d22f553fcd57699e96b3486b029352b5c Mon Sep 17 00:00:00 2001
From: Jiaming Yuan
Date: Thu, 9 Mar 2023 17:46:24 +0800
Subject: [PATCH 4/9] Define a new ranking parameter. (#8887)

---
 src/common/error_msg.h                 |  24 +++++
 src/common/ranking_utils.cc            |  44 ++++++--
 src/common/ranking_utils.h             | 140 +++++++++++++++++++++++--
 src/common/threading_utils.cuh         |  35 +++----
 src/metric/rank_metric.cc              |   2 +-
 tests/cpp/common/test_ranking_utils.cc |  57 +++++++---
 6 files changed, 249 insertions(+), 53 deletions(-)
 create mode 100644 src/common/error_msg.h

diff --git a/src/common/error_msg.h b/src/common/error_msg.h
new file mode 100644
index 000000000..48a2c92a4
--- /dev/null
+++ b/src/common/error_msg.h
@@ -0,0 +1,24 @@
+/**
+ * Copyright 2023 by XGBoost contributors
+ *
+ * \brief Common error message for various checks.
+ */
+#ifndef XGBOOST_COMMON_ERROR_MSG_H_
+#define XGBOOST_COMMON_ERROR_MSG_H_
+
+#include "xgboost/string_view.h"  // for StringView
+
+namespace xgboost::error {
+constexpr StringView GroupWeight() {
+  return "Size of weight must equal the number of query groups when ranking group is used.";
+}
+
+constexpr StringView GroupSize() {
+  return "Invalid query group structure. The number of rows obtained from group doesn't equal to ";
+}
+
+constexpr StringView LabelScoreSize() {
+  return "The size of label doesn't match the size of prediction.";
+}
+}  // namespace xgboost::error
+#endif  // XGBOOST_COMMON_ERROR_MSG_H_

diff --git a/src/common/ranking_utils.cc b/src/common/ranking_utils.cc
index f0b1c1a5e..8fad9a206 100644
--- a/src/common/ranking_utils.cc
+++ b/src/common/ranking_utils.cc
@@ -3,15 +3,28 @@
  */
 #include "ranking_utils.h"
 
-#include <cstdint>  // std::uint32_t
-#include <sstream>  // std::ostringstream
-#include <string>   // std::string,std::sscanf
+#include <algorithm>   // for copy_n, max, min, none_of, all_of
+#include <cstddef>     // for size_t
+#include <cstdio>      // for sscanf
+#include <exception>   // for exception
+#include <functional>  // for greater
+#include <iterator>    // for reverse_iterator
+#include <string>      // for char_traits, string
 
-#include "xgboost/string_view.h"  // StringView
+#include "algorithm.h"        // for ArgSort
+#include "linalg_op.h"        // for cbegin, cend
+#include "optional_weight.h"  // for MakeOptionalWeights
+#include "threading_utils.h"  // for ParallelFor
+#include "xgboost/base.h"     // for bst_group_t
+#include "xgboost/context.h"  // for Context
+#include "xgboost/data.h"     // for MetaInfo
+#include "xgboost/linalg.h"   // for All, TensorView, Range, Tensor, Vector
+#include "xgboost/logging.h"  // for Error, LogCheck_EQ, CHECK_EQ
 
-namespace xgboost {
-namespace ltr {
-std::string MakeMetricName(StringView name, StringView param, std::uint32_t* topn, bool* minus) {
+namespace xgboost::ltr {
+DMLC_REGISTER_PARAMETER(LambdaRankParam);
+
+std::string ParseMetricName(StringView name, StringView param, position_t* topn, bool* minus) {
   std::string out_name;
   if (!param.empty()) {
     std::ostringstream os;
@@ -30,5 +43,18 @@ std::string MakeMetricName(StringView name, StringView param, std::uint32_t* top
   }
   return out_name;
 }
-}  // namespace ltr
-}  // namespace xgboost
+
+std::string MakeMetricName(StringView name, position_t topn, bool minus) {
+  std::ostringstream ss;
+  if (topn == LambdaRankParam::NotSet()) {
+    ss << name;
+  } else {
+    ss << name << "@" << topn;
+  }
+  if (minus) {
+    ss << "-";
+  }
+  std::string out_name = ss.str();
+  return out_name;
+}
+}  // namespace xgboost::ltr

diff --git a/src/common/ranking_utils.h b/src/common/ranking_utils.h
index 35ee36c21..631de4d70 100644
--- a/src/common/ranking_utils.h
+++ b/src/common/ranking_utils.h
@@ -3,17 +3,131 @@
  */
 #ifndef XGBOOST_COMMON_RANKING_UTILS_H_
 #define XGBOOST_COMMON_RANKING_UTILS_H_
+#include <algorithm>  // for min
+#include <cmath>      // for log2, fabs, floor
+#include <cstddef>    // for size_t
+#include <cstdint>    // for uint32_t, uint8_t, int32_t
+#include <limits>     // for numeric_limits
+#include <string>     // for char_traits, string
+#include <vector>     // for vector
 
-#include <cstddef>  // std::size_t
-#include <cstdint>  // std::uint32_t
-#include <string>   // std::string
+#include "./math.h"                      // for CloseTo
+#include "dmlc/parameter.h"              // for FieldEntry, DMLC_DECLARE_FIELD
+#include "error_msg.h"                   // for GroupWeight, GroupSize
+#include "xgboost/base.h"                // for XGBOOST_DEVICE, bst_group_t
+#include "xgboost/context.h"             // for Context
+#include "xgboost/data.h"                // for MetaInfo
+#include "xgboost/host_device_vector.h"  // for HostDeviceVector
+#include "xgboost/linalg.h"              // for Vector, VectorView, Tensor
+#include "xgboost/logging.h"             // for LogCheck_EQ, CHECK_EQ, CHECK
+#include "xgboost/parameter.h"           // for XGBoostParameter
+#include "xgboost/span.h"                // for Span
+#include "xgboost/string_view.h"         // for StringView
 
-#include "xgboost/string_view.h"  // StringView
-
-namespace xgboost {
-namespace ltr {
+namespace xgboost::ltr {
 /**
- * \brief Construct name for ranking metric given parameters. 
+ * \brief Relevance degree + */ +using rel_degree_t = std::uint32_t; // NOLINT +/** + * \brief top-k position + */ +using position_t = std::uint32_t; // NOLINT + +enum class PairMethod : std::int32_t { + kTopK = 0, + kMean = 1, +}; +} // namespace xgboost::ltr + +DECLARE_FIELD_ENUM_CLASS(xgboost::ltr::PairMethod); + +namespace xgboost::ltr { +struct LambdaRankParam : public XGBoostParameter { + private: + static constexpr position_t DefaultK() { return 32; } + static constexpr position_t DefaultSamplePairs() { return 1; } + + protected: + // pairs + // should be accessed by getter for auto configuration. + // nolint so that we can keep the string name. + PairMethod lambdarank_pair_method{PairMethod::kMean}; // NOLINT + std::size_t lambdarank_num_pair_per_sample{NotSet()}; // NOLINT + + public: + static constexpr position_t NotSet() { return std::numeric_limits::max(); } + + // unbiased + bool lambdarank_unbiased{false}; + double lambdarank_bias_norm{2.0}; + // ndcg + bool ndcg_exp_gain{true}; + + bool operator==(LambdaRankParam const& that) const { + return lambdarank_pair_method == that.lambdarank_pair_method && + lambdarank_num_pair_per_sample == that.lambdarank_num_pair_per_sample && + lambdarank_unbiased == that.lambdarank_unbiased && + lambdarank_bias_norm == that.lambdarank_bias_norm && ndcg_exp_gain == that.ndcg_exp_gain; + } + bool operator!=(LambdaRankParam const& that) const { return !(*this == that); } + + [[nodiscard]] double Regularizer() const { return 1.0 / (1.0 + this->lambdarank_bias_norm); } + + /** + * \brief Get number of pairs for each sample + */ + [[nodiscard]] position_t NumPair() const { + if (lambdarank_num_pair_per_sample == NotSet()) { + switch (lambdarank_pair_method) { + case PairMethod::kMean: + return DefaultSamplePairs(); + case PairMethod::kTopK: + return DefaultK(); + } + } else { + return lambdarank_num_pair_per_sample; + } + LOG(FATAL) << "Unreachable."; + return 0; + } + + [[nodiscard]] bool HasTruncation() const { return lambdarank_pair_method == PairMethod::kTopK; } + + // Used for evaluation metric and cache initialization, iterate through top-k or the whole list + [[nodiscard]] auto TopK() const { + if (HasTruncation()) { + return NumPair(); + } else { + return NotSet(); + } + } + + DMLC_DECLARE_PARAMETER(LambdaRankParam) { + DMLC_DECLARE_FIELD(lambdarank_pair_method) + .set_default(PairMethod::kMean) + .add_enum("mean", PairMethod::kMean) + .add_enum("topk", PairMethod::kTopK) + .describe("Method for constructing pairs."); + DMLC_DECLARE_FIELD(lambdarank_num_pair_per_sample) + .set_default(NotSet()) + .set_lower_bound(1) + .describe("Number of pairs for each sample in the list."); + DMLC_DECLARE_FIELD(lambdarank_unbiased) + .set_default(false) + .describe("Unbiased lambda mart. Use IPW to debias click position"); + DMLC_DECLARE_FIELD(lambdarank_bias_norm) + .set_default(2.0) + .set_lower_bound(0.0) + .describe("Lp regularization for unbiased lambdarank."); + DMLC_DECLARE_FIELD(ndcg_exp_gain) + .set_default(true) + .describe("When set to true, the label gain is 2^rel - 1, otherwise it's rel."); + } +}; + +/** + * \brief Parse name for ranking metric given parameters. * * \param [in] name Null terminated string for metric name * \param [in] param Null terminated string for parameter like the `3-` in `ndcg@3-`. @@ -23,7 +137,11 @@ namespace ltr { * * \return The name of the metric. 
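+ *
+ * Example (mirroring the unit tests added in this patch):
+ *
+ *   position_t topn{32};
+ *   bool minus{false};
+ *   auto name = ParseMetricName("ndcg", "3-", &topn, &minus);
+ *   // name == "ndcg@3-", topn == 3, minus == true
+ *
+ * A null or empty `param` leaves both output arguments unchanged.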
*/ -std::string MakeMetricName(StringView name, StringView param, std::uint32_t* topn, bool* minus); -} // namespace ltr -} // namespace xgboost +std::string ParseMetricName(StringView name, StringView param, position_t* topn, bool* minus); + +/** + * \brief Parse name for ranking metric given parameters. + */ +std::string MakeMetricName(StringView name, position_t topn, bool minus); +} // namespace xgboost::ltr #endif // XGBOOST_COMMON_RANKING_UTILS_H_ diff --git a/src/common/threading_utils.cuh b/src/common/threading_utils.cuh index c21d312d2..db5fe82f9 100644 --- a/src/common/threading_utils.cuh +++ b/src/common/threading_utils.cuh @@ -43,36 +43,33 @@ XGBOOST_DEVICE inline std::size_t DiscreteTrapezoidArea(std::size_t n, std::size * with h <= n */ template -inline size_t -SegmentedTrapezoidThreads(xgboost::common::Span group_ptr, - xgboost::common::Span out_group_threads_ptr, - size_t h) { +std::size_t SegmentedTrapezoidThreads(xgboost::common::Span group_ptr, + xgboost::common::Span out_group_threads_ptr, + std::size_t h) { CHECK_GE(group_ptr.size(), 1); CHECK_EQ(group_ptr.size(), out_group_threads_ptr.size()); - dh::LaunchN( - group_ptr.size(), [=] XGBOOST_DEVICE(size_t idx) { - if (idx == 0) { - out_group_threads_ptr[0] = 0; - return; - } + dh::LaunchN(group_ptr.size(), [=] XGBOOST_DEVICE(std::size_t idx) { + if (idx == 0) { + out_group_threads_ptr[0] = 0; + return; + } - size_t cnt = static_cast(group_ptr[idx] - group_ptr[idx - 1]); - out_group_threads_ptr[idx] = DiscreteTrapezoidArea(cnt, h); - }); + std::size_t cnt = static_cast(group_ptr[idx] - group_ptr[idx - 1]); + out_group_threads_ptr[idx] = DiscreteTrapezoidArea(cnt, h); + }); dh::InclusiveSum(out_group_threads_ptr.data(), out_group_threads_ptr.data(), out_group_threads_ptr.size()); - size_t total = 0; - dh::safe_cuda(cudaMemcpy( - &total, out_group_threads_ptr.data() + out_group_threads_ptr.size() - 1, - sizeof(total), cudaMemcpyDeviceToHost)); + std::size_t total = 0; + dh::safe_cuda(cudaMemcpy(&total, out_group_threads_ptr.data() + out_group_threads_ptr.size() - 1, + sizeof(total), cudaMemcpyDeviceToHost)); return total; } /** * Called inside kernel to obtain coordinate from trapezoid grid. */ -XGBOOST_DEVICE inline void UnravelTrapeziodIdx(size_t i_idx, size_t n, - size_t *out_i, size_t *out_j) { +XGBOOST_DEVICE inline void UnravelTrapeziodIdx(std::size_t i_idx, std::size_t n, std::size_t *out_i, + std::size_t *out_j) { auto &i = *out_i; auto &j = *out_j; double idx = static_cast(i_idx); diff --git a/src/metric/rank_metric.cc b/src/metric/rank_metric.cc index d39c7302a..69e6e24cd 100644 --- a/src/metric/rank_metric.cc +++ b/src/metric/rank_metric.cc @@ -234,7 +234,7 @@ struct EvalRank : public MetricNoCache, public EvalRankConfig { protected: explicit EvalRank(const char* name, const char* param) { - this->name = ltr::MakeMetricName(name, param, &topn, &minus); + this->name = ltr::ParseMetricName(name, param, &topn, &minus); } virtual double EvalGroup(PredIndPairContainer *recptr) const = 0; diff --git a/tests/cpp/common/test_ranking_utils.cc b/tests/cpp/common/test_ranking_utils.cc index ea72edd9f..c73cffed7 100644 --- a/tests/cpp/common/test_ranking_utils.cc +++ b/tests/cpp/common/test_ranking_utils.cc @@ -1,38 +1,69 @@ /** * Copyright 2023 by XGBoost Contributors */ -#include +#include // for Test, AssertionResult, Message, TestPartR... +#include // for ASSERT_NEAR, ASSERT_T... 
+#include // for Args +#include // for Context +#include // for StringView -#include // std::uint32_t +#include // for uint32_t +#include // for pair -#include "../../../src/common/ranking_utils.h" +#include "../../../src/common/ranking_utils.h" // for LambdaRankParam, ParseMetricName, MakeMet... -namespace xgboost { -namespace ltr { -TEST(RankingUtils, MakeMetricName) { +namespace xgboost::ltr { +TEST(RankingUtils, LambdaRankParam) { + // make sure no memory is shared in dmlc parameter. + LambdaRankParam p0; + p0.UpdateAllowUnknown(Args{{"lambdarank_num_pair_per_sample", "3"}}); + ASSERT_EQ(p0.NumPair(), 3); + + LambdaRankParam p1; + p1.UpdateAllowUnknown(Args{{"lambdarank_num_pair_per_sample", "8"}}); + + ASSERT_EQ(p0.NumPair(), 3); + ASSERT_EQ(p1.NumPair(), 8); + + p0.UpdateAllowUnknown(Args{{"lambdarank_num_pair_per_sample", "17"}}); + ASSERT_EQ(p0.NumPair(), 17); + ASSERT_EQ(p1.NumPair(), 8); +} + +TEST(RankingUtils, ParseMetricName) { std::uint32_t topn{32}; bool minus{false}; - auto name = MakeMetricName("ndcg", "3-", &topn, &minus); + auto name = ParseMetricName("ndcg", "3-", &topn, &minus); ASSERT_EQ(name, "ndcg@3-"); ASSERT_EQ(topn, 3); ASSERT_TRUE(minus); - name = MakeMetricName("ndcg", "6", &topn, &minus); + name = ParseMetricName("ndcg", "6", &topn, &minus); ASSERT_EQ(topn, 6); ASSERT_TRUE(minus); // unchanged minus = false; - name = MakeMetricName("ndcg", "-", &topn, &minus); + name = ParseMetricName("ndcg", "-", &topn, &minus); ASSERT_EQ(topn, 6); // unchanged ASSERT_TRUE(minus); - name = MakeMetricName("ndcg", nullptr, &topn, &minus); + name = ParseMetricName("ndcg", nullptr, &topn, &minus); ASSERT_EQ(topn, 6); // unchanged ASSERT_TRUE(minus); // unchanged - name = MakeMetricName("ndcg", StringView{}, &topn, &minus); + name = ParseMetricName("ndcg", StringView{}, &topn, &minus); ASSERT_EQ(topn, 6); // unchanged ASSERT_TRUE(minus); // unchanged } -} // namespace ltr -} // namespace xgboost + +TEST(RankingUtils, MakeMetricName) { + auto name = MakeMetricName("map", LambdaRankParam::NotSet(), true); + ASSERT_EQ(name, "map-"); + name = MakeMetricName("map", LambdaRankParam::NotSet(), false); + ASSERT_EQ(name, "map"); + name = MakeMetricName("map", 2, true); + ASSERT_EQ(name, "map@2-"); + name = MakeMetricName("map", 2, false); + ASSERT_EQ(name, "map@2"); +} +} // namespace xgboost::ltr From 5feee8d4a9db3265d922837ab4141fd1a8714b5a Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 9 Mar 2023 19:03:06 +0800 Subject: [PATCH 5/9] Define core multi-target regression tree structure. (#8884) - Define a new tree struct embedded in the `RegTree`. - Provide dispatching functions in `RegTree`. - Fix some c++-17 warnings about the use of nodiscard (currently we disable the warning on the CI). - Use uint32_t instead of size_t for `bst_target_t` as it has a defined size and can be used as part of dmlc parameter. - Hide the `Segment` struct inside the categorical split matrix. 
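A small sketch of the intended usage of the dispatching interface described above
(values are illustrative; it relies on the in-repo `linalg::MakeVec` helper and the
new shape-aware `RegTree` constructor):

    #include <xgboost/linalg.h>      // linalg::MakeVec
    #include <xgboost/tree_model.h>  // RegTree

    #include <cassert>
    #include <vector>

    int main() {
      // A tree producing 3 outputs per leaf, built over 10 features; a
      // size_leaf_vector > 1 selects the embedded MultiTargetTree.
      xgboost::RegTree tree{3, 10};
      assert(tree.IsMultiTarget());

      std::vector<float> base{0.f, 0.f, 0.f};
      std::vector<float> left{-.5f, -.3f, -.1f};
      std::vector<float> right{.5f, .3f, .1f};
      // Split the root on feature 2 at 0.5 (missing values go left); both
      // children become leaves carrying vector weights.
      tree.ExpandNode(0, /*split_index=*/2, /*split_cond=*/0.5f,
                      /*default_left=*/true,
                      xgboost::linalg::MakeVec(base.data(), base.size()),
                      xgboost::linalg::MakeVec(left.data(), left.size()),
                      xgboost::linalg::MakeVec(right.data(), right.size()));

      // Structure queries on RegTree dispatch to the multi-target tree.
      assert(!tree.IsLeaf(0));
      assert(tree.LeftChild(0) == 1 && tree.RightChild(0) == 2);
      assert(tree.GetDepth(1) == 1);
      return 0;
    }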
--- R-package/src/Makevars.in | 1 + R-package/src/Makevars.win | 1 + include/xgboost/base.h | 8 +- include/xgboost/multi_target_tree_model.h | 96 ++++++ include/xgboost/tree_model.h | 304 +++++++++++++----- src/common/host_device_vector.cc | 5 +- src/common/host_device_vector.cu | 7 +- src/predictor/gpu_predictor.cu | 32 +- src/tree/fit_stump.cc | 5 +- src/tree/hist/histogram.h | 18 +- src/tree/io_utils.h | 65 ++++ src/tree/multi_target_tree_model.cc | 220 +++++++++++++ src/tree/tree_model.cc | 248 +++++++------- src/tree/updater_gpu_hist.cu | 13 +- .../cpp/tree/test_multi_target_tree_model.cc | 48 +++ tests/cpp/tree/test_tree_model.cc | 2 +- 16 files changed, 809 insertions(+), 264 deletions(-) create mode 100644 include/xgboost/multi_target_tree_model.h create mode 100644 src/tree/io_utils.h create mode 100644 src/tree/multi_target_tree_model.cc create mode 100644 tests/cpp/tree/test_multi_target_tree_model.cc diff --git a/R-package/src/Makevars.in b/R-package/src/Makevars.in index ed3f10571..743bf0a66 100644 --- a/R-package/src/Makevars.in +++ b/R-package/src/Makevars.in @@ -61,6 +61,7 @@ OBJECTS= \ $(PKGROOT)/src/tree/fit_stump.o \ $(PKGROOT)/src/tree/tree_model.o \ $(PKGROOT)/src/tree/tree_updater.o \ + $(PKGROOT)/src/tree/multi_target_tree_model.o \ $(PKGROOT)/src/tree/updater_approx.o \ $(PKGROOT)/src/tree/updater_colmaker.o \ $(PKGROOT)/src/tree/updater_prune.o \ diff --git a/R-package/src/Makevars.win b/R-package/src/Makevars.win index 024ba1aa1..a32d2fd2e 100644 --- a/R-package/src/Makevars.win +++ b/R-package/src/Makevars.win @@ -60,6 +60,7 @@ OBJECTS= \ $(PKGROOT)/src/tree/param.o \ $(PKGROOT)/src/tree/fit_stump.o \ $(PKGROOT)/src/tree/tree_model.o \ + $(PKGROOT)/src/tree/multi_target_tree_model.o \ $(PKGROOT)/src/tree/tree_updater.o \ $(PKGROOT)/src/tree/updater_approx.o \ $(PKGROOT)/src/tree/updater_colmaker.o \ diff --git a/include/xgboost/base.h b/include/xgboost/base.h index d12e71a3a..00fc7fb4a 100644 --- a/include/xgboost/base.h +++ b/include/xgboost/base.h @@ -110,11 +110,11 @@ using bst_bin_t = int32_t; // NOLINT */ using bst_row_t = std::size_t; // NOLINT /*! \brief Type for tree node index. */ -using bst_node_t = int32_t; // NOLINT +using bst_node_t = std::int32_t; // NOLINT /*! \brief Type for ranking group index. */ -using bst_group_t = uint32_t; // NOLINT -/*! \brief Type for indexing target variables. */ -using bst_target_t = std::size_t; // NOLINT +using bst_group_t = std::uint32_t; // NOLINT +/*! \brief Type for indexing into output targets. */ +using bst_target_t = std::uint32_t; // NOLINT namespace detail { /*! \brief Implementation of gradient statistics pair. Template specialisation diff --git a/include/xgboost/multi_target_tree_model.h b/include/xgboost/multi_target_tree_model.h new file mode 100644 index 000000000..1ad7d6bf6 --- /dev/null +++ b/include/xgboost/multi_target_tree_model.h @@ -0,0 +1,96 @@ +/** + * Copyright 2023 by XGBoost contributors + * + * \brief Core data structure for multi-target trees. + */ +#ifndef XGBOOST_MULTI_TARGET_TREE_MODEL_H_ +#define XGBOOST_MULTI_TARGET_TREE_MODEL_H_ +#include // for bst_node_t, bst_target_t, bst_feature_t +#include // for Context +#include // for VectorView +#include // for Model +#include // for Span + +#include // for uint8_t +#include // for size_t +#include // for vector + +namespace xgboost { +struct TreeParam; +/** + * \brief Tree structure for multi-target model. 
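+ *
+ * The split structure is kept in parallel per-node arrays (left/right/parent
+ * links, split index, split condition, default direction). Leaf weights are
+ * stored in a single flat array with NumTarget() entries per node, indexed as
+ * weights_[nidx * NumTarget() + target].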
+ */ +class MultiTargetTree : public Model { + public: + static bst_node_t constexpr InvalidNodeId() { return -1; } + + private: + TreeParam const* param_; + std::vector left_; + std::vector right_; + std::vector parent_; + std::vector split_index_; + std::vector default_left_; + std::vector split_conds_; + std::vector weights_; + + [[nodiscard]] linalg::VectorView NodeWeight(bst_node_t nidx) const { + auto beg = nidx * this->NumTarget(); + auto v = common::Span{weights_}.subspan(beg, this->NumTarget()); + return linalg::MakeTensorView(Context::kCpuId, v, v.size()); + } + [[nodiscard]] linalg::VectorView NodeWeight(bst_node_t nidx) { + auto beg = nidx * this->NumTarget(); + auto v = common::Span{weights_}.subspan(beg, this->NumTarget()); + return linalg::MakeTensorView(Context::kCpuId, v, v.size()); + } + + public: + explicit MultiTargetTree(TreeParam const* param); + /** + * \brief Set the weight for a leaf. + */ + void SetLeaf(bst_node_t nidx, linalg::VectorView weight); + /** + * \brief Expand a leaf into split node. + */ + void Expand(bst_node_t nidx, bst_feature_t split_idx, float split_cond, bool default_left, + linalg::VectorView base_weight, + linalg::VectorView left_weight, + linalg::VectorView right_weight); + + [[nodiscard]] bool IsLeaf(bst_node_t nidx) const { return left_[nidx] == InvalidNodeId(); } + [[nodiscard]] bst_node_t Parent(bst_node_t nidx) const { return parent_.at(nidx); } + [[nodiscard]] bst_node_t LeftChild(bst_node_t nidx) const { return left_.at(nidx); } + [[nodiscard]] bst_node_t RightChild(bst_node_t nidx) const { return right_.at(nidx); } + + [[nodiscard]] bst_feature_t SplitIndex(bst_node_t nidx) const { return split_index_[nidx]; } + [[nodiscard]] float SplitCond(bst_node_t nidx) const { return split_conds_[nidx]; } + [[nodiscard]] bool DefaultLeft(bst_node_t nidx) const { return default_left_[nidx]; } + [[nodiscard]] bst_node_t DefaultChild(bst_node_t nidx) const { + return this->DefaultLeft(nidx) ? this->LeftChild(nidx) : this->RightChild(nidx); + } + + [[nodiscard]] bst_target_t NumTarget() const; + + [[nodiscard]] std::size_t Size() const; + + [[nodiscard]] bst_node_t Depth(bst_node_t nidx) const { + bst_node_t depth{0}; + while (Parent(nidx) != InvalidNodeId()) { + ++depth; + nidx = Parent(nidx); + } + return depth; + } + + [[nodiscard]] linalg::VectorView LeafValue(bst_node_t nidx) const { + CHECK(IsLeaf(nidx)); + return this->NodeWeight(nidx); + } + + void LoadModel(Json const& in) override; + void SaveModel(Json* out) const override; +}; +} // namespace xgboost +#endif // XGBOOST_MULTI_TARGET_TREE_MODEL_H_ diff --git a/include/xgboost/tree_model.h b/include/xgboost/tree_model.h index 70c71cac1..f646140dc 100644 --- a/include/xgboost/tree_model.h +++ b/include/xgboost/tree_model.h @@ -1,5 +1,5 @@ -/*! - * Copyright 2014-2022 by Contributors +/** + * Copyright 2014-2023 by Contributors * \file tree_model.h * \brief model structure for tree * \author Tianqi Chen @@ -9,60 +9,57 @@ #include #include - #include #include -#include #include +#include // for VectorView +#include #include +#include // for MultiTargetTree -#include -#include -#include -#include #include -#include +#include +#include +#include // for make_unique #include +#include +#include +#include namespace xgboost { - -struct PathElement; // forward declaration - class Json; + // FIXME(trivialfis): Once binary IO is gone, make this parameter internal as it should // not be configured by users. /*! \brief meta parameters of the tree */ struct TreeParam : public dmlc::Parameter { /*! 
\brief (Deprecated) number of start root */ - int deprecated_num_roots; + int deprecated_num_roots{1}; /*! \brief total number of nodes */ - int num_nodes; + int num_nodes{1}; /*!\brief number of deleted nodes */ - int num_deleted; + int num_deleted{0}; /*! \brief maximum depth, this is a statistics of the tree */ - int deprecated_max_depth; + int deprecated_max_depth{0}; /*! \brief number of features used for tree construction */ - bst_feature_t num_feature; + bst_feature_t num_feature{0}; /*! * \brief leaf vector size, used for vector tree * used to store more than one dimensional information in tree */ - int size_leaf_vector; + bst_target_t size_leaf_vector{1}; /*! \brief reserved part, make sure alignment works for 64bit */ int reserved[31]; /*! \brief constructor */ TreeParam() { // assert compact alignment - static_assert(sizeof(TreeParam) == (31 + 6) * sizeof(int), - "TreeParam: 64 bit align"); - std::memset(this, 0, sizeof(TreeParam)); - num_nodes = 1; - deprecated_num_roots = 1; + static_assert(sizeof(TreeParam) == (31 + 6) * sizeof(int), "TreeParam: 64 bit align"); + std::memset(reserved, 0, sizeof(reserved)); } // Swap byte order for all fields. Useful for transporting models between machines with different // endianness (big endian vs little endian) - inline TreeParam ByteSwap() const { + [[nodiscard]] TreeParam ByteSwap() const { TreeParam x = *this; dmlc::ByteSwap(&x.deprecated_num_roots, sizeof(x.deprecated_num_roots), 1); dmlc::ByteSwap(&x.num_nodes, sizeof(x.num_nodes), 1); @@ -80,17 +77,18 @@ struct TreeParam : public dmlc::Parameter { // other arguments are set by the algorithm. DMLC_DECLARE_FIELD(num_nodes).set_lower_bound(1).set_default(1); DMLC_DECLARE_FIELD(num_feature) + .set_default(0) .describe("Number of features used in tree construction."); - DMLC_DECLARE_FIELD(num_deleted); - DMLC_DECLARE_FIELD(size_leaf_vector).set_lower_bound(0).set_default(0) + DMLC_DECLARE_FIELD(num_deleted).set_default(0); + DMLC_DECLARE_FIELD(size_leaf_vector) + .set_lower_bound(0) + .set_default(1) .describe("Size of leaf vector, reserved for vector tree"); } bool operator==(const TreeParam& b) const { - return num_nodes == b.num_nodes && - num_deleted == b.num_deleted && - num_feature == b.num_feature && - size_leaf_vector == b.size_leaf_vector; + return num_nodes == b.num_nodes && num_deleted == b.num_deleted && + num_feature == b.num_feature && size_leaf_vector == b.size_leaf_vector; } }; @@ -114,7 +112,7 @@ struct RTreeNodeStat { } // Swap byte order for all fields. Useful for transporting models between machines with different // endianness (big endian vs little endian) - inline RTreeNodeStat ByteSwap() const { + [[nodiscard]] RTreeNodeStat ByteSwap() const { RTreeNodeStat x = *this; dmlc::ByteSwap(&x.loss_chg, sizeof(x.loss_chg), 1); dmlc::ByteSwap(&x.sum_hess, sizeof(x.sum_hess), 1); @@ -124,16 +122,45 @@ struct RTreeNodeStat { } }; -/*! +/** + * \brief Helper for defining copyable data structure that contains unique pointers. 
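+ *
+ * Copy construction performs a deep copy of the pointee (when present), so an
+ * enclosing class such as RegTree remains copyable while still owning its
+ * MultiTargetTree exclusively, e.g.:
+ *
+ *   CopyUniquePtr<int> a;
+ *   a.reset(new int{3});
+ *   CopyUniquePtr<int> b{a};  // *b == 3, held in a separate allocation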
+ */ +template +class CopyUniquePtr { + std::unique_ptr ptr_{nullptr}; + + public: + CopyUniquePtr() = default; + CopyUniquePtr(CopyUniquePtr const& that) { + ptr_.reset(nullptr); + if (that.ptr_) { + ptr_ = std::make_unique(*that); + } + } + T* get() const noexcept { return ptr_.get(); } // NOLINT + + T& operator*() { return *ptr_; } + T* operator->() noexcept { return this->get(); } + + T const& operator*() const { return *ptr_; } + T const* operator->() const noexcept { return this->get(); } + + explicit operator bool() const { return static_cast(ptr_); } + bool operator!() const { return !ptr_; } + void reset(T* ptr) { ptr_.reset(ptr); } // NOLINT +}; + +/** * \brief define regression tree to be the most common tree model. + * * This is the data structure used in xgboost's major tree models. */ class RegTree : public Model { public: using SplitCondT = bst_float; - static constexpr bst_node_t kInvalidNodeId {-1}; + static constexpr bst_node_t kInvalidNodeId{MultiTargetTree::InvalidNodeId()}; static constexpr uint32_t kDeletedNodeMarker = std::numeric_limits::max(); - static constexpr bst_node_t kRoot { 0 }; + static constexpr bst_node_t kRoot{0}; /*! \brief tree node */ class Node { @@ -151,51 +178,51 @@ class RegTree : public Model { } /*! \brief index of left child */ - XGBOOST_DEVICE int LeftChild() const { + XGBOOST_DEVICE [[nodiscard]] int LeftChild() const { return this->cleft_; } /*! \brief index of right child */ - XGBOOST_DEVICE int RightChild() const { + XGBOOST_DEVICE [[nodiscard]] int RightChild() const { return this->cright_; } /*! \brief index of default child when feature is missing */ - XGBOOST_DEVICE int DefaultChild() const { + XGBOOST_DEVICE [[nodiscard]] int DefaultChild() const { return this->DefaultLeft() ? this->LeftChild() : this->RightChild(); } /*! \brief feature index of split condition */ - XGBOOST_DEVICE unsigned SplitIndex() const { + XGBOOST_DEVICE [[nodiscard]] unsigned SplitIndex() const { return sindex_ & ((1U << 31) - 1U); } /*! \brief when feature is unknown, whether goes to left child */ - XGBOOST_DEVICE bool DefaultLeft() const { + XGBOOST_DEVICE [[nodiscard]] bool DefaultLeft() const { return (sindex_ >> 31) != 0; } /*! \brief whether current node is leaf node */ - XGBOOST_DEVICE bool IsLeaf() const { + XGBOOST_DEVICE [[nodiscard]] bool IsLeaf() const { return cleft_ == kInvalidNodeId; } /*! \return get leaf value of leaf node */ - XGBOOST_DEVICE bst_float LeafValue() const { + XGBOOST_DEVICE [[nodiscard]] float LeafValue() const { return (this->info_).leaf_value; } /*! \return get split condition of the node */ - XGBOOST_DEVICE SplitCondT SplitCond() const { + XGBOOST_DEVICE [[nodiscard]] SplitCondT SplitCond() const { return (this->info_).split_cond; } /*! \brief get parent of the node */ - XGBOOST_DEVICE int Parent() const { + XGBOOST_DEVICE [[nodiscard]] int Parent() const { return parent_ & ((1U << 31) - 1); } /*! \brief whether current node is left child */ - XGBOOST_DEVICE bool IsLeftChild() const { + XGBOOST_DEVICE [[nodiscard]] bool IsLeftChild() const { return (parent_ & (1U << 31)) != 0; } /*! \brief whether this node is deleted */ - XGBOOST_DEVICE bool IsDeleted() const { + XGBOOST_DEVICE [[nodiscard]] bool IsDeleted() const { return sindex_ == kDeletedNodeMarker; } /*! \brief whether current node is root */ - XGBOOST_DEVICE bool IsRoot() const { return parent_ == kInvalidNodeId; } + XGBOOST_DEVICE [[nodiscard]] bool IsRoot() const { return parent_ == kInvalidNodeId; } /*! 
* \brief set the left child * \param nid node id to right child @@ -252,7 +279,7 @@ class RegTree : public Model { info_.leaf_value == b.info_.leaf_value; } - inline Node ByteSwap() const { + [[nodiscard]] Node ByteSwap() const { Node x = *this; dmlc::ByteSwap(&x.parent_, sizeof(x.parent_), 1); dmlc::ByteSwap(&x.cleft_, sizeof(x.cleft_), 1); @@ -312,19 +339,28 @@ class RegTree : public Model { /*! \brief model parameter */ TreeParam param; - /*! \brief constructor */ RegTree() { - param.num_nodes = 1; - param.num_deleted = 0; + param.Init(Args{}); nodes_.resize(param.num_nodes); stats_.resize(param.num_nodes); split_types_.resize(param.num_nodes, FeatureType::kNumerical); split_categories_segments_.resize(param.num_nodes); - for (int i = 0; i < param.num_nodes; i ++) { + for (int i = 0; i < param.num_nodes; i++) { nodes_[i].SetLeaf(0.0f); nodes_[i].SetParent(kInvalidNodeId); } } + /** + * \brief Constructor that initializes the tree model with shape. + */ + explicit RegTree(bst_target_t n_targets, bst_feature_t n_features) : RegTree{} { + param.num_feature = n_features; + param.size_leaf_vector = n_targets; + if (n_targets > 1) { + this->p_mt_tree_.reset(new MultiTargetTree{¶m}); + } + } + /*! \brief get node given nid */ Node& operator[](int nid) { return nodes_[nid]; @@ -335,17 +371,17 @@ class RegTree : public Model { } /*! \brief get const reference to nodes */ - const std::vector& GetNodes() const { return nodes_; } + [[nodiscard]] const std::vector& GetNodes() const { return nodes_; } /*! \brief get const reference to stats */ - const std::vector& GetStats() const { return stats_; } + [[nodiscard]] const std::vector& GetStats() const { return stats_; } /*! \brief get node statistics given nid */ RTreeNodeStat& Stat(int nid) { return stats_[nid]; } /*! \brief get node statistics given nid */ - const RTreeNodeStat& Stat(int nid) const { + [[nodiscard]] const RTreeNodeStat& Stat(int nid) const { return stats_[nid]; } @@ -398,7 +434,7 @@ class RegTree : public Model { * * \param b The other tree. */ - bool Equal(const RegTree& b) const; + [[nodiscard]] bool Equal(const RegTree& b) const; /** * \brief Expands a leaf node into two additional leaf nodes. @@ -424,6 +460,11 @@ class RegTree : public Model { float right_sum, bst_node_t leaf_right_child = kInvalidNodeId); + void ExpandNode(bst_node_t nidx, bst_feature_t split_index, float split_cond, bool default_left, + linalg::VectorView base_weight, + linalg::VectorView left_weight, + linalg::VectorView right_weight); + /** * \brief Expands a leaf node with categories * @@ -445,15 +486,27 @@ class RegTree : public Model { bst_float right_leaf_weight, bst_float loss_change, float sum_hess, float left_sum, float right_sum); - bool HasCategoricalSplit() const { + [[nodiscard]] bool HasCategoricalSplit() const { return !split_categories_.empty(); } + /** + * \brief Whether this is a multi-target tree. + */ + [[nodiscard]] bool IsMultiTarget() const { return static_cast(p_mt_tree_); } + [[nodiscard]] bst_target_t NumTargets() const { return param.size_leaf_vector; } + [[nodiscard]] auto GetMultiTargetTree() const { + CHECK(IsMultiTarget()); + return p_mt_tree_.get(); + } /*! 
* \brief get current depth * \param nid node id */ - int GetDepth(int nid) const { + [[nodiscard]] std::int32_t GetDepth(bst_node_t nid) const { + if (IsMultiTarget()) { + return this->p_mt_tree_->Depth(nid); + } int depth = 0; while (!nodes_[nid].IsRoot()) { ++depth; @@ -461,12 +514,16 @@ class RegTree : public Model { } return depth; } + void SetLeaf(bst_node_t nidx, linalg::VectorView weight) { + CHECK(IsMultiTarget()); + return this->p_mt_tree_->SetLeaf(nidx, weight); + } /*! * \brief get maximum depth * \param nid node id */ - int MaxDepth(int nid) const { + [[nodiscard]] int MaxDepth(int nid) const { if (nodes_[nid].IsLeaf()) return 0; return std::max(MaxDepth(nodes_[nid].LeftChild())+1, MaxDepth(nodes_[nid].RightChild())+1); @@ -480,13 +537,13 @@ class RegTree : public Model { } /*! \brief number of extra nodes besides the root */ - int NumExtraNodes() const { + [[nodiscard]] int NumExtraNodes() const { return param.num_nodes - 1 - param.num_deleted; } /* \brief Count number of leaves in tree. */ - bst_node_t GetNumLeaves() const; - bst_node_t GetNumSplitNodes() const; + [[nodiscard]] bst_node_t GetNumLeaves() const; + [[nodiscard]] bst_node_t GetNumSplitNodes() const; /*! * \brief dense feature vector that can be taken by RegTree @@ -513,20 +570,20 @@ class RegTree : public Model { * \brief returns the size of the feature vector * \return the size of the feature vector */ - size_t Size() const; + [[nodiscard]] size_t Size() const; /*! * \brief get ith value * \param i feature index. * \return the i-th feature value */ - bst_float GetFvalue(size_t i) const; + [[nodiscard]] bst_float GetFvalue(size_t i) const; /*! * \brief check whether i-th entry is missing * \param i feature index. * \return whether i-th value is missing. */ - bool IsMissing(size_t i) const; - bool HasMissing() const; + [[nodiscard]] bool IsMissing(size_t i) const; + [[nodiscard]] bool HasMissing() const; private: @@ -557,56 +614,123 @@ class RegTree : public Model { * \param format the format to dump the model in * \return the string of dumped model */ - std::string DumpModel(const FeatureMap& fmap, - bool with_stats, - std::string format) const; + [[nodiscard]] std::string DumpModel(const FeatureMap& fmap, bool with_stats, + std::string format) const; /*! * \brief Get split type for a node. * \param nidx Index of node. * \return The type of this split. For leaf node it's always kNumerical. */ - FeatureType NodeSplitType(bst_node_t nidx) const { - return split_types_.at(nidx); - } + [[nodiscard]] FeatureType NodeSplitType(bst_node_t nidx) const { return split_types_.at(nidx); } /*! * \brief Get split types for all nodes. */ - std::vector const &GetSplitTypes() const { return split_types_; } - common::Span GetSplitCategories() const { return split_categories_; } + [[nodiscard]] std::vector const& GetSplitTypes() const { + return split_types_; + } + [[nodiscard]] common::Span GetSplitCategories() const { + return split_categories_; + } /*! 
* \brief Get the bit storage for categories */ - common::Span NodeCats(bst_node_t nidx) const { + [[nodiscard]] common::Span NodeCats(bst_node_t nidx) const { auto node_ptr = GetCategoriesMatrix().node_ptr; auto categories = GetCategoriesMatrix().categories; auto segment = node_ptr[nidx]; auto node_cats = categories.subspan(segment.beg, segment.size); return node_cats; } - auto const& GetSplitCategoriesPtr() const { return split_categories_segments_; } - - // The fields of split_categories_segments_[i] are set such that - // the range split_categories_[beg:(beg+size)] stores the bitset for - // the matching categories for the i-th node. - struct Segment { - size_t beg {0}; - size_t size {0}; - }; + [[nodiscard]] auto const& GetSplitCategoriesPtr() const { return split_categories_segments_; } + /** + * \brief CSR-like matrix for categorical splits. + * + * The fields of split_categories_segments_[i] are set such that the range + * node_ptr[beg:(beg+size)] stores the bitset for the matching categories for the + * i-th node. + */ struct CategoricalSplitMatrix { + struct Segment { + std::size_t beg{0}; + std::size_t size{0}; + }; common::Span split_type; common::Span categories; common::Span node_ptr; }; - CategoricalSplitMatrix GetCategoriesMatrix() const { + [[nodiscard]] CategoricalSplitMatrix GetCategoriesMatrix() const { CategoricalSplitMatrix view; view.split_type = common::Span(this->GetSplitTypes()); view.categories = this->GetSplitCategories(); - view.node_ptr = common::Span(split_categories_segments_); + view.node_ptr = common::Span(split_categories_segments_); return view; } + [[nodiscard]] bst_feature_t SplitIndex(bst_node_t nidx) const { + if (IsMultiTarget()) { + return this->p_mt_tree_->SplitIndex(nidx); + } + return (*this)[nidx].SplitIndex(); + } + [[nodiscard]] float SplitCond(bst_node_t nidx) const { + if (IsMultiTarget()) { + return this->p_mt_tree_->SplitCond(nidx); + } + return (*this)[nidx].SplitCond(); + } + [[nodiscard]] bool DefaultLeft(bst_node_t nidx) const { + if (IsMultiTarget()) { + return this->p_mt_tree_->DefaultLeft(nidx); + } + return (*this)[nidx].DefaultLeft(); + } + [[nodiscard]] bool IsRoot(bst_node_t nidx) const { + if (IsMultiTarget()) { + return nidx == kRoot; + } + return (*this)[nidx].IsRoot(); + } + [[nodiscard]] bool IsLeaf(bst_node_t nidx) const { + if (IsMultiTarget()) { + return this->p_mt_tree_->IsLeaf(nidx); + } + return (*this)[nidx].IsLeaf(); + } + [[nodiscard]] bst_node_t Parent(bst_node_t nidx) const { + if (IsMultiTarget()) { + return this->p_mt_tree_->Parent(nidx); + } + return (*this)[nidx].Parent(); + } + [[nodiscard]] bst_node_t LeftChild(bst_node_t nidx) const { + if (IsMultiTarget()) { + return this->p_mt_tree_->LeftChild(nidx); + } + return (*this)[nidx].LeftChild(); + } + [[nodiscard]] bst_node_t RightChild(bst_node_t nidx) const { + if (IsMultiTarget()) { + return this->p_mt_tree_->RightChild(nidx); + } + return (*this)[nidx].RightChild(); + } + [[nodiscard]] bool IsLeftChild(bst_node_t nidx) const { + if (IsMultiTarget()) { + CHECK_NE(nidx, kRoot); + auto p = this->p_mt_tree_->Parent(nidx); + return nidx == this->p_mt_tree_->LeftChild(p); + } + return (*this)[nidx].IsLeftChild(); + } + [[nodiscard]] bst_node_t Size() const { + if (IsMultiTarget()) { + return this->p_mt_tree_->Size(); + } + return this->nodes_.size(); + } + private: template void LoadCategoricalSplit(Json const& in); @@ -622,8 +746,9 @@ class RegTree : public Model { // Categories for each internal node. 
std::vector split_categories_; // Ptr to split categories of each node. - std::vector split_categories_segments_; - + std::vector split_categories_segments_; + // ptr to multi-target tree with vector leaf. + CopyUniquePtr p_mt_tree_; // allocate a new node, // !!!!!! NOTE: may cause BUG here, nodes.resize bst_node_t AllocNode() { @@ -703,5 +828,10 @@ inline bool RegTree::FVec::IsMissing(size_t i) const { inline bool RegTree::FVec::HasMissing() const { return has_missing_; } + +// Multi-target tree not yet implemented error +inline StringView MTNotImplemented() { + return " support for multi-target tree is not yet implemented."; +} } // namespace xgboost #endif // XGBOOST_TREE_MODEL_H_ diff --git a/src/common/host_device_vector.cc b/src/common/host_device_vector.cc index 030070d9a..55c0ecf20 100644 --- a/src/common/host_device_vector.cc +++ b/src/common/host_device_vector.cc @@ -1,5 +1,5 @@ -/*! - * Copyright 2017 XGBoost contributors +/** + * Copyright 2017-2023 by XGBoost contributors */ #ifndef XGBOOST_USE_CUDA @@ -179,7 +179,6 @@ template class HostDeviceVector; template class HostDeviceVector; template class HostDeviceVector; // bst_row_t template class HostDeviceVector; // bst_feature_t -template class HostDeviceVector; #if defined(__APPLE__) || defined(__EMSCRIPTEN__) /* diff --git a/src/common/host_device_vector.cu b/src/common/host_device_vector.cu index a5c5dbf8f..1fa9a3b22 100644 --- a/src/common/host_device_vector.cu +++ b/src/common/host_device_vector.cu @@ -1,7 +1,6 @@ -/*! - * Copyright 2017 XGBoost contributors +/** + * Copyright 2017-2023 by XGBoost contributors */ - #include #include @@ -412,7 +411,7 @@ template class HostDeviceVector; template class HostDeviceVector; // bst_row_t template class HostDeviceVector; // bst_feature_t template class HostDeviceVector; -template class HostDeviceVector; +template class HostDeviceVector; template class HostDeviceVector; #if defined(__APPLE__) diff --git a/src/predictor/gpu_predictor.cu b/src/predictor/gpu_predictor.cu index 35daf701c..caf4b6bb4 100644 --- a/src/predictor/gpu_predictor.cu +++ b/src/predictor/gpu_predictor.cu @@ -1,5 +1,5 @@ -/*! 
- * Copyright 2017-2021 by Contributors +/** + * Copyright 2017-2023 by XGBoost Contributors */ #include #include @@ -25,9 +25,7 @@ #include "xgboost/tree_model.h" #include "xgboost/tree_updater.h" -namespace xgboost { -namespace predictor { - +namespace xgboost::predictor { DMLC_REGISTRY_FILE_TAG(gpu_predictor); struct TreeView { @@ -35,12 +33,11 @@ struct TreeView { common::Span d_tree; XGBOOST_DEVICE - TreeView(size_t tree_begin, size_t tree_idx, - common::Span d_nodes, + TreeView(size_t tree_begin, size_t tree_idx, common::Span d_nodes, common::Span d_tree_segments, common::Span d_tree_split_types, common::Span d_cat_tree_segments, - common::Span d_cat_node_segments, + common::Span d_cat_node_segments, common::Span d_categories) { auto begin = d_tree_segments[tree_idx - tree_begin]; auto n_nodes = d_tree_segments[tree_idx - tree_begin + 1] - @@ -255,7 +252,7 @@ PredictLeafKernel(Data data, common::Span d_nodes, common::Span d_tree_split_types, common::Span d_cat_tree_segments, - common::Span d_cat_node_segments, + common::Span d_cat_node_segments, common::Span d_categories, size_t tree_begin, size_t tree_end, size_t num_features, @@ -290,7 +287,7 @@ PredictKernel(Data data, common::Span d_nodes, common::Span d_tree_group, common::Span d_tree_split_types, common::Span d_cat_tree_segments, - common::Span d_cat_node_segments, + common::Span d_cat_node_segments, common::Span d_categories, size_t tree_begin, size_t tree_end, size_t num_features, size_t num_rows, size_t entry_start, bool use_shared, int num_group, float missing) { @@ -334,7 +331,7 @@ class DeviceModel { // Pointer to each tree, segmenting the node array. HostDeviceVector categories_tree_segments; // Pointer to each node, segmenting categories array. - HostDeviceVector categories_node_segments; + HostDeviceVector categories_node_segments; HostDeviceVector categories; size_t tree_beg_; // NOLINT @@ -400,9 +397,9 @@ class DeviceModel { h_split_cat_segments.push_back(h_categories.size()); } - categories_node_segments = - HostDeviceVector(h_tree_segments.back(), {}, gpu_id); - std::vector &h_categories_node_segments = + categories_node_segments = HostDeviceVector( + h_tree_segments.back(), {}, gpu_id); + std::vector& h_categories_node_segments = categories_node_segments.HostVector(); for (auto tree_idx = tree_begin; tree_idx < tree_end; ++tree_idx) { auto const &src_cats_ptr = model.trees.at(tree_idx)->GetSplitCategoriesPtr(); @@ -542,10 +539,10 @@ void ExtractPaths( if (thrust::any_of(dh::tbegin(d_split_types), dh::tend(d_split_types), common::IsCatOp{})) { dh::PinnedMemory pinned; - auto h_max_cat = pinned.GetSpan(1); + auto h_max_cat = pinned.GetSpan(1); auto max_elem_it = dh::MakeTransformIterator( dh::tbegin(d_cat_node_segments), - [] __device__(RegTree::Segment seg) { return seg.size; }); + [] __device__(RegTree::CategoricalSplitMatrix::Segment seg) { return seg.size; }); size_t max_cat_it = thrust::max_element(thrust::device, max_elem_it, max_elem_it + d_cat_node_segments.size()) - @@ -1028,5 +1025,4 @@ XGBOOST_REGISTER_PREDICTOR(GPUPredictor, "gpu_predictor") .describe("Make predictions using GPU.") .set_body([](Context const* ctx) { return new GPUPredictor(ctx); }); -} // namespace predictor -} // namespace xgboost +} // namespace xgboost::predictor diff --git a/src/tree/fit_stump.cc b/src/tree/fit_stump.cc index 82efff2c7..ad0253d22 100644 --- a/src/tree/fit_stump.cc +++ b/src/tree/fit_stump.cc @@ -71,10 +71,7 @@ void FitStump(Context const* ctx, HostDeviceVector const& gpair, auto n_samples = gpair.Size() / n_targets; 
gpair.SetDevice(ctx->gpu_id); - linalg::TensorView gpair_t{ - ctx->IsCPU() ? gpair.ConstHostSpan() : gpair.ConstDeviceSpan(), - {n_samples, n_targets}, - ctx->gpu_id}; + auto gpair_t = linalg::MakeTensorView(ctx, &gpair, n_samples, n_targets); ctx->IsCPU() ? cpu_impl::FitStump(ctx, gpair_t, out->HostView()) : cuda_impl::FitStump(ctx, gpair_t, out->View(ctx->gpu_id)); } diff --git a/src/tree/hist/histogram.h b/src/tree/hist/histogram.h index 4e64cbd75..50b90f244 100644 --- a/src/tree/hist/histogram.h +++ b/src/tree/hist/histogram.h @@ -12,7 +12,7 @@ #include "../../common/hist_util.h" #include "../../data/gradient_index.h" #include "expand_entry.h" -#include "xgboost/tree_model.h" +#include "xgboost/tree_model.h" // for RegTree namespace xgboost { namespace tree { @@ -175,8 +175,8 @@ class HistogramBuilder { auto this_local = hist_local_worker_[entry.nid]; common::CopyHist(this_local, this_hist, r.begin(), r.end()); - if (!(*p_tree)[entry.nid].IsRoot()) { - const size_t parent_id = (*p_tree)[entry.nid].Parent(); + if (!p_tree->IsRoot(entry.nid)) { + const size_t parent_id = p_tree->Parent(entry.nid); const int subtraction_node_id = nodes_for_subtraction_trick[node].nid; auto parent_hist = this->hist_local_worker_[parent_id]; auto sibling_hist = this->hist_[subtraction_node_id]; @@ -213,8 +213,8 @@ class HistogramBuilder { // Merging histograms from each thread into once this->buffer_.ReduceHist(node, r.begin(), r.end()); - if (!(*p_tree)[entry.nid].IsRoot()) { - auto const parent_id = (*p_tree)[entry.nid].Parent(); + if (!p_tree->IsRoot(entry.nid)) { + auto const parent_id = p_tree->Parent(entry.nid); auto const subtraction_node_id = nodes_for_subtraction_trick[node].nid; auto parent_hist = this->hist_[parent_id]; auto sibling_hist = this->hist_[subtraction_node_id]; @@ -237,10 +237,10 @@ class HistogramBuilder { common::ParallelFor2d( space, this->n_threads_, [&](size_t node, common::Range1d r) { const auto &entry = nodes[node]; - if (!((*p_tree)[entry.nid].IsLeftChild())) { + if (!(p_tree->IsLeftChild(entry.nid))) { auto this_hist = this->hist_[entry.nid]; - if (!(*p_tree)[entry.nid].IsRoot()) { + if (!p_tree->IsRoot(entry.nid)) { const int subtraction_node_id = subtraction_nodes[node].nid; auto parent_hist = hist_[(*p_tree)[entry.nid].Parent()]; auto sibling_hist = hist_[subtraction_node_id]; @@ -285,7 +285,7 @@ class HistogramBuilder { std::sort(merged_node_ids.begin(), merged_node_ids.end()); int n_left = 0; for (auto const &nid : merged_node_ids) { - if ((*p_tree)[nid].IsLeftChild()) { + if (p_tree->IsLeftChild(nid)) { this->hist_.AddHistRow(nid); (*starting_index) = std::min(nid, (*starting_index)); n_left++; @@ -293,7 +293,7 @@ class HistogramBuilder { } } for (auto const &nid : merged_node_ids) { - if (!((*p_tree)[nid].IsLeftChild())) { + if (!(p_tree->IsLeftChild(nid))) { this->hist_.AddHistRow(nid); this->hist_local_worker_.AddHistRow(nid); } diff --git a/src/tree/io_utils.h b/src/tree/io_utils.h new file mode 100644 index 000000000..a0d31cc83 --- /dev/null +++ b/src/tree/io_utils.h @@ -0,0 +1,65 @@ +/** + * Copyright 2023 by XGBoost Contributors + */ +#ifndef XGBOOST_TREE_IO_UTILS_H_ +#define XGBOOST_TREE_IO_UTILS_H_ +#include // for string +#include // for enable_if_t, is_same, conditional_t +#include // for vector + +#include "xgboost/json.h" // for Json + +namespace xgboost { +template +using FloatArrayT = std::conditional_t; +template +using U8ArrayT = std::conditional_t; +template +using I32ArrayT = std::conditional_t; +template +using I64ArrayT = std::conditional_t; 
+template +using IndexArrayT = std::conditional_t, I32ArrayT>; + +// typed array, not boolean +template +std::enable_if_t::value && !std::is_same::value, T> GetElem( + std::vector const& arr, size_t i) { + return arr[i]; +} +// typed array boolean +template +std::enable_if_t::value && std::is_same::value && + std::is_same::value, + bool> +GetElem(std::vector const& arr, size_t i) { + return arr[i] == 1; +} +// json array +template +std::enable_if_t< + std::is_same::value, + std::conditional_t::value, int64_t, + std::conditional_t::value, bool, float>>> +GetElem(std::vector const& arr, size_t i) { + if (std::is_same::value && !IsA(arr[i])) { + return get(arr[i]) == 1; + } + return get(arr[i]); +} + +namespace tree_field { +inline std::string const kLossChg{"loss_changes"}; +inline std::string const kSumHess{"sum_hessian"}; +inline std::string const kBaseWeight{"base_weights"}; + +inline std::string const kSplitIdx{"split_indices"}; +inline std::string const kSplitCond{"split_conditions"}; +inline std::string const kDftLeft{"default_left"}; + +inline std::string const kParent{"parents"}; +inline std::string const kLeft{"left_children"}; +inline std::string const kRight{"right_children"}; +} // namespace tree_field +} // namespace xgboost +#endif // XGBOOST_TREE_IO_UTILS_H_ diff --git a/src/tree/multi_target_tree_model.cc b/src/tree/multi_target_tree_model.cc new file mode 100644 index 000000000..bccc1967e --- /dev/null +++ b/src/tree/multi_target_tree_model.cc @@ -0,0 +1,220 @@ +/** + * Copyright 2023 by XGBoost Contributors + */ +#include "xgboost/multi_target_tree_model.h" + +#include // for copy_n +#include // for size_t +#include // for int32_t, uint8_t +#include // for numeric_limits +#include // for string_view +#include // for move +#include // for vector + +#include "io_utils.h" // for I32ArrayT, FloatArrayT, GetElem, ... +#include "xgboost/base.h" // for bst_node_t, bst_feature_t, bst_target_t +#include "xgboost/json.h" // for Json, get, Object, Number, Integer, ... 
+#include "xgboost/logging.h" +#include "xgboost/tree_model.h" // for TreeParam + +namespace xgboost { +MultiTargetTree::MultiTargetTree(TreeParam const* param) + : param_{param}, + left_(1ul, InvalidNodeId()), + right_(1ul, InvalidNodeId()), + parent_(1ul, InvalidNodeId()), + split_index_(1ul, 0), + default_left_(1ul, 0), + split_conds_(1ul, std::numeric_limits::quiet_NaN()), + weights_(param->size_leaf_vector, std::numeric_limits::quiet_NaN()) { + CHECK_GT(param_->size_leaf_vector, 1); +} + +template +void LoadModelImpl(Json const& in, std::vector* p_weights, std::vector* p_lefts, + std::vector* p_rights, std::vector* p_parents, + std::vector* p_conds, std::vector* p_fidx, + std::vector* p_dft_left) { + namespace tf = tree_field; + + auto get_float = [&](std::string_view name, std::vector* p_out) { + auto& values = get>(get(in).find(name)->second); + auto& out = *p_out; + out.resize(values.size()); + for (std::size_t i = 0; i < values.size(); ++i) { + out[i] = GetElem(values, i); + } + }; + get_float(tf::kBaseWeight, p_weights); + get_float(tf::kSplitCond, p_conds); + + auto get_nidx = [&](std::string_view name, std::vector* p_nidx) { + auto& nidx = get>(get(in).find(name)->second); + auto& out_nidx = *p_nidx; + out_nidx.resize(nidx.size()); + for (std::size_t i = 0; i < nidx.size(); ++i) { + out_nidx[i] = GetElem(nidx, i); + } + }; + get_nidx(tf::kLeft, p_lefts); + get_nidx(tf::kRight, p_rights); + get_nidx(tf::kParent, p_parents); + + auto const& splits = get const>(in[tf::kSplitIdx]); + p_fidx->resize(splits.size()); + auto& out_fidx = *p_fidx; + for (std::size_t i = 0; i < splits.size(); ++i) { + out_fidx[i] = GetElem(splits, i); + } + + auto const& dft_left = get const>(in[tf::kDftLeft]); + auto& out_dft_l = *p_dft_left; + out_dft_l.resize(dft_left.size()); + for (std::size_t i = 0; i < dft_left.size(); ++i) { + out_dft_l[i] = GetElem(dft_left, i); + } +} + +void MultiTargetTree::LoadModel(Json const& in) { + namespace tf = tree_field; + bool typed = IsA(in[tf::kBaseWeight]); + bool feature_is_64 = IsA(in[tf::kSplitIdx]); + + if (typed && feature_is_64) { + LoadModelImpl(in, &weights_, &left_, &right_, &parent_, &split_conds_, + &split_index_, &default_left_); + } else if (typed && !feature_is_64) { + LoadModelImpl(in, &weights_, &left_, &right_, &parent_, &split_conds_, + &split_index_, &default_left_); + } else if (!typed && feature_is_64) { + LoadModelImpl(in, &weights_, &left_, &right_, &parent_, &split_conds_, + &split_index_, &default_left_); + } else { + LoadModelImpl(in, &weights_, &left_, &right_, &parent_, &split_conds_, + &split_index_, &default_left_); + } +} + +void MultiTargetTree::SaveModel(Json* p_out) const { + CHECK(p_out); + auto& out = *p_out; + + auto n_nodes = param_->num_nodes; + + // nodes + I32Array lefts(n_nodes); + I32Array rights(n_nodes); + I32Array parents(n_nodes); + F32Array conds(n_nodes); + U8Array default_left(n_nodes); + F32Array weights(n_nodes * this->NumTarget()); + + auto save_tree = [&](auto* p_indices_array) { + auto& indices_array = *p_indices_array; + for (bst_node_t nidx = 0; nidx < n_nodes; ++nidx) { + CHECK_LT(nidx, left_.size()); + lefts.Set(nidx, left_[nidx]); + CHECK_LT(nidx, right_.size()); + rights.Set(nidx, right_[nidx]); + CHECK_LT(nidx, parent_.size()); + parents.Set(nidx, parent_[nidx]); + CHECK_LT(nidx, split_index_.size()); + indices_array.Set(nidx, split_index_[nidx]); + conds.Set(nidx, split_conds_[nidx]); + default_left.Set(nidx, default_left_[nidx]); + + auto in_weight = this->NodeWeight(nidx); + auto weight_out = 
common::Span(weights.GetArray()) + .subspan(nidx * this->NumTarget(), this->NumTarget()); + CHECK_EQ(in_weight.Size(), weight_out.size()); + std::copy_n(in_weight.Values().data(), in_weight.Size(), weight_out.data()); + } + }; + + namespace tf = tree_field; + + if (this->param_->num_feature > + static_cast(std::numeric_limits::max())) { + I64Array indices_64(n_nodes); + save_tree(&indices_64); + out[tf::kSplitIdx] = std::move(indices_64); + } else { + I32Array indices_32(n_nodes); + save_tree(&indices_32); + out[tf::kSplitIdx] = std::move(indices_32); + } + + out[tf::kBaseWeight] = std::move(weights); + out[tf::kLeft] = std::move(lefts); + out[tf::kRight] = std::move(rights); + out[tf::kParent] = std::move(parents); + + out[tf::kSplitCond] = std::move(conds); + out[tf::kDftLeft] = std::move(default_left); +} + +void MultiTargetTree::SetLeaf(bst_node_t nidx, linalg::VectorView weight) { + CHECK(this->IsLeaf(nidx)) << "Collapsing a split node to leaf " << MTNotImplemented(); + auto const next_nidx = nidx + 1; + CHECK_EQ(weight.Size(), this->NumTarget()); + CHECK_GE(weights_.size(), next_nidx * weight.Size()); + auto out_weight = common::Span(weights_).subspan(nidx * weight.Size(), weight.Size()); + for (std::size_t i = 0; i < weight.Size(); ++i) { + out_weight[i] = weight(i); + } +} + +void MultiTargetTree::Expand(bst_node_t nidx, bst_feature_t split_idx, float split_cond, + bool default_left, linalg::VectorView base_weight, + linalg::VectorView left_weight, + linalg::VectorView right_weight) { + CHECK(this->IsLeaf(nidx)); + CHECK_GE(parent_.size(), 1); + CHECK_EQ(parent_.size(), left_.size()); + CHECK_EQ(left_.size(), right_.size()); + + std::size_t n = param_->num_nodes + 2; + CHECK_LT(split_idx, this->param_->num_feature); + left_.resize(n, InvalidNodeId()); + right_.resize(n, InvalidNodeId()); + parent_.resize(n, InvalidNodeId()); + + auto left_child = parent_.size() - 2; + auto right_child = parent_.size() - 1; + + left_[nidx] = left_child; + right_[nidx] = right_child; + + if (nidx != 0) { + CHECK_NE(parent_[nidx], InvalidNodeId()); + } + + parent_[left_child] = nidx; + parent_[right_child] = nidx; + + split_index_.resize(n); + split_index_[nidx] = split_idx; + + split_conds_.resize(n); + split_conds_[nidx] = split_cond; + default_left_.resize(n); + default_left_[nidx] = static_cast(default_left); + + weights_.resize(n * this->NumTarget()); + auto p_weight = this->NodeWeight(nidx); + CHECK_EQ(p_weight.Size(), base_weight.Size()); + auto l_weight = this->NodeWeight(left_child); + CHECK_EQ(l_weight.Size(), left_weight.Size()); + auto r_weight = this->NodeWeight(right_child); + CHECK_EQ(r_weight.Size(), right_weight.Size()); + + for (std::size_t i = 0; i < base_weight.Size(); ++i) { + p_weight(i) = base_weight(i); + l_weight(i) = left_weight(i); + r_weight(i) = right_weight(i); + } +} + +bst_target_t MultiTargetTree::NumTarget() const { return param_->size_leaf_vector; } +std::size_t MultiTargetTree::Size() const { return parent_.size(); } +} // namespace xgboost diff --git a/src/tree/tree_model.cc b/src/tree/tree_model.cc index 55e37a919..0891ec3b2 100644 --- a/src/tree/tree_model.cc +++ b/src/tree/tree_model.cc @@ -1,25 +1,27 @@ -/*! 
- * Copyright 2015-2022 by Contributors +/** + * Copyright 2015-2023 by Contributors * \file tree_model.cc * \brief model structure for tree */ -#include #include - -#include -#include +#include #include +#include -#include -#include #include #include -#include +#include +#include +#include -#include "param.h" -#include "../common/common.h" #include "../common/categorical.h" +#include "../common/common.h" #include "../predictor/predict_fn.h" +#include "io_utils.h" // GetElem +#include "param.h" +#include "xgboost/base.h" +#include "xgboost/data.h" +#include "xgboost/logging.h" namespace xgboost { // register tree parameter @@ -729,12 +731,9 @@ XGBOOST_REGISTER_TREE_IO(GraphvizGenerator, "dot") constexpr bst_node_t RegTree::kRoot; -std::string RegTree::DumpModel(const FeatureMap& fmap, - bool with_stats, - std::string format) const { - std::unique_ptr builder { - TreeGenerator::Create(format, fmap, with_stats) - }; +std::string RegTree::DumpModel(const FeatureMap& fmap, bool with_stats, std::string format) const { + CHECK(!IsMultiTarget()); + std::unique_ptr builder{TreeGenerator::Create(format, fmap, with_stats)}; builder->BuildTree(*this); std::string result = builder->Str(); @@ -742,6 +741,7 @@ std::string RegTree::DumpModel(const FeatureMap& fmap, } bool RegTree::Equal(const RegTree& b) const { + CHECK(!IsMultiTarget()); if (NumExtraNodes() != b.NumExtraNodes()) { return false; } @@ -758,6 +758,7 @@ bool RegTree::Equal(const RegTree& b) const { } bst_node_t RegTree::GetNumLeaves() const { + CHECK(!IsMultiTarget()); bst_node_t leaves { 0 }; auto const& self = *this; this->WalkTree([&leaves, &self](bst_node_t nidx) { @@ -770,6 +771,7 @@ bst_node_t RegTree::GetNumLeaves() const { } bst_node_t RegTree::GetNumSplitNodes() const { + CHECK(!IsMultiTarget()); bst_node_t splits { 0 }; auto const& self = *this; this->WalkTree([&splits, &self](bst_node_t nidx) { @@ -787,6 +789,7 @@ void RegTree::ExpandNode(bst_node_t nid, unsigned split_index, bst_float split_v bst_float right_leaf_weight, bst_float loss_change, float sum_hess, float left_sum, float right_sum, bst_node_t leaf_right_child) { + CHECK(!IsMultiTarget()); int pleft = this->AllocNode(); int pright = this->AllocNode(); auto &node = nodes_[nid]; @@ -807,11 +810,31 @@ void RegTree::ExpandNode(bst_node_t nid, unsigned split_index, bst_float split_v this->split_types_.at(nid) = FeatureType::kNumerical; } +void RegTree::ExpandNode(bst_node_t nidx, bst_feature_t split_index, float split_cond, + bool default_left, linalg::VectorView base_weight, + linalg::VectorView left_weight, + linalg::VectorView right_weight) { + CHECK(IsMultiTarget()); + CHECK_LT(split_index, this->param.num_feature); + CHECK(this->p_mt_tree_); + CHECK_GT(param.size_leaf_vector, 1); + + this->p_mt_tree_->Expand(nidx, split_index, split_cond, default_left, base_weight, left_weight, + right_weight); + + split_types_.resize(this->Size(), FeatureType::kNumerical); + split_categories_segments_.resize(this->Size()); + this->split_types_.at(nidx) = FeatureType::kNumerical; + + this->param.num_nodes = this->p_mt_tree_->Size(); +} + void RegTree::ExpandCategorical(bst_node_t nid, bst_feature_t split_index, common::Span split_cat, bool default_left, bst_float base_weight, bst_float left_leaf_weight, bst_float right_leaf_weight, bst_float loss_change, float sum_hess, float left_sum, float right_sum) { + CHECK(!IsMultiTarget()); this->ExpandNode(nid, split_index, std::numeric_limits::quiet_NaN(), default_left, base_weight, left_leaf_weight, right_leaf_weight, loss_change, sum_hess, @@ 
-893,44 +916,17 @@ void RegTree::Save(dmlc::Stream* fo) const { } } } -// typed array, not boolean -template -std::enable_if_t::value && !std::is_same::value, T> GetElem( - std::vector const& arr, size_t i) { - return arr[i]; -} -// typed array boolean -template -std::enable_if_t::value && std::is_same::value && - std::is_same::value, - bool> -GetElem(std::vector const& arr, size_t i) { - return arr[i] == 1; -} -// json array -template -std::enable_if_t< - std::is_same::value, - std::conditional_t::value, int64_t, - std::conditional_t::value, bool, float>>> -GetElem(std::vector const& arr, size_t i) { - if (std::is_same::value && !IsA(arr[i])) { - return get(arr[i]) == 1; - } - return get(arr[i]); -} template void RegTree::LoadCategoricalSplit(Json const& in) { - using I64ArrayT = std::conditional_t; - using I32ArrayT = std::conditional_t; + auto const& categories_segments = get>(in["categories_segments"]); + auto const& categories_sizes = get>(in["categories_sizes"]); + auto const& categories_nodes = get>(in["categories_nodes"]); + auto const& categories = get>(in["categories"]); - auto const& categories_segments = get(in["categories_segments"]); - auto const& categories_sizes = get(in["categories_sizes"]); - auto const& categories_nodes = get(in["categories_nodes"]); - auto const& categories = get(in["categories"]); - - size_t cnt = 0; + auto split_type = get>(in["split_type"]); + bst_node_t n_nodes = split_type.size(); + std::size_t cnt = 0; bst_node_t last_cat_node = -1; if (!categories_nodes.empty()) { last_cat_node = GetElem(categories_nodes, cnt); @@ -938,7 +934,10 @@ void RegTree::LoadCategoricalSplit(Json const& in) { // `categories_segments' is only available for categorical nodes to prevent overhead for // numerical node. As a result, we need to track the categorical nodes we have processed // so far. 
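// For illustration, a self-contained sketch of decoding the compact categorical layout
// described above. `DecodeCategories` is a hypothetical helper, not part of this patch;
// the parameter names mirror the JSON fields, and the element types follow the array
// types used in SaveCategoricalSplit below (I64Array for segments/sizes, I32Array for
// categories and node indices).
#include <cstdint>
#include <vector>

// Expand the four parallel arrays into a per-categorical-node list of the categories
// matching that node's split condition.
inline std::vector<std::vector<std::int32_t>> DecodeCategories(
    std::vector<std::int32_t> const& categories_nodes,     // nodes with categorical splits
    std::vector<std::int64_t> const& categories_segments,  // offset of each node's slice
    std::vector<std::int64_t> const& categories_sizes,     // length of each node's slice
    std::vector<std::int32_t> const& categories) {         // flat category storage
  std::vector<std::vector<std::int32_t>> out;
  out.reserve(categories_nodes.size());
  for (std::size_t i = 0; i < categories_nodes.size(); ++i) {
    auto begin = categories.begin() + categories_segments[i];
    out.emplace_back(begin, begin + categories_sizes[i]);
  }
  return out;
}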
- for (bst_node_t nidx = 0; nidx < param.num_nodes; ++nidx) { + split_types_.resize(n_nodes, FeatureType::kNumerical); + split_categories_segments_.resize(n_nodes); + for (bst_node_t nidx = 0; nidx < n_nodes; ++nidx) { + split_types_[nidx] = static_cast(GetElem(split_type, nidx)); if (nidx == last_cat_node) { auto j_begin = GetElem(categories_segments, cnt); auto j_end = GetElem(categories_sizes, cnt) + j_begin; @@ -985,15 +984,17 @@ template void RegTree::LoadCategoricalSplit(Json const& in); void RegTree::SaveCategoricalSplit(Json* p_out) const { auto& out = *p_out; - CHECK_EQ(this->split_types_.size(), param.num_nodes); - CHECK_EQ(this->GetSplitCategoriesPtr().size(), param.num_nodes); + CHECK_EQ(this->split_types_.size(), this->Size()); + CHECK_EQ(this->GetSplitCategoriesPtr().size(), this->Size()); I64Array categories_segments; I64Array categories_sizes; I32Array categories; // bst_cat_t = int32_t I32Array categories_nodes; // bst_note_t = int32_t + U8Array split_type(split_types_.size()); for (size_t i = 0; i < nodes_.size(); ++i) { + split_type.Set(i, static_cast>(this->NodeSplitType(i))); if (this->split_types_[i] == FeatureType::kCategorical) { categories_nodes.GetArray().emplace_back(i); auto begin = categories.Size(); @@ -1012,66 +1013,49 @@ void RegTree::SaveCategoricalSplit(Json* p_out) const { } } + out["split_type"] = std::move(split_type); out["categories_segments"] = std::move(categories_segments); out["categories_sizes"] = std::move(categories_sizes); out["categories_nodes"] = std::move(categories_nodes); out["categories"] = std::move(categories); } -template , - typename U8ArrayT = std::conditional_t, - typename I32ArrayT = std::conditional_t, - typename I64ArrayT = std::conditional_t, - typename IndexArrayT = std::conditional_t> -bool LoadModelImpl(Json const& in, TreeParam* param, std::vector* p_stats, - std::vector* p_split_types, std::vector* p_nodes, - std::vector* p_split_categories_segments) { +template +void LoadModelImpl(Json const& in, TreeParam const& param, std::vector* p_stats, + std::vector* p_nodes) { + namespace tf = tree_field; auto& stats = *p_stats; - auto& split_types = *p_split_types; auto& nodes = *p_nodes; - auto& split_categories_segments = *p_split_categories_segments; - FromJson(in["tree_param"], param); - auto n_nodes = param->num_nodes; + auto n_nodes = param.num_nodes; CHECK_NE(n_nodes, 0); // stats - auto const& loss_changes = get(in["loss_changes"]); + auto const& loss_changes = get>(in[tf::kLossChg]); CHECK_EQ(loss_changes.size(), n_nodes); - auto const& sum_hessian = get(in["sum_hessian"]); + auto const& sum_hessian = get>(in[tf::kSumHess]); CHECK_EQ(sum_hessian.size(), n_nodes); - auto const& base_weights = get(in["base_weights"]); + auto const& base_weights = get>(in[tf::kBaseWeight]); CHECK_EQ(base_weights.size(), n_nodes); // nodes - auto const& lefts = get(in["left_children"]); + auto const& lefts = get>(in[tf::kLeft]); CHECK_EQ(lefts.size(), n_nodes); - auto const& rights = get(in["right_children"]); + auto const& rights = get>(in[tf::kRight]); CHECK_EQ(rights.size(), n_nodes); - auto const& parents = get(in["parents"]); + auto const& parents = get>(in[tf::kParent]); CHECK_EQ(parents.size(), n_nodes); - auto const& indices = get(in["split_indices"]); + auto const& indices = get>(in[tf::kSplitIdx]); CHECK_EQ(indices.size(), n_nodes); - auto const& conds = get(in["split_conditions"]); + auto const& conds = get>(in[tf::kSplitCond]); CHECK_EQ(conds.size(), n_nodes); - auto const& default_left = get(in["default_left"]); + auto const& 
default_left = get>(in[tf::kDftLeft]); CHECK_EQ(default_left.size(), n_nodes); - bool has_cat = get(in).find("split_type") != get(in).cend(); - std::remove_const_t(in["split_type"]))>> - split_type; - if (has_cat) { - split_type = get(in["split_type"]); - } - // Initialization stats = std::remove_reference_t(n_nodes); nodes = std::remove_reference_t(n_nodes); - split_types = std::remove_reference_t(n_nodes); - split_categories_segments = std::remove_reference_t(n_nodes); static_assert(std::is_integral(lefts, 0))>::value); static_assert(std::is_floating_point(loss_changes, 0))>::value); - CHECK_EQ(n_nodes, split_categories_segments.size()); // Set node for (int32_t i = 0; i < n_nodes; ++i) { @@ -1088,41 +1072,46 @@ bool LoadModelImpl(Json const& in, TreeParam* param, std::vector* float cond{GetElem(conds, i)}; bool dft_left{GetElem(default_left, i)}; n = RegTree::Node{left, right, parent, ind, cond, dft_left}; - - if (has_cat) { - split_types[i] = static_cast(GetElem(split_type, i)); - } } - - return has_cat; } void RegTree::LoadModel(Json const& in) { - bool has_cat{false}; - bool typed = IsA(in["loss_changes"]); - bool feature_is_64 = IsA(in["split_indices"]); - if (typed && feature_is_64) { - has_cat = LoadModelImpl(in, ¶m, &stats_, &split_types_, &nodes_, - &split_categories_segments_); - } else if (typed && !feature_is_64) { - has_cat = LoadModelImpl(in, ¶m, &stats_, &split_types_, &nodes_, - &split_categories_segments_); - } else if (!typed && feature_is_64) { - has_cat = LoadModelImpl(in, ¶m, &stats_, &split_types_, &nodes_, - &split_categories_segments_); - } else { - has_cat = LoadModelImpl(in, ¶m, &stats_, &split_types_, &nodes_, - &split_categories_segments_); - } + namespace tf = tree_field; + bool typed = IsA(in[tf::kParent]); + auto const& in_obj = get(in); + // basic properties + FromJson(in["tree_param"], ¶m); + // categorical splits + bool has_cat = in_obj.find("split_type") != in_obj.cend(); if (has_cat) { if (typed) { this->LoadCategoricalSplit(in); } else { this->LoadCategoricalSplit(in); } + } + // multi-target + if (param.size_leaf_vector > 1) { + this->p_mt_tree_.reset(new MultiTargetTree{¶m}); + this->GetMultiTargetTree()->LoadModel(in); + return; + } + + bool feature_is_64 = IsA(in["split_indices"]); + if (typed && feature_is_64) { + LoadModelImpl(in, param, &stats_, &nodes_); + } else if (typed && !feature_is_64) { + LoadModelImpl(in, param, &stats_, &nodes_); + } else if (!typed && feature_is_64) { + LoadModelImpl(in, param, &stats_, &nodes_); } else { + LoadModelImpl(in, param, &stats_, &nodes_); + } + + if (!has_cat) { this->split_categories_segments_.resize(this->param.num_nodes); + this->split_types_.resize(this->param.num_nodes); std::fill(split_types_.begin(), split_types_.end(), FeatureType::kNumerical); } @@ -1144,16 +1133,26 @@ void RegTree::LoadModel(Json const& in) { } void RegTree::SaveModel(Json* p_out) const { + auto& out = *p_out; + // basic properties + out["tree_param"] = ToJson(param); + // categorical splits + this->SaveCategoricalSplit(p_out); + // multi-target + if (this->IsMultiTarget()) { + CHECK_GT(param.size_leaf_vector, 1); + this->GetMultiTargetTree()->SaveModel(p_out); + return; + } /* Here we are treating leaf node and internal node equally. Some information like * child node id doesn't make sense for leaf node but we will have to save them to * avoid creating a huge map. 
One difficulty is XGBoost has deleted node created by * pruner, and this pruner can be used inside another updater so leaf are not necessary * at the end of node array. */ - auto& out = *p_out; CHECK_EQ(param.num_nodes, static_cast(nodes_.size())); CHECK_EQ(param.num_nodes, static_cast(stats_.size())); - out["tree_param"] = ToJson(param); + CHECK_EQ(get(out["tree_param"]["num_nodes"]), std::to_string(param.num_nodes)); auto n_nodes = param.num_nodes; @@ -1167,12 +1166,12 @@ void RegTree::SaveModel(Json* p_out) const { I32Array rights(n_nodes); I32Array parents(n_nodes); - F32Array conds(n_nodes); U8Array default_left(n_nodes); - U8Array split_type(n_nodes); CHECK_EQ(this->split_types_.size(), param.num_nodes); + namespace tf = tree_field; + auto save_tree = [&](auto* p_indices_array) { auto& indices_array = *p_indices_array; for (bst_node_t i = 0; i < n_nodes; ++i) { @@ -1188,33 +1187,28 @@ void RegTree::SaveModel(Json* p_out) const { indices_array.Set(i, n.SplitIndex()); conds.Set(i, n.SplitCond()); default_left.Set(i, static_cast(!!n.DefaultLeft())); - - split_type.Set(i, static_cast(this->NodeSplitType(i))); } }; if (this->param.num_feature > static_cast(std::numeric_limits::max())) { I64Array indices_64(n_nodes); save_tree(&indices_64); - out["split_indices"] = std::move(indices_64); + out[tf::kSplitIdx] = std::move(indices_64); } else { I32Array indices_32(n_nodes); save_tree(&indices_32); - out["split_indices"] = std::move(indices_32); + out[tf::kSplitIdx] = std::move(indices_32); } - this->SaveCategoricalSplit(&out); + out[tf::kLossChg] = std::move(loss_changes); + out[tf::kSumHess] = std::move(sum_hessian); + out[tf::kBaseWeight] = std::move(base_weights); - out["split_type"] = std::move(split_type); - out["loss_changes"] = std::move(loss_changes); - out["sum_hessian"] = std::move(sum_hessian); - out["base_weights"] = std::move(base_weights); + out[tf::kLeft] = std::move(lefts); + out[tf::kRight] = std::move(rights); + out[tf::kParent] = std::move(parents); - out["left_children"] = std::move(lefts); - out["right_children"] = std::move(rights); - out["parents"] = std::move(parents); - - out["split_conditions"] = std::move(conds); - out["default_left"] = std::move(default_left); + out[tf::kSplitCond] = std::move(conds); + out[tf::kDftLeft] = std::move(default_left); } void RegTree::CalculateContributionsApprox(const RegTree::FVec &feat, diff --git a/src/tree/updater_gpu_hist.cu b/src/tree/updater_gpu_hist.cu index 32b3f4a03..607aa8dc4 100644 --- a/src/tree/updater_gpu_hist.cu +++ b/src/tree/updater_gpu_hist.cu @@ -445,7 +445,7 @@ struct GPUHistMakerDevice { dh::caching_device_vector d_split_types; dh::caching_device_vector d_categories; - dh::caching_device_vector d_categories_segments; + dh::caching_device_vector d_categories_segments; if (!categories.empty()) { dh::CopyToD(h_split_types, &d_split_types); @@ -458,12 +458,11 @@ struct GPUHistMakerDevice { p_out_position); } - void FinalisePositionInPage(EllpackPageImpl const *page, - const common::Span d_nodes, - common::Span d_feature_types, - common::Span categories, - common::Span categories_segments, - HostDeviceVector* p_out_position) { + void FinalisePositionInPage( + EllpackPageImpl const* page, const common::Span d_nodes, + common::Span d_feature_types, common::Span categories, + common::Span categories_segments, + HostDeviceVector* p_out_position) { auto d_matrix = page->GetDeviceAccessor(ctx_->gpu_id); auto d_gpair = this->gpair; p_out_position->SetDevice(ctx_->gpu_id); diff --git 
a/tests/cpp/tree/test_multi_target_tree_model.cc b/tests/cpp/tree/test_multi_target_tree_model.cc
new file mode 100644
index 000000000..7d2bd9c7c
--- /dev/null
+++ b/tests/cpp/tree/test_multi_target_tree_model.cc
@@ -0,0 +1,48 @@
+/**
+ * Copyright 2023 by XGBoost Contributors
+ */
+#include <gtest/gtest.h>
+#include <xgboost/context.h>     // for Context
+#include <xgboost/json.h>
+#include <xgboost/tree_model.h>  // for RegTree
+
+namespace xgboost {
+TEST(MultiTargetTree, JsonIO) {
+  bst_target_t n_targets{3};
+  bst_feature_t n_features{4};
+  RegTree tree{n_targets, n_features};
+  ASSERT_TRUE(tree.IsMultiTarget());
+  linalg::Vector<float> base_weight{{1.0f, 2.0f, 3.0f}, {3ul}, Context::kCpuId};
+  linalg::Vector<float> left_weight{{2.0f, 3.0f, 4.0f}, {3ul}, Context::kCpuId};
+  linalg::Vector<float> right_weight{{3.0f, 4.0f, 5.0f}, {3ul}, Context::kCpuId};
+  tree.ExpandNode(RegTree::kRoot, /*split_idx=*/1, 0.5f, true, base_weight.HostView(),
+                  left_weight.HostView(), right_weight.HostView());
+  ASSERT_EQ(tree.param.num_nodes, 3);
+  ASSERT_EQ(tree.param.size_leaf_vector, 3);
+  ASSERT_EQ(tree.GetMultiTargetTree()->Size(), 3);
+  ASSERT_EQ(tree.Size(), 3);
+
+  Json jtree{Object{}};
+  tree.SaveModel(&jtree);
+
+  auto check_jtree = [](Json jtree, RegTree const& tree) {
+    ASSERT_EQ(get<String>(jtree["tree_param"]["num_nodes"]),
+              std::to_string(tree.param.num_nodes));
+    ASSERT_EQ(get<F32Array>(jtree["base_weights"]).size(),
+              tree.param.num_nodes * tree.param.size_leaf_vector);
+    ASSERT_EQ(get<I32Array>(jtree["parents"]).size(), tree.param.num_nodes);
+    ASSERT_EQ(get<I32Array>(jtree["left_children"]).size(), tree.param.num_nodes);
+    ASSERT_EQ(get<I32Array>(jtree["right_children"]).size(), tree.param.num_nodes);
+  };
+  check_jtree(jtree, tree);
+
+  RegTree loaded;
+  loaded.LoadModel(jtree);
+  ASSERT_TRUE(loaded.IsMultiTarget());
+  ASSERT_EQ(loaded.param.num_nodes, 3);
+
+  Json jtree1{Object{}};
+  loaded.SaveModel(&jtree1);
+  check_jtree(jtree1, tree);
+}
+}  // namespace xgboost
diff --git a/tests/cpp/tree/test_tree_model.cc b/tests/cpp/tree/test_tree_model.cc
index 65957255b..130a0ef70 100644
--- a/tests/cpp/tree/test_tree_model.cc
+++ b/tests/cpp/tree/test_tree_model.cc
@@ -477,7 +477,7 @@ TEST(Tree, JsonIO) {
   auto tparam = j_tree["tree_param"];
   ASSERT_EQ(get<String>(tparam["num_feature"]), "0");
   ASSERT_EQ(get<String>(tparam["num_nodes"]), "3");
-  ASSERT_EQ(get<String>(tparam["size_leaf_vector"]), "0");
+  ASSERT_EQ(get<String>(tparam["size_leaf_vector"]), "1");

   ASSERT_EQ(get<I32Array>(j_tree["left_children"]).size(), 3ul);
   ASSERT_EQ(get<I32Array>(j_tree["right_children"]).size(), 3ul);

From c5c8f643f260a0d5cc09efc1070b51efb099c66b Mon Sep 17 00:00:00 2001
From: Jiaming Yuan
Date: Fri, 10 Mar 2023 11:43:02 +0800
Subject: [PATCH 6/9] Remove the cub submodule. (#8888)

XGBoost now uses CTK 11.8 for binary packages, so there is no need to maintain a cub
submodule anymore.
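For context, with the submodule removed the cub headers are expected to resolve from the
CUDA Toolkit installation itself. A small probe along these lines (hypothetical, not part
of this patch) confirms which cub a build picks up:

    // probe_cub.cu -- assumes CTK 11+ ships cub/version.cuh, which defines CUB_VERSION.
    // Build with: nvcc probe_cub.cu -o probe_cub
    #include <cub/version.cuh>
    #include <cstdio>

    int main() {
      std::printf("CUB_VERSION = %d\n", CUB_VERSION);
      return 0;
    }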
---
 .gitmodules                            | 3 ---
 CMakeLists.txt                         | 8 --------
 cmake/Utils.cmake                      | 9 ++-------
 cub                                    | 1 -
 doc/build.rst                          | 3 +--
 plugin/federated/README.md             | 1 -
 tests/buildkite/build-cuda-with-rmm.sh | 2 +-
 7 files changed, 4 insertions(+), 23 deletions(-)
 delete mode 160000 cub

diff --git a/.gitmodules b/.gitmodules
index 1f52fff57..01ecae42e 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -2,9 +2,6 @@
 	path = dmlc-core
 	url = https://github.com/dmlc/dmlc-core
 	branch = main
-[submodule "cub"]
-	path = cub
-	url = https://github.com/NVlabs/cub
 [submodule "gputreeshap"]
 	path = gputreeshap
 	url = https://github.com/rapidsai/gputreeshap.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 17dfe285f..e5a61c60b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -51,7 +51,6 @@ option(HIDE_CXX_SYMBOLS "Build shared library and hide all C++ symbols" OFF)
 option(USE_CUDA  "Build with GPU acceleration" OFF)
 option(USE_NCCL  "Build with NCCL to enable distributed GPU support." OFF)
 option(BUILD_WITH_SHARED_NCCL "Build with shared NCCL library." OFF)
-option(BUILD_WITH_CUDA_CUB "Build with cub in CUDA installation" OFF)
 set(GPU_COMPUTE_VER "" CACHE STRING
   "Semicolon separated list of compute versions to be built against, e.g. '35;61'")
 ## Copied From dmlc
@@ -115,9 +114,6 @@ endif (ENABLE_ALL_WARNINGS)
 if (BUILD_STATIC_LIB AND (R_LIB OR JVM_BINDINGS))
   message(SEND_ERROR "Cannot build a static library libxgboost.a when R or JVM packages are enabled.")
 endif (BUILD_STATIC_LIB AND (R_LIB OR JVM_BINDINGS))
-if (PLUGIN_RMM AND (NOT BUILD_WITH_CUDA_CUB))
-  message(SEND_ERROR "Cannot build with RMM using cub submodule.")
-endif (PLUGIN_RMM AND (NOT BUILD_WITH_CUDA_CUB))
 if (PLUGIN_FEDERATED)
   if (CMAKE_CROSSCOMPILING)
     message(SEND_ERROR "Cannot cross compile with federated learning support")
@@ -152,10 +148,6 @@ if (USE_CUDA)
   set(GEN_CODE "")
   format_gencode_flags("${GPU_COMPUTE_VER}" GEN_CODE)
   add_subdirectory(${PROJECT_SOURCE_DIR}/gputreeshap)
-
-  if ((${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 11.4) AND (NOT BUILD_WITH_CUDA_CUB))
-    set(BUILD_WITH_CUDA_CUB ON)
-  endif ()
 endif (USE_CUDA)

 if (FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND
diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake
index 3a66735fe..57a45ca42 100644
--- a/cmake/Utils.cmake
+++ b/cmake/Utils.cmake
@@ -165,13 +165,8 @@ function(xgboost_set_cuda_flags target)
     enable_nvtx(${target})
   endif (USE_NVTX)

-  if (NOT BUILD_WITH_CUDA_CUB)
-    target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_CUDA=1 -DTHRUST_IGNORE_CUB_VERSION_CHECK=1)
-    target_include_directories(${target} PRIVATE ${xgboost_SOURCE_DIR}/cub/ ${xgboost_SOURCE_DIR}/gputreeshap)
-  else ()
-    target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_CUDA=1)
-    target_include_directories(${target} PRIVATE ${xgboost_SOURCE_DIR}/gputreeshap)
-  endif (NOT BUILD_WITH_CUDA_CUB)
+  target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_CUDA=1)
+  target_include_directories(${target} PRIVATE ${xgboost_SOURCE_DIR}/gputreeshap)

   if (MSVC)
     target_compile_options(${target} PRIVATE
diff --git a/cub b/cub
deleted file mode 160000
index af39ee264..000000000
--- a/cub
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit af39ee264f4627608072bf54730bf3a862e56875
diff --git a/doc/build.rst b/doc/build.rst
index 7abd8dbbd..53d9a3209 100644
--- a/doc/build.rst
+++ b/doc/build.rst
@@ -128,8 +128,7 @@ From the command line on Linux starting from the XGBoost directory:

   mkdir build
   cd build
-  # For CUDA toolkit >= 11.4, `BUILD_WITH_CUDA_CUB` is required.
-  cmake .. -DUSE_CUDA=ON -DBUILD_WITH_CUDA_CUB=ON
+  cmake .. -DUSE_CUDA=ON
   make -j4

 .. note:: Specifying compute capability
diff --git a/plugin/federated/README.md b/plugin/federated/README.md
index 061cb7714..d83db6be1 100644
--- a/plugin/federated/README.md
+++ b/plugin/federated/README.md
@@ -15,7 +15,6 @@ mkdir build
 cd build
 cmake .. -GNinja \
  -DPLUGIN_FEDERATED=ON \
- -DBUILD_WITH_CUDA_CUB=ON \
  -DUSE_CUDA=ON\
  -DUSE_NCCL=ON
 ninja
diff --git a/tests/buildkite/build-cuda-with-rmm.sh b/tests/buildkite/build-cuda-with-rmm.sh
index f474f318b..2e0b9fe2c 100755
--- a/tests/buildkite/build-cuda-with-rmm.sh
+++ b/tests/buildkite/build-cuda-with-rmm.sh
@@ -20,7 +20,7 @@ command_wrapper="tests/ci_build/ci_build.sh rmm docker --build-arg "`
 echo "--- Build libxgboost from the source"
 $command_wrapper tests/ci_build/build_via_cmake.sh --conda-env=gpu_test -DUSE_CUDA=ON \
-  -DUSE_NCCL=ON -DPLUGIN_RMM=ON -DBUILD_WITH_CUDA_CUB=ON ${arch_flag}
+  -DUSE_NCCL=ON -DPLUGIN_RMM=ON ${arch_flag}
 echo "-- Stash C++ test executable (testxgboost)"
 buildkite-agent artifact upload build/testxgboost

From 54e001bbf45853e1755a6bc51c5b2279de230121 Mon Sep 17 00:00:00 2001
From: Jiaming Yuan
Date: Fri, 10 Mar 2023 12:03:24 +0800
Subject: [PATCH 7/9] [doc][dask] Reference examples from coiled. [skip ci] (#8891)

---
 doc/tutorials/dask.rst | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/doc/tutorials/dask.rst b/doc/tutorials/dask.rst
index e35c0b24c..c010aa0e2 100644
--- a/doc/tutorials/dask.rst
+++ b/doc/tutorials/dask.rst
@@ -474,6 +474,14 @@ interface, including callback functions, custom evaluation metric and objective:
     callbacks=[early_stop],
 )

+**********************
+Hyper-parameter tuning
+**********************
+
+See https://github.com/coiled/dask-xgboost-nyctaxi for a set of examples of using XGBoost
+with dask and optuna.
+
+
 .. _tracker-ip:

 ***************

From 6deaec80271fc19471b305c4a64fcb1de7d84f40 Mon Sep 17 00:00:00 2001
From: Jiaming Yuan
Date: Sat, 11 Mar 2023 01:38:28 +0800
Subject: [PATCH 8/9] Pass obj info by reference instead of by value. (#8889)

- Pass obj info into tree updater as a const pointer. This way we don't have to
  initialize the learner model param before configuring the gbm, which breaks up the
  dependency between configuration steps.
---
 include/xgboost/tree_updater.h          | 42 ++++++++++++-------------
 src/gbm/gbtree.cc                       |  4 +--
 src/tree/tree_updater.cc                | 19 +++++------
 src/tree/updater_approx.cc              | 27 +++++++++-------
 src/tree/updater_colmaker.cc            |  2 +-
 src/tree/updater_gpu_hist.cu            | 36 ++++++++++-----------
 src/tree/updater_prune.cc               |  6 ++--
 src/tree/updater_quantile_hist.cc       |  6 ++--
 src/tree/updater_quantile_hist.h        |  5 +--
 src/tree/updater_refresh.cc             |  2 +-
 src/tree/updater_sync.cc                |  4 +--
 tests/cpp/tree/test_gpu_hist.cu         | 11 ++++---
 tests/cpp/tree/test_histmaker.cc        | 15 +++++----
 tests/cpp/tree/test_node_partition.cc   | 13 +++++---
 tests/cpp/tree/test_prediction_cache.cc |  5 +--
 tests/cpp/tree/test_prune.cc            |  4 +--
 tests/cpp/tree/test_refresh.cc          | 16 +++++-----
 tests/cpp/tree/test_tree_stat.cc        | 20 +++++++-----
 18 files changed, 125 insertions(+), 112 deletions(-)

diff --git a/include/xgboost/tree_updater.h b/include/xgboost/tree_updater.h
index 59f4c2cf8..02248ed8c 100644
--- a/include/xgboost/tree_updater.h
+++ b/include/xgboost/tree_updater.h
@@ -1,5 +1,5 @@
-/*!
- * Copyright 2014-2022 by XGBoost Contributors
+/**
+ * Copyright 2014-2023 by XGBoost Contributors
  * \file tree_updater.h
  * \brief General primitive for tree learning,
  *  Updating a collection of trees given the information.
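// For illustration, a registration under the new contract defined in this header: every
// factory now receives the task as a stable const pointer owned by the learner instead of
// a by-value copy. ExampleUpdater is a hypothetical class shown only for the factory
// signature; the real registrations appear in the hunks below.
XGBOOST_REGISTER_TREE_UPDATER(ExampleUpdater, "example")
    .describe("Illustrative updater holding ObjInfo by const pointer.")
    .set_body([](Context const* ctx, ObjInfo const* task) {
      return new ExampleUpdater{ctx, task};
    });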
@@ -9,19 +9,17 @@
 #define XGBOOST_TREE_UPDATER_H_

 #include <dmlc/registry.h>
-#include <xgboost/base.h>
-#include <xgboost/data.h>
-#include <xgboost/host_device_vector.h>
-#include <xgboost/linalg.h>
-#include <xgboost/model.h>
-#include <xgboost/span.h>
-#include <xgboost/task.h>
-#include <xgboost/tree_model.h>
+#include <xgboost/base.h>                // for Args, GradientPair
+#include <xgboost/data.h>                // DMatrix
+#include <xgboost/host_device_vector.h>  // for HostDeviceVector
+#include <xgboost/linalg.h>              // for VectorView
+#include <xgboost/model.h>               // for Configurable
+#include <xgboost/span.h>                // for Span
+#include <xgboost/tree_model.h>          // for RegTree

-#include <functional>
-#include <string>
-#include <utility>
-#include <vector>
+#include <functional>  // for function
+#include <string>      // for string
+#include <vector>      // for vector

 namespace xgboost {

 namespace tree {
@@ -30,8 +28,9 @@ struct TrainParam;

 class Json;
 struct Context;
+struct ObjInfo;

-/*!
+/**
  * \brief interface of tree update module, that performs update of a tree.
  */
 class TreeUpdater : public Configurable {
@@ -53,12 +52,12 @@ class TreeUpdater : public Configurable {
   *        used for modifying existing trees (like `prune`). Return true if it can modify
   *        existing trees.
   */
-  virtual bool CanModifyTree() const { return false; }
+  [[nodiscard]] virtual bool CanModifyTree() const { return false; }
  /*!
   * \brief Whether the out_position in `Update` is valid. This determines whether adaptive
   *        tree can be used.
   */
-  virtual bool HasNodePosition() const { return false; }
+  [[nodiscard]] virtual bool HasNodePosition() const { return false; }
  /**
   * \brief perform update to the tree models
   *
@@ -91,14 +90,15 @@ class TreeUpdater : public Configurable {
     return false;
   }

-  virtual char const* Name() const = 0;
+  [[nodiscard]] virtual char const* Name() const = 0;

-  /*!
+  /**
   * \brief Create a tree updater given name
   * \param name Name of the tree updater.
   * \param ctx A global runtime parameter
+  * \param task Information about the objective.
   */
-  static TreeUpdater* Create(const std::string& name, Context const* ctx, ObjInfo task);
+  static TreeUpdater* Create(const std::string& name, Context const* ctx, ObjInfo const* task);
 };

 /*!
  * \brief Registry entry for tree updater.
  */
 struct TreeUpdaterReg
     : public dmlc::FunctionRegEntryBase<
-          TreeUpdaterReg, std::function<TreeUpdater*(Context const* ctx, ObjInfo task)>> {};
+          TreeUpdaterReg, std::function<TreeUpdater*(Context const* ctx, ObjInfo const* task)>> {};

 /*!
  * \brief Macro to register tree updater.
diff --git a/src/gbm/gbtree.cc b/src/gbm/gbtree.cc
index 39f38c289..c1cb825c1 100644
--- a/src/gbm/gbtree.cc
+++ b/src/gbm/gbtree.cc
@@ -340,7 +340,7 @@ void GBTree::InitUpdater(Args const& cfg) {
   // create new updaters
   for (const std::string& pstr : ups) {
     std::unique_ptr<TreeUpdater> up(
-        TreeUpdater::Create(pstr.c_str(), ctx_, model_.learner_model_param->task));
+        TreeUpdater::Create(pstr.c_str(), ctx_, &model_.learner_model_param->task));
     up->Configure(cfg);
     updaters_.push_back(std::move(up));
   }
@@ -448,7 +448,7 @@ void GBTree::LoadConfig(Json const& in) {
       LOG(WARNING) << "Changing updater from `grow_gpu_hist` to `grow_quantile_histmaker`.";
     }
     std::unique_ptr<TreeUpdater> up{
-        TreeUpdater::Create(name, ctx_, model_.learner_model_param->task)};
+        TreeUpdater::Create(name, ctx_, &model_.learner_model_param->task)};
     up->LoadConfig(kv.second);
     updaters_.push_back(std::move(up));
   }
diff --git a/src/tree/tree_updater.cc b/src/tree/tree_updater.cc
index 286daa4d8..a1d657b82 100644
--- a/src/tree/tree_updater.cc
+++ b/src/tree/tree_updater.cc
@@ -1,20 +1,20 @@
-/*!
- * Copyright 2015-2022 by XGBoost Contributors
+/**
+ * Copyright 2015-2023 by XGBoost Contributors
  * \file tree_updater.cc
  * \brief Registry of tree updaters.
*/ +#include "xgboost/tree_updater.h" + #include -#include "xgboost/tree_updater.h" -#include "xgboost/host_device_vector.h" +#include // for string namespace dmlc { DMLC_REGISTRY_ENABLE(::xgboost::TreeUpdaterReg); } // namespace dmlc namespace xgboost { - -TreeUpdater* TreeUpdater::Create(const std::string& name, Context const* ctx, ObjInfo task) { +TreeUpdater* TreeUpdater::Create(const std::string& name, Context const* ctx, ObjInfo const* task) { auto* e = ::dmlc::Registry< ::xgboost::TreeUpdaterReg>::Get()->Find(name); if (e == nullptr) { LOG(FATAL) << "Unknown tree updater " << name; @@ -22,11 +22,9 @@ TreeUpdater* TreeUpdater::Create(const std::string& name, Context const* ctx, Ob auto p_updater = (e->body)(ctx, task); return p_updater; } - } // namespace xgboost -namespace xgboost { -namespace tree { +namespace xgboost::tree { // List of files that will be force linked in static links. DMLC_REGISTRY_LINK_TAG(updater_colmaker); DMLC_REGISTRY_LINK_TAG(updater_refresh); @@ -37,5 +35,4 @@ DMLC_REGISTRY_LINK_TAG(updater_sync); #ifdef XGBOOST_USE_CUDA DMLC_REGISTRY_LINK_TAG(updater_gpu_hist); #endif // XGBOOST_USE_CUDA -} // namespace tree -} // namespace xgboost +} // namespace xgboost::tree diff --git a/src/tree/updater_approx.cc b/src/tree/updater_approx.cc index 2bc3ff543..5af2721a6 100644 --- a/src/tree/updater_approx.cc +++ b/src/tree/updater_approx.cc @@ -14,14 +14,15 @@ #include "driver.h" #include "hist/evaluate_splits.h" #include "hist/histogram.h" -#include "hist/sampler.h" // SampleGradient +#include "hist/sampler.h" // for SampleGradient #include "param.h" #include "xgboost/base.h" #include "xgboost/data.h" #include "xgboost/json.h" #include "xgboost/linalg.h" +#include "xgboost/task.h" // for ObjInfo #include "xgboost/tree_model.h" -#include "xgboost/tree_updater.h" +#include "xgboost/tree_updater.h" // for TreeUpdater namespace xgboost::tree { @@ -40,12 +41,12 @@ auto BatchSpec(TrainParam const &p, common::Span hess) { class GloablApproxBuilder { protected: - TrainParam const* param_; + TrainParam const *param_; std::shared_ptr col_sampler_; HistEvaluator evaluator_; HistogramBuilder histogram_builder_; Context const *ctx_; - ObjInfo const task_; + ObjInfo const *const task_; std::vector partitioner_; // Pointer to last updated tree, used for update prediction cache. @@ -63,7 +64,8 @@ class GloablApproxBuilder { bst_bin_t n_total_bins = 0; partitioner_.clear(); // Generating the GHistIndexMatrix is quite slow, is there a way to speed it up? 
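// For illustration, why `task_` above is now held by pointer: the learner owns the
// ObjInfo and may update it after this updater has been created, and the updater then
// observes the change with no re-configuration. Hypothetical fragment only -- the
// boolean field names are assumptions; the three-value initialisation mirrors
// tests/cpp/tree/test_node_partition.cc in this patch.
void IllustrateSharedTask(Context const* ctx) {
  ObjInfo task{ObjInfo::kRegression};
  std::unique_ptr<TreeUpdater> up{TreeUpdater::Create("grow_histmaker", ctx, &task)};
  task = ObjInfo{ObjInfo::kRegression, /*const_hess=*/true, /*zero_hess=*/true};
  // `up` now sees task.UpdateTreeLeaf() == true through its stored pointer,
  // without being re-created.
}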
- for (auto const &page : p_fmat->GetBatches(BatchSpec(*param_, hess, task_))) { + for (auto const &page : + p_fmat->GetBatches(BatchSpec(*param_, hess, *task_))) { if (n_total_bins == 0) { n_total_bins = page.cut.TotalBins(); feature_values_ = page.cut; @@ -157,7 +159,7 @@ class GloablApproxBuilder { void LeafPartition(RegTree const &tree, common::Span hess, std::vector *p_out_position) { monitor_->Start(__func__); - if (!task_.UpdateTreeLeaf()) { + if (!task_->UpdateTreeLeaf()) { return; } for (auto const &part : partitioner_) { @@ -168,8 +170,8 @@ class GloablApproxBuilder { public: explicit GloablApproxBuilder(TrainParam const *param, MetaInfo const &info, Context const *ctx, - std::shared_ptr column_sampler, ObjInfo task, - common::Monitor *monitor) + std::shared_ptr column_sampler, + ObjInfo const *task, common::Monitor *monitor) : param_{param}, col_sampler_{std::move(column_sampler)}, evaluator_{ctx, param_, info, col_sampler_}, @@ -256,10 +258,11 @@ class GlobalApproxUpdater : public TreeUpdater { DMatrix *cached_{nullptr}; std::shared_ptr column_sampler_ = std::make_shared(); - ObjInfo task_; + ObjInfo const *task_; public: - explicit GlobalApproxUpdater(Context const *ctx, ObjInfo task) : TreeUpdater(ctx), task_{task} { + explicit GlobalApproxUpdater(Context const *ctx, ObjInfo const *task) + : TreeUpdater(ctx), task_{task} { monitor_.Init(__func__); } @@ -317,5 +320,7 @@ XGBOOST_REGISTER_TREE_UPDATER(GlobalHistMaker, "grow_histmaker") .describe( "Tree constructor that uses approximate histogram construction " "for each node.") - .set_body([](Context const *ctx, ObjInfo task) { return new GlobalApproxUpdater(ctx, task); }); + .set_body([](Context const *ctx, ObjInfo const *task) { + return new GlobalApproxUpdater(ctx, task); + }); } // namespace xgboost::tree diff --git a/src/tree/updater_colmaker.cc b/src/tree/updater_colmaker.cc index 070bfe578..06579c429 100644 --- a/src/tree/updater_colmaker.cc +++ b/src/tree/updater_colmaker.cc @@ -603,5 +603,5 @@ class ColMaker: public TreeUpdater { XGBOOST_REGISTER_TREE_UPDATER(ColMaker, "grow_colmaker") .describe("Grow tree with parallelization over columns.") - .set_body([](Context const *ctx, ObjInfo) { return new ColMaker(ctx); }); + .set_body([](Context const *ctx, auto) { return new ColMaker(ctx); }); } // namespace xgboost::tree diff --git a/src/tree/updater_gpu_hist.cu b/src/tree/updater_gpu_hist.cu index 607aa8dc4..54ff7ea1a 100644 --- a/src/tree/updater_gpu_hist.cu +++ b/src/tree/updater_gpu_hist.cu @@ -15,12 +15,12 @@ #include "../collective/device_communicator.cuh" #include "../common/bitfield.h" #include "../common/categorical.h" +#include "../common/cuda_context.cuh" // CUDAContext #include "../common/device_helpers.cuh" #include "../common/hist_util.h" #include "../common/io.h" #include "../common/timer.h" #include "../data/ellpack_page.cuh" -#include "../common/cuda_context.cuh" // CUDAContext #include "constraints.cuh" #include "driver.h" #include "gpu_hist/evaluate_splits.cuh" @@ -39,11 +39,10 @@ #include "xgboost/json.h" #include "xgboost/parameter.h" #include "xgboost/span.h" -#include "xgboost/task.h" +#include "xgboost/task.h" // for ObjInfo #include "xgboost/tree_model.h" -namespace xgboost { -namespace tree { +namespace xgboost::tree { #if !defined(GTEST_TEST) DMLC_REGISTRY_FILE_TAG(updater_gpu_hist); #endif // !defined(GTEST_TEST) @@ -106,12 +105,12 @@ class DeviceHistogramStorage { nidx_map_.clear(); overflow_nidx_map_.clear(); } - bool HistogramExists(int nidx) const { + [[nodiscard]] bool HistogramExists(int 
nidx) const { return nidx_map_.find(nidx) != nidx_map_.cend() || overflow_nidx_map_.find(nidx) != overflow_nidx_map_.cend(); } - int Bins() const { return n_bins_; } - size_t HistogramSize() const { return n_bins_ * kNumItemsInGradientSum; } + [[nodiscard]] int Bins() const { return n_bins_; } + [[nodiscard]] size_t HistogramSize() const { return n_bins_ * kNumItemsInGradientSum; } dh::device_vector& Data() { return data_; } void AllocateHistograms(const std::vector& new_nidxs) { @@ -690,8 +689,9 @@ struct GPUHistMakerDevice { return root_entry; } - void UpdateTree(HostDeviceVector* gpair_all, DMatrix* p_fmat, ObjInfo task, - RegTree* p_tree, collective::DeviceCommunicator* communicator, + void UpdateTree(HostDeviceVector* gpair_all, DMatrix* p_fmat, + ObjInfo const* task, RegTree* p_tree, + collective::DeviceCommunicator* communicator, HostDeviceVector* p_out_position) { auto& tree = *p_tree; // Process maximum 32 nodes at a time @@ -741,7 +741,7 @@ struct GPUHistMakerDevice { } monitor.Start("FinalisePosition"); - this->FinalisePosition(p_tree, p_fmat, task, p_out_position); + this->FinalisePosition(p_tree, p_fmat, *task, p_out_position); monitor.Stop("FinalisePosition"); } }; @@ -750,7 +750,7 @@ class GPUHistMaker : public TreeUpdater { using GradientSumT = GradientPairPrecise; public: - explicit GPUHistMaker(Context const* ctx, ObjInfo task) + explicit GPUHistMaker(Context const* ctx, ObjInfo const* task) : TreeUpdater(ctx), task_{task} {}; void Configure(const Args& args) override { // Used in test to count how many configurations are performed @@ -872,8 +872,8 @@ class GPUHistMaker : public TreeUpdater { std::unique_ptr> maker; // NOLINT - char const* Name() const override { return "grow_gpu_hist"; } - bool HasNodePosition() const override { return true; } + [[nodiscard]] char const* Name() const override { return "grow_gpu_hist"; } + [[nodiscard]] bool HasNodePosition() const override { return true; } private: bool initialised_{false}; @@ -882,7 +882,7 @@ class GPUHistMaker : public TreeUpdater { DMatrix* p_last_fmat_{nullptr}; RegTree const* p_last_tree_{nullptr}; - ObjInfo task_; + ObjInfo const* task_{nullptr}; common::Monitor monitor_; }; @@ -890,8 +890,8 @@ class GPUHistMaker : public TreeUpdater { #if !defined(GTEST_TEST) XGBOOST_REGISTER_TREE_UPDATER(GPUHistMaker, "grow_gpu_hist") .describe("Grow tree with GPU.") - .set_body([](Context const* ctx, ObjInfo task) { return new GPUHistMaker(ctx, task); }); + .set_body([](Context const* ctx, ObjInfo const* task) { + return new GPUHistMaker(ctx, task); + }); #endif // !defined(GTEST_TEST) - -} // namespace tree -} // namespace xgboost +} // namespace xgboost::tree diff --git a/src/tree/updater_prune.cc b/src/tree/updater_prune.cc index c591ce454..0970d2f79 100644 --- a/src/tree/updater_prune.cc +++ b/src/tree/updater_prune.cc @@ -18,7 +18,7 @@ DMLC_REGISTRY_FILE_TAG(updater_prune); /*! 
\brief pruner that prunes a tree after growing finishes */ class TreePruner : public TreeUpdater { public: - explicit TreePruner(Context const* ctx, ObjInfo task) : TreeUpdater(ctx) { + explicit TreePruner(Context const* ctx, ObjInfo const* task) : TreeUpdater(ctx) { syncher_.reset(TreeUpdater::Create("sync", ctx_, task)); pruner_monitor_.Init("TreePruner"); } @@ -90,5 +90,7 @@ class TreePruner : public TreeUpdater { XGBOOST_REGISTER_TREE_UPDATER(TreePruner, "prune") .describe("Pruner that prune the tree according to statistics.") - .set_body([](Context const* ctx, ObjInfo task) { return new TreePruner(ctx, task); }); + .set_body([](Context const* ctx, ObjInfo const* task) { + return new TreePruner{ctx, task}; + }); } // namespace xgboost::tree diff --git a/src/tree/updater_quantile_hist.cc b/src/tree/updater_quantile_hist.cc index 1929efb28..76c402ff5 100644 --- a/src/tree/updater_quantile_hist.cc +++ b/src/tree/updater_quantile_hist.cc @@ -35,7 +35,7 @@ void QuantileHistMaker::Update(TrainParam const *param, HostDeviceVector* gpair, DMatrix* dmat, @@ -125,7 +126,7 @@ class QuantileHistMaker: public TreeUpdater { protected: std::unique_ptr pimpl_; - ObjInfo task_; + ObjInfo const* task_; }; } // namespace xgboost::tree diff --git a/src/tree/updater_refresh.cc b/src/tree/updater_refresh.cc index ebda2a999..4bfe603e0 100644 --- a/src/tree/updater_refresh.cc +++ b/src/tree/updater_refresh.cc @@ -142,5 +142,5 @@ class TreeRefresher : public TreeUpdater { XGBOOST_REGISTER_TREE_UPDATER(TreeRefresher, "refresh") .describe("Refresher that refreshes the weight and statistics according to data.") - .set_body([](Context const *ctx, ObjInfo) { return new TreeRefresher(ctx); }); + .set_body([](Context const *ctx, auto) { return new TreeRefresher(ctx); }); } // namespace xgboost::tree diff --git a/src/tree/updater_sync.cc b/src/tree/updater_sync.cc index bb28bc4e6..2422807e2 100644 --- a/src/tree/updater_sync.cc +++ b/src/tree/updater_sync.cc @@ -1,5 +1,5 @@ /** - * Copyright 2014-2013 by XBGoost Contributors + * Copyright 2014-2023 by XBGoost Contributors * \file updater_sync.cc * \brief synchronize the tree in all distributed nodes */ @@ -53,5 +53,5 @@ class TreeSyncher : public TreeUpdater { XGBOOST_REGISTER_TREE_UPDATER(TreeSyncher, "sync") .describe("Syncher that synchronize the tree in all distributed nodes.") - .set_body([](Context const* ctx, ObjInfo) { return new TreeSyncher(ctx); }); + .set_body([](Context const* ctx, auto) { return new TreeSyncher(ctx); }); } // namespace xgboost::tree diff --git a/tests/cpp/tree/test_gpu_hist.cu b/tests/cpp/tree/test_gpu_hist.cu index e828d1379..ed21230ed 100644 --- a/tests/cpp/tree/test_gpu_hist.cu +++ b/tests/cpp/tree/test_gpu_hist.cu @@ -170,8 +170,8 @@ void TestHistogramIndexImpl() { // Build 2 matrices and build a histogram maker with that Context ctx(CreateEmptyGenericParam(0)); - tree::GPUHistMaker hist_maker{&ctx, ObjInfo{ObjInfo::kRegression}}, - hist_maker_ext{&ctx, ObjInfo{ObjInfo::kRegression}}; + ObjInfo task{ObjInfo::kRegression}; + tree::GPUHistMaker hist_maker{&ctx, &task}, hist_maker_ext{&ctx, &task}; std::unique_ptr hist_maker_dmat( CreateSparsePageDMatrixWithRC(kNRows, kNCols, 0, true)); @@ -240,7 +240,8 @@ void UpdateTree(HostDeviceVector* gpair, DMatrix* dmat, param.UpdateAllowUnknown(args); Context ctx(CreateEmptyGenericParam(0)); - tree::GPUHistMaker hist_maker{&ctx,ObjInfo{ObjInfo::kRegression}}; + ObjInfo task{ObjInfo::kRegression}; + tree::GPUHistMaker hist_maker{&ctx, &task}; std::vector> position(1); hist_maker.Update(¶m, gpair, 
dmat, common::Span>{position}, @@ -385,8 +386,8 @@ TEST(GpuHist, ExternalMemoryWithSampling) { TEST(GpuHist, ConfigIO) { Context ctx(CreateEmptyGenericParam(0)); - std::unique_ptr updater{ - TreeUpdater::Create("grow_gpu_hist", &ctx, ObjInfo{ObjInfo::kRegression})}; + ObjInfo task{ObjInfo::kRegression}; + std::unique_ptr updater{TreeUpdater::Create("grow_gpu_hist", &ctx, &task)}; updater->Configure(Args{}); Json j_updater { Object() }; diff --git a/tests/cpp/tree/test_histmaker.cc b/tests/cpp/tree/test_histmaker.cc index 20340f539..aa6a18797 100644 --- a/tests/cpp/tree/test_histmaker.cc +++ b/tests/cpp/tree/test_histmaker.cc @@ -37,13 +37,13 @@ TEST(GrowHistMaker, InteractionConstraint) auto p_gradients = GenerateGradients(kRows); Context ctx; + ObjInfo task{ObjInfo::kRegression}; { // With constraints RegTree tree; tree.param.num_feature = kCols; - std::unique_ptr updater{ - TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})}; + std::unique_ptr updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)}; TrainParam param; param.UpdateAllowUnknown( Args{{"interaction_constraints", "[[0, 1]]"}, {"num_feature", std::to_string(kCols)}}); @@ -61,8 +61,7 @@ TEST(GrowHistMaker, InteractionConstraint) RegTree tree; tree.param.num_feature = kCols; - std::unique_ptr updater{ - TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})}; + std::unique_ptr updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)}; std::vector> position(1); TrainParam param; param.Init(Args{}); @@ -81,8 +80,8 @@ void TestColumnSplit(int32_t rows, int32_t cols, RegTree const& expected_tree) { auto p_dmat = GenerateDMatrix(rows, cols); auto p_gradients = GenerateGradients(rows); Context ctx; - std::unique_ptr updater{ - TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})}; + ObjInfo task{ObjInfo::kRegression}; + std::unique_ptr updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)}; std::vector> position(1); std::unique_ptr sliced{ @@ -110,12 +109,12 @@ TEST(GrowHistMaker, ColumnSplit) { RegTree expected_tree; expected_tree.param.num_feature = kCols; + ObjInfo task{ObjInfo::kRegression}; { auto p_dmat = GenerateDMatrix(kRows, kCols); auto p_gradients = GenerateGradients(kRows); Context ctx; - std::unique_ptr updater{ - TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})}; + std::unique_ptr updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)}; std::vector> position(1); TrainParam param; param.Init(Args{}); diff --git a/tests/cpp/tree/test_node_partition.cc b/tests/cpp/tree/test_node_partition.cc index 883c8e68f..d7254fa60 100644 --- a/tests/cpp/tree/test_node_partition.cc +++ b/tests/cpp/tree/test_node_partition.cc @@ -2,22 +2,25 @@ * Copyright 2023 by XGBoost contributors */ #include -#include -#include +#include // for Context +#include // for ObjInfo +#include // for TreeUpdater + +#include // for unique_ptr namespace xgboost { TEST(Updater, HasNodePosition) { Context ctx; ObjInfo task{ObjInfo::kRegression, true, true}; - std::unique_ptr up{TreeUpdater::Create("grow_histmaker", &ctx, task)}; + std::unique_ptr up{TreeUpdater::Create("grow_histmaker", &ctx, &task)}; ASSERT_TRUE(up->HasNodePosition()); - up.reset(TreeUpdater::Create("grow_quantile_histmaker", &ctx, task)); + up.reset(TreeUpdater::Create("grow_quantile_histmaker", &ctx, &task)); ASSERT_TRUE(up->HasNodePosition()); #if defined(XGBOOST_USE_CUDA) ctx.gpu_id = 0; - up.reset(TreeUpdater::Create("grow_gpu_hist", &ctx, task)); + 
up.reset(TreeUpdater::Create("grow_gpu_hist", &ctx, &task)); ASSERT_TRUE(up->HasNodePosition()); #endif // defined(XGBOOST_USE_CUDA) } diff --git a/tests/cpp/tree/test_prediction_cache.cc b/tests/cpp/tree/test_prediction_cache.cc index f4e67d836..4f5a05eb6 100644 --- a/tests/cpp/tree/test_prediction_cache.cc +++ b/tests/cpp/tree/test_prediction_cache.cc @@ -9,6 +9,7 @@ #include "../../../src/tree/param.h" // for TrainParam #include "../helpers.h" +#include "xgboost/task.h" // for ObjInfo namespace xgboost { @@ -71,8 +72,8 @@ class TestPredictionCache : public ::testing::Test { ctx.gpu_id = Context::kCpuId; } - std::unique_ptr updater{ - TreeUpdater::Create(updater_name, &ctx, ObjInfo{ObjInfo::kRegression})}; + ObjInfo task{ObjInfo::kRegression}; + std::unique_ptr updater{TreeUpdater::Create(updater_name, &ctx, &task)}; RegTree tree; std::vector trees{&tree}; auto gpair = GenerateRandomGradients(n_samples_); diff --git a/tests/cpp/tree/test_prune.cc b/tests/cpp/tree/test_prune.cc index 258396976..063816def 100644 --- a/tests/cpp/tree/test_prune.cc +++ b/tests/cpp/tree/test_prune.cc @@ -39,8 +39,8 @@ TEST(Updater, Prune) { TrainParam param; param.UpdateAllowUnknown(cfg); - std::unique_ptr pruner( - TreeUpdater::Create("prune", &ctx, ObjInfo{ObjInfo::kRegression})); + ObjInfo task{ObjInfo::kRegression}; + std::unique_ptr pruner(TreeUpdater::Create("prune", &ctx, &task)); // loss_chg < min_split_loss; std::vector> position(trees.size()); diff --git a/tests/cpp/tree/test_refresh.cc b/tests/cpp/tree/test_refresh.cc index 870022724..80a0cbe6f 100644 --- a/tests/cpp/tree/test_refresh.cc +++ b/tests/cpp/tree/test_refresh.cc @@ -1,8 +1,9 @@ /** - * Copyright 2018-2013 by XGBoost Contributors + * Copyright 2018-2023 by XGBoost Contributors */ #include #include +#include // for ObjInfo #include #include @@ -12,9 +13,7 @@ #include "../../../src/tree/param.h" // for TrainParam #include "../helpers.h" -namespace xgboost { -namespace tree { - +namespace xgboost::tree { TEST(Updater, Refresh) { bst_row_t constexpr kRows = 8; bst_feature_t constexpr kCols = 16; @@ -33,8 +32,9 @@ TEST(Updater, Refresh) { auto ctx = CreateEmptyGenericParam(GPUIDX); tree.param.UpdateAllowUnknown(cfg); std::vector trees{&tree}; - std::unique_ptr refresher( - TreeUpdater::Create("refresh", &ctx, ObjInfo{ObjInfo::kRegression})); + + ObjInfo task{ObjInfo::kRegression}; + std::unique_ptr refresher(TreeUpdater::Create("refresh", &ctx, &task)); tree.ExpandNode(0, 2, 0.2f, false, 0.0, 0.2f, 0.8f, 0.0f, 0.0f, /*left_sum=*/0.0f, /*right_sum=*/0.0f); @@ -57,6 +57,4 @@ TEST(Updater, Refresh) { ASSERT_NEAR(0, tree.Stat(1).loss_chg, kEps); ASSERT_NEAR(0, tree.Stat(2).loss_chg, kEps); } - -} // namespace tree -} // namespace xgboost +} // namespace xgboost::tree diff --git a/tests/cpp/tree/test_tree_stat.cc b/tests/cpp/tree/test_tree_stat.cc index 4757bb3c1..a3f5cf9d3 100644 --- a/tests/cpp/tree/test_tree_stat.cc +++ b/tests/cpp/tree/test_tree_stat.cc @@ -2,9 +2,13 @@ * Copyright 2020-2023 by XGBoost Contributors */ #include +#include // for Context +#include // for ObjInfo #include #include +#include // for unique_ptr + #include "../../../src/tree/param.h" // for TrainParam #include "../helpers.h" @@ -26,12 +30,12 @@ class UpdaterTreeStatTest : public ::testing::Test { void RunTest(std::string updater) { tree::TrainParam param; + ObjInfo task{ObjInfo::kRegression}; param.Init(Args{}); Context ctx(updater == "grow_gpu_hist" ? 
CreateEmptyGenericParam(0) : CreateEmptyGenericParam(Context::kCpuId)); - auto up = std::unique_ptr{ - TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kRegression})}; + auto up = std::unique_ptr{TreeUpdater::Create(updater, &ctx, &task)}; up->Configure(Args{}); RegTree tree; tree.param.num_feature = kCols; @@ -74,18 +78,18 @@ class UpdaterEtaTest : public ::testing::Test { } void RunTest(std::string updater) { + ObjInfo task{ObjInfo::kClassification}; + Context ctx(updater == "grow_gpu_hist" ? CreateEmptyGenericParam(0) : CreateEmptyGenericParam(Context::kCpuId)); float eta = 0.4; - auto up_0 = std::unique_ptr{ - TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kClassification})}; + auto up_0 = std::unique_ptr{TreeUpdater::Create(updater, &ctx, &task)}; up_0->Configure(Args{}); tree::TrainParam param0; param0.Init(Args{{"eta", std::to_string(eta)}}); - auto up_1 = std::unique_ptr{ - TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kClassification})}; + auto up_1 = std::unique_ptr{TreeUpdater::Create(updater, &ctx, &task)}; up_1->Configure(Args{{"eta", "1.0"}}); tree::TrainParam param1; param1.Init(Args{{"eta", "1.0"}}); @@ -153,11 +157,11 @@ class TestMinSplitLoss : public ::testing::Test { {"gamma", std::to_string(gamma)}}; tree::TrainParam param; param.UpdateAllowUnknown(args); + ObjInfo task{ObjInfo::kRegression}; Context ctx(updater == "grow_gpu_hist" ? CreateEmptyGenericParam(0) : CreateEmptyGenericParam(Context::kCpuId)); - auto up = std::unique_ptr{ - TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kRegression})}; + auto up = std::unique_ptr{TreeUpdater::Create(updater, &ctx, &task)}; up->Configure({}); RegTree tree; From 2aa838c75e7f8e4d212bc0d7856589f7c28133d2 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Sat, 11 Mar 2023 02:58:01 +0800 Subject: [PATCH 9/9] Define multi-strategy parameter. (#8890) --- include/xgboost/learner.h | 94 +++++++++++------ src/c_api/c_api_utils.h | 5 +- src/learner.cc | 177 ++++++++++++++++++++------------- src/predictor/predictor.cc | 34 +++---- tests/cpp/helpers.h | 3 +- tests/cpp/test_multi_target.cc | 42 +++++--- 6 files changed, 221 insertions(+), 134 deletions(-) diff --git a/include/xgboost/learner.h b/include/xgboost/learner.h index ccb171370..1d4e35a94 100644 --- a/include/xgboost/learner.h +++ b/include/xgboost/learner.h @@ -8,29 +8,33 @@ #ifndef XGBOOST_LEARNER_H_ #define XGBOOST_LEARNER_H_ -#include // Serializable -#include -#include // Context -#include -#include -#include // Tensor -#include -#include +#include // for Serializable +#include // for bst_feature_t, bst_target_t, bst_float, Args, GradientPair +#include // for Context +#include // for Tensor, TensorView +#include // for Metric +#include // for Configurable, Model +#include // for Span +#include // for ObjInfo -#include -#include -#include -#include -#include +#include // for max +#include // for int32_t, uint32_t, uint8_t +#include // for map +#include // for shared_ptr, unique_ptr +#include // for string +#include // for move +#include // for vector namespace xgboost { - +class FeatureMap; class Metric; class GradientBooster; class ObjFunction; class DMatrix; class Json; struct XGBAPIThreadLocalEntry; +template +class HostDeviceVector; enum class PredictionType : std::uint8_t { // NOLINT kValue = 0, @@ -143,7 +147,10 @@ class Learner : public Model, public Configurable, public dmlc::Serializable { * \brief Get number of boosted rounds from gradient booster. 
*/ virtual int32_t BoostedRounds() const = 0; - virtual uint32_t Groups() const = 0; + /** + * \brief Get the number of output groups from the model. + */ + virtual std::uint32_t Groups() const = 0; void LoadModel(Json const& in) override = 0; void SaveModel(Json* out) const override = 0; @@ -275,8 +282,16 @@ class Learner : public Model, public Configurable, public dmlc::Serializable { struct LearnerModelParamLegacy; -/* - * \brief Basic Model Parameters, used to describe the booster. +/** + * \brief Strategy for building multi-target models. + */ +enum class MultiStrategy : std::int32_t { + kComposite = 0, + kMonolithic = 1, +}; + +/** + * \brief Basic model parameters, used to describe the booster. */ struct LearnerModelParam { private: @@ -287,30 +302,51 @@ struct LearnerModelParam { linalg::Tensor base_score_; public: - /* \brief number of features */ - uint32_t num_feature { 0 }; - /* \brief number of classes, if it is multi-class classification */ - uint32_t num_output_group { 0 }; - /* \brief Current task, determined by objective. */ + /** + * \brief The number of features. + */ + bst_feature_t num_feature{0}; + /** + * \brief The number of classes or targets. + */ + std::uint32_t num_output_group{0}; + /** + * \brief Current task, determined by objective. + */ ObjInfo task{ObjInfo::kRegression}; + /** + * \brief Strategy for building multi-target models. + */ + MultiStrategy multi_strategy{MultiStrategy::kComposite}; LearnerModelParam() = default; // As the old `LearnerModelParamLegacy` is still used by binary IO, we keep // this one as an immutable copy. LearnerModelParam(Context const* ctx, LearnerModelParamLegacy const& user_param, - linalg::Tensor base_margin, ObjInfo t); - LearnerModelParam(LearnerModelParamLegacy const& user_param, ObjInfo t); - LearnerModelParam(bst_feature_t n_features, linalg::Tensor base_margin, - uint32_t n_groups) - : base_score_{std::move(base_margin)}, num_feature{n_features}, num_output_group{n_groups} {} + linalg::Tensor base_margin, ObjInfo t, MultiStrategy multi_strategy); + LearnerModelParam(LearnerModelParamLegacy const& user_param, ObjInfo t, + MultiStrategy multi_strategy); + LearnerModelParam(bst_feature_t n_features, linalg::Tensor base_score, + std::uint32_t n_groups, bst_target_t n_targets, MultiStrategy multi_strategy) + : base_score_{std::move(base_score)}, + num_feature{n_features}, + num_output_group{std::max(n_groups, n_targets)}, + multi_strategy{multi_strategy} {} linalg::TensorView BaseScore(Context const* ctx) const; - linalg::TensorView BaseScore(int32_t device) const; + [[nodiscard]] linalg::TensorView BaseScore(std::int32_t device) const; void Copy(LearnerModelParam const& that); + [[nodiscard]] bool IsVectorLeaf() const noexcept { + return multi_strategy == MultiStrategy::kMonolithic; + } + [[nodiscard]] bst_target_t OutputLength() const noexcept { return this->num_output_group; } + [[nodiscard]] bst_target_t LeafLength() const noexcept { + return this->IsVectorLeaf() ? this->OutputLength() : 1; + } /* \brief Whether this parameter is initialized with LearnerModelParamLegacy. 
*/ - bool Initialized() const { return num_feature != 0 && num_output_group != 0; } + [[nodiscard]] bool Initialized() const { return num_feature != 0 && num_output_group != 0; } }; } // namespace xgboost diff --git a/src/c_api/c_api_utils.h b/src/c_api/c_api_utils.h index 78c477f42..8908364f2 100644 --- a/src/c_api/c_api_utils.h +++ b/src/c_api/c_api_utils.h @@ -12,10 +12,11 @@ #include #include "xgboost/c_api.h" -#include "xgboost/data.h" // DMatrix +#include "xgboost/data.h" // DMatrix +#include "xgboost/feature_map.h" // for FeatureMap #include "xgboost/json.h" #include "xgboost/learner.h" -#include "xgboost/linalg.h" // ArrayInterfaceHandler +#include "xgboost/linalg.h" // ArrayInterfaceHandler #include "xgboost/logging.h" #include "xgboost/string_view.h" // StringView diff --git a/src/learner.cc b/src/learner.cc index 0e47c694c..454855355 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -6,54 +6,67 @@ */ #include "xgboost/learner.h" -#include -#include -#include -#include +#include // for Stream +#include // for FieldEntry, DMLC_DECLARE_FIELD, Parameter, DMLC... +#include // for ThreadLocalStore -#include -#include -#include -#include -#include // std::numeric_limits -#include -#include -#include -#include -#include -#include // for as_const -#include +#include // for equal, max, transform, sort, find_if, all_of +#include // for array +#include // for atomic +#include // for isalpha, isspace +#include // for isnan, isinf +#include // for int32_t, uint32_t, int64_t, uint64_t +#include // for atoi +#include // for memcpy, size_t, memset +#include // for less +#include // for operator<<, setiosflags +#include // for back_insert_iterator, distance, back_inserter +#include // for numeric_limits +#include // for allocator, unique_ptr, shared_ptr, operator== +#include // for mutex, lock_guard +#include // for set +#include // for operator<<, basic_ostream, basic_ostream::opera... +#include // for stack +#include // for basic_string, char_traits, operator<, string +#include // for errc +#include // for get +#include // for operator!=, unordered_map +#include // for pair, as_const, move, swap +#include // for vector -#include "collective/communicator-inl.h" -#include "common/api_entry.h" // XGBAPIThreadLocalEntry -#include "common/charconv.h" -#include "common/common.h" -#include "common/io.h" -#include "common/observer.h" -#include "common/random.h" -#include "common/threading_utils.h" -#include "common/timer.h" -#include "common/version.h" -#include "xgboost/base.h" -#include "xgboost/c_api.h" -#include "xgboost/context.h" // Context -#include "xgboost/data.h" -#include "xgboost/feature_map.h" -#include "xgboost/gbm.h" -#include "xgboost/host_device_vector.h" -#include "xgboost/json.h" -#include "xgboost/logging.h" -#include "xgboost/metric.h" -#include "xgboost/model.h" -#include "xgboost/objective.h" -#include "xgboost/parameter.h" -#include "xgboost/predictor.h" +#include "collective/communicator-inl.h" // for Allreduce, Broadcast, GetRank, IsDistributed +#include "collective/communicator.h" // for Operation +#include "common/api_entry.h" // for XGBAPIThreadLocalEntry +#include "common/charconv.h" // for to_chars, to_chars_result, NumericLimits, from_... +#include "common/common.h" // for ToString, Split +#include "common/io.h" // for PeekableInStream, ReadAll, FixedSizeStream, Mem... 
+#include "common/observer.h" // for TrainingObserver +#include "common/random.h" // for GlobalRandom +#include "common/timer.h" // for Monitor +#include "common/version.h" // for Version +#include "dmlc/endian.h" // for ByteSwap, DMLC_IO_NO_ENDIAN_SWAP +#include "xgboost/base.h" // for Args, bst_float, GradientPair, bst_feature_t +#include "xgboost/context.h" // for Context +#include "xgboost/data.h" // for DMatrix, MetaInfo +#include "xgboost/gbm.h" // for GradientBooster +#include "xgboost/global_config.h" // for GlobalConfiguration, GlobalConfigThreadLocalStore +#include "xgboost/host_device_vector.h" // for HostDeviceVector +#include "xgboost/json.h" // for Json, get, Object, String, IsA, Array, ToJson +#include "xgboost/linalg.h" // for Tensor, TensorView +#include "xgboost/logging.h" // for CHECK, LOG, CHECK_EQ +#include "xgboost/metric.h" // for Metric +#include "xgboost/objective.h" // for ObjFunction +#include "xgboost/parameter.h" // for DECLARE_FIELD_ENUM_CLASS, XGBoostParameter +#include "xgboost/predictor.h" // for PredictionContainer, PredictionCacheEntry +#include "xgboost/string_view.h" // for operator<<, StringView +#include "xgboost/task.h" // for ObjInfo namespace { - const char* kMaxDeltaStepDefaultValue = "0.7"; } // anonymous namespace +DECLARE_FIELD_ENUM_CLASS(xgboost::MultiStrategy); + namespace xgboost { Learner::~Learner() = default; namespace { @@ -86,8 +99,10 @@ struct LearnerModelParamLegacy : public dmlc::Parameter /*! \brief the version of XGBoost. */ std::uint32_t major_version; std::uint32_t minor_version; - - uint32_t num_target{1}; + /** + * \brief Number of target variables. + */ + bst_target_t num_target; /** * \brief Whether we should calculate the base score from training data. * @@ -113,7 +128,7 @@ struct LearnerModelParamLegacy : public dmlc::Parameter } // Skip other legacy fields. - Json ToJson() const { + [[nodiscard]] Json ToJson() const { Json obj{Object{}}; char floats[NumericLimits::kToCharsSize]; auto ret = to_chars(floats, floats + NumericLimits::kToCharsSize, base_score); @@ -163,7 +178,7 @@ struct LearnerModelParamLegacy : public dmlc::Parameter from_chars(str.c_str(), str.c_str() + str.size(), base_score); } - LearnerModelParamLegacy ByteSwap() const { + [[nodiscard]] LearnerModelParamLegacy ByteSwap() const { LearnerModelParamLegacy x = *this; dmlc::ByteSwap(&x.base_score, sizeof(x.base_score), 1); dmlc::ByteSwap(&x.num_feature, sizeof(x.num_feature), 1); @@ -226,35 +241,38 @@ struct LearnerModelParamLegacy : public dmlc::Parameter DMLC_DECLARE_FIELD(num_feature) .set_default(0) .describe( - "Number of features in training data," - " this parameter will be automatically detected by learner."); + "Number of features in training data, this parameter will be automatically detected by " + "learner."); DMLC_DECLARE_FIELD(num_class).set_default(0).set_lower_bound(0).describe( "Number of class option for multi-class classifier. " " By default equals 0 and corresponds to binary classifier."); DMLC_DECLARE_FIELD(num_target) .set_default(1) .set_lower_bound(1) - .describe("Number of target for multi-target regression."); + .describe("Number of output targets. 
Can be set automatically if not specified."); DMLC_DECLARE_FIELD(boost_from_average) .set_default(true) .describe("Whether we should calculate the base score from training data."); } }; -LearnerModelParam::LearnerModelParam(LearnerModelParamLegacy const& user_param, ObjInfo t) - : num_feature{user_param.num_feature}, task{t} { - auto n_classes = std::max(static_cast<std::uint32_t>(user_param.num_class), 1u); - auto n_targets = user_param.num_target; - num_output_group = std::max(n_classes, n_targets); - // For version < 1.6, n_targets == 0 - CHECK(n_classes <= 1 || n_targets <= 1) - << "Multi-class multi-output is not yet supported. n_classes:" << n_classes - << ", n_targets:" << n_targets; +LearnerModelParam::LearnerModelParam(LearnerModelParamLegacy const& user_param, ObjInfo t, + MultiStrategy multi_strategy) + : num_feature{user_param.num_feature}, + num_output_group{ + std::max(static_cast<bst_target_t>(user_param.num_class), user_param.num_target)}, + task{t}, + multi_strategy{multi_strategy} { + if (user_param.num_class > 1 && user_param.num_target > 1) { + LOG(FATAL) << "Multi-target multi-class is not yet supported. Output classes:" + << user_param.num_class << ", output targets:" << user_param.num_target; + } } LearnerModelParam::LearnerModelParam(Context const* ctx, LearnerModelParamLegacy const& user_param, - linalg::Tensor<float, 1> base_margin, ObjInfo t) - : LearnerModelParam{user_param, t} { + linalg::Tensor<float, 1> base_margin, ObjInfo t, + MultiStrategy multi_strategy) + : LearnerModelParam{user_param, t, multi_strategy} { std::swap(base_score_, base_margin); // Make sure read access everywhere for thread-safe prediction. std::as_const(base_score_).HostView(); @@ -297,6 +315,7 @@ void LearnerModelParam::Copy(LearnerModelParam const& that) { num_feature = that.num_feature; num_output_group = that.num_output_group; task = that.task; + multi_strategy = that.multi_strategy; } struct LearnerTrainParam : public XGBoostParameter<LearnerTrainParam> { // specified by users. Move them to model parameter once we can get rid of binary IO. std::string booster; std::string objective; + // This is a training parameter; it is neither saved to nor loaded from the model. + MultiStrategy multi_strategy{MultiStrategy::kComposite}; // declare parameters DMLC_DECLARE_PARAMETER(LearnerTrainParam) { DMLC_DECLARE_FIELD(disable_default_eval_metric) .set_default(false) .describe("Flag to disable default metric. Set to >0 to disable"); - DMLC_DECLARE_FIELD(booster) - .set_default("gbtree") - .describe("Gradient booster used for training."); + DMLC_DECLARE_FIELD(booster).set_default("gbtree").describe( + "Gradient booster used for training."); DMLC_DECLARE_FIELD(objective) .set_default("reg:squarederror") .describe("Objective function used for obtaining gradient."); + DMLC_DECLARE_FIELD(multi_strategy) + .add_enum("composite", MultiStrategy::kComposite) + .add_enum("monolithic", MultiStrategy::kMonolithic) + .set_default(MultiStrategy::kComposite) + .describe( + "Strategy used for training multi-target models. `composite` builds a separate tree " + "for each target, while `monolithic` builds a single tree for all targets."); } }; @@ -379,8 +406,10 @@ class LearnerConfiguration : public Learner { // transform to margin h_base_score(0) = obj_->ProbToMargin(mparam_.base_score); + CHECK(tparam_.GetInitialised()); // move it to model param, which is shared with all other components.
- learner_model_param_ = LearnerModelParam(Ctx(), mparam_, std::move(base_score), task); + learner_model_param_ = + LearnerModelParam(Ctx(), mparam_, std::move(base_score), task, tparam_.multi_strategy); CHECK(learner_model_param_.Initialized()); CHECK_NE(learner_model_param_.BaseScore(Ctx()).Size(), 0); } @@ -748,7 +777,6 @@ class LearnerConfiguration : public Learner { << "0 feature is supplied. Are you using raw Booster interface?"; // Remove these once binary IO is gone. cfg_["num_feature"] = common::ToString(mparam_.num_feature); - cfg_["num_class"] = common::ToString(mparam_.num_class); } void ConfigureGBM(LearnerTrainParam const& old, Args const& args) { @@ -779,9 +807,17 @@ class LearnerConfiguration : public Learner { if (obj_ == nullptr || tparam_.objective != old.objective) { obj_.reset(ObjFunction::Create(tparam_.objective, &ctx_)); } + + bool has_nc {cfg_.find("num_class") != cfg_.cend()}; + // Inject num_class into configuration. + // FIXME(jiamingy): Remove the duplicated parameter in softmax + cfg_["num_class"] = common::ToString(mparam_.num_class); auto& args = *p_args; args = {cfg_.cbegin(), cfg_.cend()}; // renew obj_->Configure(args); + if (!has_nc) { + cfg_.erase("num_class"); + } } void ConfigureMetrics(Args const& args) { @@ -805,7 +841,7 @@ class LearnerConfiguration : public Learner { void ConfigureTargets() { CHECK(this->obj_); auto const& cache = prediction_container_.Container(); - size_t n_targets = 1; + bst_target_t n_targets = 1; for (auto const& d : cache) { if (n_targets == 1) { n_targets = this->obj_->Targets(d.first.ptr->Info()); @@ -814,7 +850,8 @@ class LearnerConfiguration : public Learner { CHECK(n_targets == t || 1 == t) << "Inconsistent labels."; } } - if (mparam_.num_target != 1) { + + if (mparam_.num_target > 1) { CHECK(n_targets == 1 || n_targets == mparam_.num_target) << "Inconsistent configuration of num_target. Configuration result from input data:" << n_targets << ", configuration from parameter:" << mparam_.num_target; @@ -974,9 +1011,6 @@ class LearnerIO : public LearnerConfiguration { if (!DMLC_IO_NO_ENDIAN_SWAP) { mparam_ = mparam_.ByteSwap(); } - if (mparam_.num_target == 0) { - mparam_.num_target = 1; - } CHECK(fi->Read(&tparam_.objective)) << "BoostLearner: wrong model format"; CHECK(fi->Read(&tparam_.booster)) << "BoostLearner: wrong model format"; @@ -1030,7 +1064,7 @@ class LearnerIO : public LearnerConfiguration { : obj_->ProbToMargin(mparam_.base_score)}, {1}, Context::kCpuId}, - obj_->Task()); + obj_->Task(), tparam_.multi_strategy); if (attributes_.find("objective") != attributes_.cend()) { auto obj_str = attributes_.at("objective"); @@ -1058,7 +1092,6 @@ class LearnerIO : public LearnerConfiguration { mparam_.major_version = std::get<0>(Version::Self()); mparam_.minor_version = std::get<1>(Version::Self()); - cfg_["num_class"] = common::ToString(mparam_.num_class); cfg_["num_feature"] = common::ToString(mparam_.num_feature); auto n = tparam_.__DICT__(); @@ -1071,6 +1104,8 @@ class LearnerIO : public LearnerConfiguration { // JSON serialization format. 
void SaveModel(dmlc::Stream* fo) const override { this->CheckModelInitialized(); + CHECK(!this->learner_model_param_.IsVectorLeaf()) + << "Please use JSON/UBJ format for model serialization with multi-output models."; LearnerModelParamLegacy mparam = mparam_; // make a copy to potentially modify std::vector<std::pair<std::string, std::string> > extra_attr; diff --git a/src/predictor/predictor.cc b/src/predictor/predictor.cc index c6ef7fe51..2559447f3 100644 --- a/src/predictor/predictor.cc +++ b/src/predictor/predictor.cc @@ -3,18 +3,19 @@ */ #include "xgboost/predictor.h" -#include <dmlc/registry.h> +#include <dmlc/registry.h> // for DMLC_REGISTRY_LINK_TAG -#include <string> // std::string +#include <cstdint> // for int32_t +#include <string> // for string, to_string -#include "../gbm/gbtree.h" // GBTreeModel -#include "xgboost/base.h" // bst_row_t,bst_group_t -#include "xgboost/context.h" // Context -#include "xgboost/data.h" // MetaInfo -#include "xgboost/host_device_vector.h" // HostDeviceVector -#include "xgboost/learner.h" // LearnerModelParam -#include "xgboost/linalg.h" // Tensor -#include "xgboost/logging.h" +#include "../gbm/gbtree_model.h" // for GBTreeModel +#include "xgboost/base.h" // for bst_float, Args, bst_group_t, bst_row_t +#include "xgboost/context.h" // for Context +#include "xgboost/data.h" // for MetaInfo +#include "xgboost/host_device_vector.h" // for HostDeviceVector +#include "xgboost/learner.h" // for LearnerModelParam +#include "xgboost/linalg.h" // for Tensor, TensorView +#include "xgboost/logging.h" // for CHECK_EQ, CHECK_NE, LOG namespace dmlc { DMLC_REGISTRY_ENABLE(::xgboost::PredictorReg); @@ -45,15 +46,16 @@ void ValidateBaseMarginShape(linalg::Tensor<float, 3> const& margin, bst_row_t n void Predictor::InitOutPredictions(const MetaInfo& info, HostDeviceVector<bst_float>* out_preds, const gbm::GBTreeModel& model) const { CHECK_NE(model.learner_model_param->num_output_group, 0); - size_t n_classes = model.learner_model_param->num_output_group; - size_t n = n_classes * info.num_row_; + std::size_t n{model.learner_model_param->OutputLength() * info.num_row_}; + const HostDeviceVector<bst_float>* base_margin = info.base_margin_.Data(); if (ctx_->gpu_id >= 0) { out_preds->SetDevice(ctx_->gpu_id); } if (!base_margin->Empty()) { out_preds->Resize(n); - ValidateBaseMarginShape(info.base_margin_, info.num_row_, n_classes); + ValidateBaseMarginShape(info.base_margin_, info.num_row_, + model.learner_model_param->OutputLength()); out_preds->Copy(*base_margin); } else { // cannot rely on the Resize to fill as it might skip if the size is already correct.
@@ -64,12 +66,10 @@ void Predictor::InitOutPredictions(const MetaInfo& info, HostDeviceVector{{base_score}, shape, device}, - n_groups); + n_groups, 1, MultiStrategy::kComposite); return mparam; } diff --git a/tests/cpp/test_multi_target.cc b/tests/cpp/test_multi_target.cc index 79d822601..d2e34235c 100644 --- a/tests/cpp/test_multi_target.cc +++ b/tests/cpp/test_multi_target.cc @@ -2,24 +2,26 @@ * Copyright 2023 by XGBoost Contributors */ #include <gtest/gtest.h> -#include <xgboost/base.h> // bst_target_t -#include <xgboost/data.h> // DMatrix -#include <xgboost/json.h> // Json,Object,Number,get -#include <xgboost/learner.h> // Learner +#include <xgboost/base.h> // for Args, bst_target_t +#include <xgboost/data.h> // for DMatrix, MetaInfo +#include <xgboost/json.h> // for Json, get, Object, String +#include <xgboost/learner.h> // for Learner -#include <cstddef> // size_t -#include <memory> // shared_ptr,unique_ptr -#include -#include <string> // stod -#include +#include <algorithm> // for copy +#include <cstddef> // for size_t +#include <memory> // for shared_ptr, allocator, __shared_ptr_access +#include <numeric> // for accumulate +#include <string> // for stod, string +#include <vector> // for vector -#include "../../src/common/linalg_op.h" // cbegin,cend -#include "../../src/common/stats.h" // Median -#include "helpers.h" // RandomDataGenerator -#include "xgboost/linalg.h" +#include "../../src/common/linalg_op.h" // for begin, cbegin, cend +#include "../../src/common/stats.h" // for Median +#include "../../src/common/transform_iterator.h" // for IndexTransformIter +#include "helpers.h" // for RandomDataGenerator +#include "xgboost/host_device_vector.h" // for HostDeviceVector +#include "xgboost/linalg.h" // for Tensor, All, TensorView, Vector namespace xgboost { - class TestL1MultiTarget : public ::testing::Test { std::shared_ptr<DMatrix> Xy_; std::shared_ptr<DMatrix> Xyw_; @@ -117,4 +119,16 @@ TEST_F(TestL1MultiTarget, Approx) { this->RunTest("approx"); } #if defined(XGBOOST_USE_CUDA) TEST_F(TestL1MultiTarget, GpuHist) { this->RunTest("gpu_hist"); } #endif // defined(XGBOOST_USE_CUDA) + +TEST(MultiStrategy, Configure) { + auto p_fmat = RandomDataGenerator{12ul, 3ul, 0.0}.GenerateDMatrix(); + p_fmat->Info().labels.Reshape(p_fmat->Info().num_row_, 2); + std::unique_ptr<Learner> learner{Learner::Create({p_fmat})}; + learner->SetParams(Args{{"multi_strategy", "monolithic"}, {"num_target", "2"}}); + learner->Configure(); + ASSERT_EQ(learner->Groups(), 2); + + learner->SetParams(Args{{"multi_strategy", "monolithic"}, {"num_target", "0"}}); + ASSERT_THROW({ learner->Configure(); }, dmlc::Error); +} } // namespace xgboost
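
A note on the accessors added to LearnerModelParam in this patch: under `kComposite` the booster keeps one scalar-leaf tree per target, so `LeafLength()` collapses to 1, while under `kMonolithic` a single tree stores one value per target in every leaf. The following is a minimal standalone sketch of that relationship; `ParamSketch` is a hypothetical, simplified stand-in, not the real class:

    #include <cstdint>

    enum class MultiStrategy : std::int32_t { kComposite = 0, kMonolithic = 1 };

    // Hypothetical stand-in for LearnerModelParam, reduced to the fields the
    // new accessors touch.
    struct ParamSketch {
      std::uint32_t num_output_group{1};
      MultiStrategy multi_strategy{MultiStrategy::kComposite};

      bool IsVectorLeaf() const { return multi_strategy == MultiStrategy::kMonolithic; }
      std::uint32_t OutputLength() const { return num_output_group; }
      // Composite models keep scalar leaves (one tree per target), so the
      // per-leaf length collapses to 1; monolithic models carry vector leaves.
      std::uint32_t LeafLength() const { return IsVectorLeaf() ? OutputLength() : 1; }
    };

For a 3-target regressor, both strategies report OutputLength() == 3, but LeafLength() is 1 for composite and 3 for monolithic.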
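The change to ConfigureObjective injects num_class into the flat configuration just long enough for the objective to consume it, then erases it again when the user never supplied the key. Here is a standalone sketch of that inject-then-erase pattern, with a plain std::map standing in for the learner's cfg_ and the objective's Configure() call elided:

    #include <map>
    #include <string>

    // Sketch only: cfg plays the role of cfg_; the hand-off to
    // ObjFunction::Configure() is omitted.
    void ConfigureWithInjectedNumClass(std::map<std::string, std::string>* cfg,
                                       std::string const& num_class) {
      bool has_nc{cfg->find("num_class") != cfg->cend()};
      (*cfg)["num_class"] = num_class;  // visible to the objective during Configure()
      // ... flatten *cfg into Args and configure the objective here ...
      if (!has_nc) {
        cfg->erase("num_class");  // keep cfg a faithful record of user input
      }
    }

Erasing the injected key keeps the stored configuration limited to what the user actually set, which matters once that configuration is round-tripped through serialization.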
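ConfigureTargets infers the number of targets from every cached DMatrix and cross-checks the result against an explicitly configured num_target. A simplified sketch of that consistency rule follows; the error type, the free-function shape, and the return value are assumptions for illustration, not the patch's exact behavior:

    #include <cstdint>
    #include <stdexcept>
    #include <vector>

    // from_data holds the value of obj->Targets(info) for each cached DMatrix.
    std::uint32_t ResolveTargets(std::vector<std::uint32_t> const& from_data,
                                 std::uint32_t configured) {
      std::uint32_t n_targets = 1;
      for (auto t : from_data) {
        if (n_targets == 1) {
          n_targets = t;  // adopt the first non-trivial target count
        } else if (t != n_targets && t != 1) {
          throw std::invalid_argument{"Inconsistent labels."};
        }
      }
      // An explicit num_target (> 1) must agree with what the data implies.
      if (configured > 1 && n_targets != 1 && n_targets != configured) {
        throw std::invalid_argument{"Inconsistent configuration of num_target."};
      }
      return configured > 1 ? configured : n_targets;
    }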
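Since the legacy binary writer now rejects vector-leaf models, multi-output boosters have to go through the JSON/UBJSON serialization path. A minimal sketch of that path, assuming a configured xgboost::Learner* built against this patch:

    #include <string>

    #include "xgboost/json.h"     // for Json, Object
    #include "xgboost/learner.h"  // for Learner

    std::string SaveToJson(xgboost::Learner* learner) {
      xgboost::Json out{xgboost::Object{}};
      learner->SaveModel(&out);        // JSON path; works for vector-leaf models
      std::string raw;
      xgboost::Json::Dump(out, &raw);  // serialize the document to a string
      return raw;
    }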
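Finally, Predictor::InitOutPredictions now sizes its output buffer (and validates any user-supplied base margin) against the model's OutputLength() rather than a class count, so multi-class and multi-target models share one code path. A trivial sketch of the sizing rule, with simplified types:

    #include <cstddef>
    #include <cstdint>

    // Output buffer size = number of outputs per row times number of rows.
    std::size_t ExpectedPredictionSize(std::uint32_t output_length, std::uint64_t n_rows) {
      return static_cast<std::size_t>(output_length) * n_rows;
    }

With 100 rows and 3 targets the buffer holds 300 entries, regardless of which multi_strategy produced the model.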