merge latest changes

amdsc21 2023-03-10 22:10:20 +01:00
commit f0b8c02f15
57 changed files with 1435 additions and 592 deletions

.gitmodules (vendored)

@ -2,9 +2,6 @@
path = dmlc-core
url = https://github.com/dmlc/dmlc-core
branch = main
[submodule "cub"]
path = cub
url = https://github.com/NVlabs/cub
[submodule "gputreeshap"]
path = gputreeshap
url = https://github.com/rapidsai/gputreeshap.git

CMakeLists.txt

@ -51,7 +51,6 @@ option(HIDE_CXX_SYMBOLS "Build shared library and hide all C++ symbols" OFF)
option(USE_CUDA "Build with GPU acceleration" OFF)
option(USE_NCCL "Build with NCCL to enable distributed GPU support." OFF)
option(BUILD_WITH_SHARED_NCCL "Build with shared NCCL library." OFF)
option(BUILD_WITH_CUDA_CUB "Build with cub in CUDA installation" OFF)
set(GPU_COMPUTE_VER "" CACHE STRING
"Semicolon separated list of compute versions to be built against, e.g. '35;61'")
## HIP
@ -138,13 +137,6 @@ endif (ENABLE_ALL_WARNINGS)
if (BUILD_STATIC_LIB AND (R_LIB OR JVM_BINDINGS))
message(SEND_ERROR "Cannot build a static library libxgboost.a when R or JVM packages are enabled.")
endif (BUILD_STATIC_LIB AND (R_LIB OR JVM_BINDINGS))
if (PLUGIN_RMM AND (NOT BUILD_WITH_CUDA_CUB))
message(SEND_ERROR "Cannot build with RMM using cub submodule.")
endif (PLUGIN_RMM AND (NOT BUILD_WITH_CUDA_CUB))
if (PLUGIN_RMM AND (NOT BUILD_WITH_HIP_CUB))
message(SEND_ERROR "Cannot build with RMM using cub submodule.")
endif (PLUGIN_RMM AND (NOT BUILD_WITH_HIP_CUB))
if (PLUGIN_FEDERATED)
if (CMAKE_CROSSCOMPILING)
message(SEND_ERROR "Cannot cross compile with federated learning support")
@ -179,10 +171,6 @@ if (USE_CUDA)
set(GEN_CODE "")
format_gencode_flags("${GPU_COMPUTE_VER}" GEN_CODE)
add_subdirectory(${PROJECT_SOURCE_DIR}/gputreeshap)
if ((${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 11.4) AND (NOT BUILD_WITH_CUDA_CUB))
set(BUILD_WITH_CUDA_CUB ON)
endif ()
endif (USE_CUDA)
if (USE_HIP)

R-package/R/xgb.Booster.R

@ -214,6 +214,10 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) {
#' Since it quadratically depends on the number of features, it is recommended to perform selection
#' of the most important features first. See below about the format of the returned results.
#'
#' The \code{predict()} method uses as many threads as are defined in the \code{xgb.Booster} object (all available threads by default).
#' To change the number of threads, assign a new value to \code{nthread} using \code{\link{xgb.parameters<-}}.
#' Note that converting a matrix to \code{\link{xgb.DMatrix}} also uses multiple threads.
#'
#' @return
#' The return type is different depending on whether \code{strict_shape} is set to \code{TRUE}. By default,
#' for regression or binary classification, it returns a vector of length \code{nrows(newdata)}.

R-package/man/predict.xgb.Booster.Rd

@ -122,6 +122,10 @@ With \code{predinteraction = TRUE}, SHAP values of contributions of interaction
are computed. Note that this operation might be rather expensive in terms of compute and memory.
Since it quadratically depends on the number of features, it is recommended to perform selection
of the most important features first. See below about the format of the returned results.
The \code{predict()} method uses as many threads as are defined in the \code{xgb.Booster} object (all available threads by default).
To change the number of threads, assign a new value to \code{nthread} using \code{\link{xgb.parameters<-}}.
Note that converting a matrix to \code{\link{xgb.DMatrix}} also uses multiple threads.
}
\examples{
## binary classification:

R-package/src/Makevars.in

@ -61,6 +61,7 @@ OBJECTS= \
$(PKGROOT)/src/tree/fit_stump.o \
$(PKGROOT)/src/tree/tree_model.o \
$(PKGROOT)/src/tree/tree_updater.o \
$(PKGROOT)/src/tree/multi_target_tree_model.o \
$(PKGROOT)/src/tree/updater_approx.o \
$(PKGROOT)/src/tree/updater_colmaker.o \
$(PKGROOT)/src/tree/updater_prune.o \

R-package/src/Makevars.win

@ -60,6 +60,7 @@ OBJECTS= \
$(PKGROOT)/src/tree/param.o \
$(PKGROOT)/src/tree/fit_stump.o \
$(PKGROOT)/src/tree/tree_model.o \
$(PKGROOT)/src/tree/multi_target_tree_model.o \
$(PKGROOT)/src/tree/tree_updater.o \
$(PKGROOT)/src/tree/updater_approx.o \
$(PKGROOT)/src/tree/updater_colmaker.o \

cmake/Utils.cmake

@ -165,13 +165,8 @@ function(xgboost_set_cuda_flags target)
enable_nvtx(${target})
endif (USE_NVTX)
if (NOT BUILD_WITH_CUDA_CUB)
target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_CUDA=1 -DTHRUST_IGNORE_CUB_VERSION_CHECK=1)
target_include_directories(${target} PRIVATE ${xgboost_SOURCE_DIR}/cub/ ${xgboost_SOURCE_DIR}/gputreeshap)
else ()
target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_CUDA=1)
target_include_directories(${target} PRIVATE ${xgboost_SOURCE_DIR}/gputreeshap)
endif (NOT BUILD_WITH_CUDA_CUB)
if (MSVC)
target_compile_options(${target} PRIVATE

cub (submodule)

@ -1 +0,0 @@
Subproject commit af39ee264f4627608072bf54730bf3a862e56875

doc/build.rst

@ -128,8 +128,7 @@ From the command line on Linux starting from the XGBoost directory:
mkdir build
cd build
# For CUDA toolkit >= 11.4, `BUILD_WITH_CUDA_CUB` is required.
cmake .. -DUSE_CUDA=ON -DBUILD_WITH_CUDA_CUB=ON
cmake .. -DUSE_CUDA=ON
make -j4
.. note:: Specifying compute capability

doc/tutorials/dask.rst

@ -474,6 +474,14 @@ interface, including callback functions, custom evaluation metric and objective:
callbacks=[early_stop],
)
**********************
Hyper-parameter tuning
**********************
See https://github.com/coiled/dask-xgboost-nyctaxi for a set of examples of tuning XGBoost
with Dask and Optuna.
.. _tracker-ip:
***************
@ -498,11 +506,15 @@ dask config is used:
with Client(scheduler_file="sched.json") as client:
reg = dxgb.DaskXGBRegressor()
# or we can specify the port too
# We can specify the port for XGBoost as well
with dask.config.set({"xgboost.scheduler_address": "192.0.0.100:12345"}):
reg = dxgb.DaskXGBRegressor()
Please note that XGBoost requires a different port than Dask. By default, on a Unix-like
system XGBoost uses port 0 to find an available port, which may fail if the user is
running in a restricted Docker environment. In that case, please open additional ports in
the container and specify one of them as in the snippet above.
************
IPv6 Support

include/xgboost/base.h

@ -110,11 +110,11 @@ using bst_bin_t = int32_t; // NOLINT
*/
using bst_row_t = std::size_t; // NOLINT
/*! \brief Type for tree node index. */
using bst_node_t = int32_t; // NOLINT
using bst_node_t = std::int32_t; // NOLINT
/*! \brief Type for ranking group index. */
using bst_group_t = uint32_t; // NOLINT
/*! \brief Type for indexing target variables. */
using bst_target_t = std::size_t; // NOLINT
using bst_group_t = std::uint32_t; // NOLINT
/*! \brief Type for indexing into output targets. */
using bst_target_t = std::uint32_t; // NOLINT
namespace detail {
/*! \brief Implementation of gradient statistics pair. Template specialisation

include/xgboost/learner.h

@ -8,29 +8,33 @@
#ifndef XGBOOST_LEARNER_H_
#define XGBOOST_LEARNER_H_
#include <dmlc/io.h> // Serializable
#include <xgboost/base.h>
#include <xgboost/context.h> // Context
#include <xgboost/feature_map.h>
#include <xgboost/host_device_vector.h>
#include <xgboost/linalg.h> // Tensor
#include <xgboost/model.h>
#include <xgboost/task.h>
#include <dmlc/io.h> // for Serializable
#include <xgboost/base.h> // for bst_feature_t, bst_target_t, bst_float, Args, GradientPair
#include <xgboost/context.h> // for Context
#include <xgboost/linalg.h> // for Tensor, TensorView
#include <xgboost/metric.h> // for Metric
#include <xgboost/model.h> // for Configurable, Model
#include <xgboost/span.h> // for Span
#include <xgboost/task.h> // for ObjInfo
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include <algorithm> // for max
#include <cstdint> // for int32_t, uint32_t, uint8_t
#include <map> // for map
#include <memory> // for shared_ptr, unique_ptr
#include <string> // for string
#include <utility> // for move
#include <vector> // for vector
namespace xgboost {
class FeatureMap;
class Metric;
class GradientBooster;
class ObjFunction;
class DMatrix;
class Json;
struct XGBAPIThreadLocalEntry;
template <typename T>
class HostDeviceVector;
enum class PredictionType : std::uint8_t { // NOLINT
kValue = 0,
@ -143,7 +147,10 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
* \brief Get number of boosted rounds from gradient booster.
*/
virtual int32_t BoostedRounds() const = 0;
virtual uint32_t Groups() const = 0;
/**
* \brief Get the number of output groups from the model.
*/
virtual std::uint32_t Groups() const = 0;
void LoadModel(Json const& in) override = 0;
void SaveModel(Json* out) const override = 0;
@ -275,8 +282,16 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
struct LearnerModelParamLegacy;
/*
* \brief Basic Model Parameters, used to describe the booster.
/**
* \brief Strategy for building multi-target models.
*/
enum class MultiStrategy : std::int32_t {
kComposite = 0,
kMonolithic = 1,
};
/**
* \brief Basic model parameters, used to describe the booster.
*/
struct LearnerModelParam {
private:
@ -287,30 +302,51 @@ struct LearnerModelParam {
linalg::Tensor<float, 1> base_score_;
public:
/* \brief number of features */
uint32_t num_feature { 0 };
/* \brief number of classes, if it is multi-class classification */
uint32_t num_output_group { 0 };
/* \brief Current task, determined by objective. */
/**
* \brief The number of features.
*/
bst_feature_t num_feature{0};
/**
* \brief The number of classes or targets.
*/
std::uint32_t num_output_group{0};
/**
* \brief Current task, determined by objective.
*/
ObjInfo task{ObjInfo::kRegression};
/**
* \brief Strategy for building multi-target models.
*/
MultiStrategy multi_strategy{MultiStrategy::kComposite};
LearnerModelParam() = default;
// As the old `LearnerModelParamLegacy` is still used by binary IO, we keep
// this one as an immutable copy.
LearnerModelParam(Context const* ctx, LearnerModelParamLegacy const& user_param,
linalg::Tensor<float, 1> base_margin, ObjInfo t);
LearnerModelParam(LearnerModelParamLegacy const& user_param, ObjInfo t);
LearnerModelParam(bst_feature_t n_features, linalg::Tensor<float, 1> base_margin,
uint32_t n_groups)
: base_score_{std::move(base_margin)}, num_feature{n_features}, num_output_group{n_groups} {}
linalg::Tensor<float, 1> base_margin, ObjInfo t, MultiStrategy multi_strategy);
LearnerModelParam(LearnerModelParamLegacy const& user_param, ObjInfo t,
MultiStrategy multi_strategy);
LearnerModelParam(bst_feature_t n_features, linalg::Tensor<float, 1> base_score,
std::uint32_t n_groups, bst_target_t n_targets, MultiStrategy multi_strategy)
: base_score_{std::move(base_score)},
num_feature{n_features},
num_output_group{std::max(n_groups, n_targets)},
multi_strategy{multi_strategy} {}
linalg::TensorView<float const, 1> BaseScore(Context const* ctx) const;
linalg::TensorView<float const, 1> BaseScore(int32_t device) const;
[[nodiscard]] linalg::TensorView<float const, 1> BaseScore(std::int32_t device) const;
void Copy(LearnerModelParam const& that);
[[nodiscard]] bool IsVectorLeaf() const noexcept {
return multi_strategy == MultiStrategy::kMonolithic;
}
[[nodiscard]] bst_target_t OutputLength() const noexcept { return this->num_output_group; }
[[nodiscard]] bst_target_t LeafLength() const noexcept {
return this->IsVectorLeaf() ? this->OutputLength() : 1;
}
/* \brief Whether this parameter is initialized with LearnerModelParamLegacy. */
bool Initialized() const { return num_feature != 0 && num_output_group != 0; }
[[nodiscard]] bool Initialized() const { return num_feature != 0 && num_output_group != 0; }
};
} // namespace xgboost
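A minimal sketch (not part of the diff) of how the two strategies change the model shape, using the new five-argument constructor above. The feature/target counts are hypothetical; the tensor construction follows the `{values}, {shape}, device` pattern used elsewhere in this change:

// 3 regression targets, 4 features.
xgboost::linalg::Tensor<float, 1> base{{0.0f, 0.0f, 0.0f}, {3}, xgboost::Context::kCpuId};
xgboost::LearnerModelParam composite{/*n_features=*/4, base, /*n_groups=*/1,
                                     /*n_targets=*/3, xgboost::MultiStrategy::kComposite};
// composite.OutputLength() == 3, composite.LeafLength() == 1: one scalar tree per target.
xgboost::LearnerModelParam monolithic{/*n_features=*/4, base, /*n_groups=*/1,
                                      /*n_targets=*/3, xgboost::MultiStrategy::kMonolithic};
// monolithic.IsVectorLeaf() == true, monolithic.LeafLength() == 3: a single tree
// produces vector-valued leaves.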

include/xgboost/multi_target_tree_model.h (new file)

@ -0,0 +1,96 @@
/**
* Copyright 2023 by XGBoost contributors
*
* \brief Core data structure for multi-target trees.
*/
#ifndef XGBOOST_MULTI_TARGET_TREE_MODEL_H_
#define XGBOOST_MULTI_TARGET_TREE_MODEL_H_
#include <xgboost/base.h> // for bst_node_t, bst_target_t, bst_feature_t
#include <xgboost/context.h> // for Context
#include <xgboost/linalg.h> // for VectorView
#include <xgboost/model.h> // for Model
#include <xgboost/span.h> // for Span
#include <cinttypes> // for uint8_t
#include <cstddef> // for size_t
#include <vector> // for vector
namespace xgboost {
struct TreeParam;
/**
* \brief Tree structure for multi-target model.
*/
class MultiTargetTree : public Model {
public:
static bst_node_t constexpr InvalidNodeId() { return -1; }
private:
TreeParam const* param_;
std::vector<bst_node_t> left_;
std::vector<bst_node_t> right_;
std::vector<bst_node_t> parent_;
std::vector<bst_feature_t> split_index_;
std::vector<std::uint8_t> default_left_;
std::vector<float> split_conds_;
std::vector<float> weights_;
[[nodiscard]] linalg::VectorView<float const> NodeWeight(bst_node_t nidx) const {
auto beg = nidx * this->NumTarget();
auto v = common::Span<float const>{weights_}.subspan(beg, this->NumTarget());
return linalg::MakeTensorView(Context::kCpuId, v, v.size());
}
[[nodiscard]] linalg::VectorView<float> NodeWeight(bst_node_t nidx) {
auto beg = nidx * this->NumTarget();
auto v = common::Span<float>{weights_}.subspan(beg, this->NumTarget());
return linalg::MakeTensorView(Context::kCpuId, v, v.size());
}
public:
explicit MultiTargetTree(TreeParam const* param);
/**
* \brief Set the weight for a leaf.
*/
void SetLeaf(bst_node_t nidx, linalg::VectorView<float const> weight);
/**
* \brief Expand a leaf into a split node.
*/
void Expand(bst_node_t nidx, bst_feature_t split_idx, float split_cond, bool default_left,
linalg::VectorView<float const> base_weight,
linalg::VectorView<float const> left_weight,
linalg::VectorView<float const> right_weight);
[[nodiscard]] bool IsLeaf(bst_node_t nidx) const { return left_[nidx] == InvalidNodeId(); }
[[nodiscard]] bst_node_t Parent(bst_node_t nidx) const { return parent_.at(nidx); }
[[nodiscard]] bst_node_t LeftChild(bst_node_t nidx) const { return left_.at(nidx); }
[[nodiscard]] bst_node_t RightChild(bst_node_t nidx) const { return right_.at(nidx); }
[[nodiscard]] bst_feature_t SplitIndex(bst_node_t nidx) const { return split_index_[nidx]; }
[[nodiscard]] float SplitCond(bst_node_t nidx) const { return split_conds_[nidx]; }
[[nodiscard]] bool DefaultLeft(bst_node_t nidx) const { return default_left_[nidx]; }
[[nodiscard]] bst_node_t DefaultChild(bst_node_t nidx) const {
return this->DefaultLeft(nidx) ? this->LeftChild(nidx) : this->RightChild(nidx);
}
[[nodiscard]] bst_target_t NumTarget() const;
[[nodiscard]] std::size_t Size() const;
[[nodiscard]] bst_node_t Depth(bst_node_t nidx) const {
bst_node_t depth{0};
while (Parent(nidx) != InvalidNodeId()) {
++depth;
nidx = Parent(nidx);
}
return depth;
}
[[nodiscard]] linalg::VectorView<float const> LeafValue(bst_node_t nidx) const {
CHECK(IsLeaf(nidx));
return this->NodeWeight(nidx);
}
void LoadModel(Json const& in) override;
void SaveModel(Json* out) const override;
};
} // namespace xgboost
#endif // XGBOOST_MULTI_TARGET_TREE_MODEL_H_
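A short usage sketch, assuming the `RegTree(n_targets, n_features)` constructor and the vector-leaf `ExpandNode` overload added to tree_model.h below; `std::as_const(...).HostView()` (needs <utility>) mirrors the pattern used in learner.cc, and CHECK comes from xgboost/logging.h:

#include <xgboost/tree_model.h>  // RegTree, which owns the MultiTargetTree

xgboost::RegTree tree{/*n_targets=*/2, /*n_features=*/4};
xgboost::linalg::Tensor<float, 1> base{{0.f, 0.f}, {2}, xgboost::Context::kCpuId};
xgboost::linalg::Tensor<float, 1> left{{-1.f, 2.f}, {2}, xgboost::Context::kCpuId};
xgboost::linalg::Tensor<float, 1> right{{1.f, -2.f}, {2}, xgboost::Context::kCpuId};
// Split the root on feature 1 at threshold 0.5; each child holds a 2-dim weight.
tree.ExpandNode(xgboost::RegTree::kRoot, /*split_index=*/1, /*split_cond=*/0.5f,
                /*default_left=*/true, std::as_const(base).HostView(),
                std::as_const(left).HostView(), std::as_const(right).HostView());
CHECK(tree.IsMultiTarget());
CHECK_EQ(tree.NumTargets(), 2);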

include/xgboost/tree_model.h

@ -1,5 +1,5 @@
/*!
* Copyright 2014-2022 by Contributors
/**
* Copyright 2014-2023 by Contributors
* \file tree_model.h
* \brief model structure for tree
* \author Tianqi Chen
@ -9,60 +9,62 @@
#include <dmlc/io.h>
#include <dmlc/parameter.h>
#include <xgboost/base.h>
#include <xgboost/data.h>
#include <xgboost/logging.h>
#include <xgboost/feature_map.h>
#include <xgboost/linalg.h> // for VectorView
#include <xgboost/logging.h>
#include <xgboost/model.h>
#include <xgboost/multi_target_tree_model.h> // for MultiTargetTree
#include <limits>
#include <vector>
#include <string>
#include <cstring>
#include <algorithm>
#include <tuple>
#include <cstring>
#include <limits>
#include <memory> // for make_unique
#include <stack>
#include <string>
#include <tuple>
#include <vector>
namespace xgboost {
struct PathElement; // forward declaration
class Json;
#if defined(XGBOOST_USE_HIP)
#define XGBOOST_NODISCARD
#else
#define XGBOOST_NODISCARD [[nodiscard]]
#endif
// FIXME(trivialfis): Once binary IO is gone, make this parameter internal as it should
// not be configured by users.
/*! \brief meta parameters of the tree */
struct TreeParam : public dmlc::Parameter<TreeParam> {
/*! \brief (Deprecated) number of start root */
int deprecated_num_roots;
int deprecated_num_roots{1};
/*! \brief total number of nodes */
int num_nodes;
int num_nodes{1};
/*!\brief number of deleted nodes */
int num_deleted;
int num_deleted{0};
/*! \brief maximum depth, this is a statistics of the tree */
int deprecated_max_depth;
int deprecated_max_depth{0};
/*! \brief number of features used for tree construction */
bst_feature_t num_feature;
bst_feature_t num_feature{0};
/*!
* \brief Leaf vector size, used by the vector-leaf (multi-target) tree to store
* more than one value per leaf.
*/
int size_leaf_vector;
bst_target_t size_leaf_vector{1};
/*! \brief reserved part, make sure alignment works for 64bit */
int reserved[31];
/*! \brief constructor */
TreeParam() {
// assert compact alignment
static_assert(sizeof(TreeParam) == (31 + 6) * sizeof(int),
"TreeParam: 64 bit align");
std::memset(this, 0, sizeof(TreeParam));
num_nodes = 1;
deprecated_num_roots = 1;
static_assert(sizeof(TreeParam) == (31 + 6) * sizeof(int), "TreeParam: 64 bit align");
std::memset(reserved, 0, sizeof(reserved));
}
// Swap byte order for all fields. Useful for transporting models between machines with different
// endianness (big endian vs little endian)
inline TreeParam ByteSwap() const {
XGBOOST_NODISCARD TreeParam ByteSwap() const {
TreeParam x = *this;
dmlc::ByteSwap(&x.deprecated_num_roots, sizeof(x.deprecated_num_roots), 1);
dmlc::ByteSwap(&x.num_nodes, sizeof(x.num_nodes), 1);
@ -80,17 +82,18 @@ struct TreeParam : public dmlc::Parameter<TreeParam> {
// other arguments are set by the algorithm.
DMLC_DECLARE_FIELD(num_nodes).set_lower_bound(1).set_default(1);
DMLC_DECLARE_FIELD(num_feature)
.set_default(0)
.describe("Number of features used in tree construction.");
DMLC_DECLARE_FIELD(num_deleted);
DMLC_DECLARE_FIELD(size_leaf_vector).set_lower_bound(0).set_default(0)
DMLC_DECLARE_FIELD(num_deleted).set_default(0);
DMLC_DECLARE_FIELD(size_leaf_vector)
.set_lower_bound(0)
.set_default(1)
.describe("Size of leaf vector, reserved for vector tree");
}
bool operator==(const TreeParam& b) const {
return num_nodes == b.num_nodes &&
num_deleted == b.num_deleted &&
num_feature == b.num_feature &&
size_leaf_vector == b.size_leaf_vector;
return num_nodes == b.num_nodes && num_deleted == b.num_deleted &&
num_feature == b.num_feature && size_leaf_vector == b.size_leaf_vector;
}
};
@ -114,7 +117,7 @@ struct RTreeNodeStat {
}
// Swap byte order for all fields. Useful for transporting models between machines with different
// endianness (big endian vs little endian)
inline RTreeNodeStat ByteSwap() const {
XGBOOST_NODISCARD RTreeNodeStat ByteSwap() const {
RTreeNodeStat x = *this;
dmlc::ByteSwap(&x.loss_chg, sizeof(x.loss_chg), 1);
dmlc::ByteSwap(&x.sum_hess, sizeof(x.sum_hess), 1);
@ -124,16 +127,45 @@ struct RTreeNodeStat {
}
};
/*!
/**
* \brief Helper for defining a copyable data structure that contains unique pointers.
*/
template <typename T>
class CopyUniquePtr {
std::unique_ptr<T> ptr_{nullptr};
public:
CopyUniquePtr() = default;
CopyUniquePtr(CopyUniquePtr const& that) {
ptr_.reset(nullptr);
if (that.ptr_) {
ptr_ = std::make_unique<T>(*that);
}
}
T* get() const noexcept { return ptr_.get(); } // NOLINT
T& operator*() { return *ptr_; }
T* operator->() noexcept { return this->get(); }
T const& operator*() const { return *ptr_; }
T const* operator->() const noexcept { return this->get(); }
explicit operator bool() const { return static_cast<bool>(ptr_); }
bool operator!() const { return !ptr_; }
void reset(T* ptr) { ptr_.reset(ptr); } // NOLINT
};
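A tiny sketch (not part of the diff) of the copy semantics this helper provides, which is what lets RegTree below stay copyable while owning a MultiTargetTree:

xgboost::CopyUniquePtr<int> a;
a.reset(new int{42});
xgboost::CopyUniquePtr<int> b{a};  // deep copy: b owns a distinct int
*b = 7;                            // *a is still 42; the two never alias
// Copying an empty CopyUniquePtr yields another empty one.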
/**
* \brief define regression tree to be the most common tree model.
*
* This is the data structure used in xgboost's major tree models.
*/
class RegTree : public Model {
public:
using SplitCondT = bst_float;
static constexpr bst_node_t kInvalidNodeId {-1};
static constexpr bst_node_t kInvalidNodeId{MultiTargetTree::InvalidNodeId()};
static constexpr uint32_t kDeletedNodeMarker = std::numeric_limits<uint32_t>::max();
static constexpr bst_node_t kRoot { 0 };
static constexpr bst_node_t kRoot{0};
/*! \brief tree node */
class Node {
@ -151,51 +183,51 @@ class RegTree : public Model {
}
/*! \brief index of left child */
XGBOOST_DEVICE int LeftChild() const {
XGBOOST_DEVICE XGBOOST_NODISCARD int LeftChild() const {
return this->cleft_;
}
/*! \brief index of right child */
XGBOOST_DEVICE int RightChild() const {
XGBOOST_DEVICE XGBOOST_NODISCARD int RightChild() const {
return this->cright_;
}
/*! \brief index of default child when feature is missing */
XGBOOST_DEVICE int DefaultChild() const {
XGBOOST_DEVICE XGBOOST_NODISCARD int DefaultChild() const {
return this->DefaultLeft() ? this->LeftChild() : this->RightChild();
}
/*! \brief feature index of split condition */
XGBOOST_DEVICE unsigned SplitIndex() const {
XGBOOST_DEVICE XGBOOST_NODISCARD unsigned SplitIndex() const {
return sindex_ & ((1U << 31) - 1U);
}
/*! \brief when feature is unknown, whether goes to left child */
XGBOOST_DEVICE bool DefaultLeft() const {
XGBOOST_DEVICE XGBOOST_NODISCARD bool DefaultLeft() const {
return (sindex_ >> 31) != 0;
}
/*! \brief whether current node is leaf node */
XGBOOST_DEVICE bool IsLeaf() const {
XGBOOST_DEVICE XGBOOST_NODISCARD bool IsLeaf() const {
return cleft_ == kInvalidNodeId;
}
/*! \return get leaf value of leaf node */
XGBOOST_DEVICE bst_float LeafValue() const {
XGBOOST_DEVICE XGBOOST_NODISCARD float LeafValue() const {
return (this->info_).leaf_value;
}
/*! \return get split condition of the node */
XGBOOST_DEVICE SplitCondT SplitCond() const {
XGBOOST_DEVICE XGBOOST_NODISCARD SplitCondT SplitCond() const {
return (this->info_).split_cond;
}
/*! \brief get parent of the node */
XGBOOST_DEVICE int Parent() const {
XGBOOST_DEVICE XGBOOST_NODISCARD int Parent() const {
return parent_ & ((1U << 31) - 1);
}
/*! \brief whether current node is left child */
XGBOOST_DEVICE bool IsLeftChild() const {
XGBOOST_DEVICE XGBOOST_NODISCARD bool IsLeftChild() const {
return (parent_ & (1U << 31)) != 0;
}
/*! \brief whether this node is deleted */
XGBOOST_DEVICE bool IsDeleted() const {
XGBOOST_DEVICE XGBOOST_NODISCARD bool IsDeleted() const {
return sindex_ == kDeletedNodeMarker;
}
/*! \brief whether current node is root */
XGBOOST_DEVICE bool IsRoot() const { return parent_ == kInvalidNodeId; }
XGBOOST_DEVICE XGBOOST_NODISCARD bool IsRoot() const { return parent_ == kInvalidNodeId; }
/*!
* \brief set the left child
* \param nid node id to right child
@ -252,7 +284,7 @@ class RegTree : public Model {
info_.leaf_value == b.info_.leaf_value;
}
inline Node ByteSwap() const {
XGBOOST_NODISCARD Node ByteSwap() const {
Node x = *this;
dmlc::ByteSwap(&x.parent_, sizeof(x.parent_), 1);
dmlc::ByteSwap(&x.cleft_, sizeof(x.cleft_), 1);
@ -312,19 +344,28 @@ class RegTree : public Model {
/*! \brief model parameter */
TreeParam param;
/*! \brief constructor */
RegTree() {
param.num_nodes = 1;
param.num_deleted = 0;
param.Init(Args{});
nodes_.resize(param.num_nodes);
stats_.resize(param.num_nodes);
split_types_.resize(param.num_nodes, FeatureType::kNumerical);
split_categories_segments_.resize(param.num_nodes);
for (int i = 0; i < param.num_nodes; i ++) {
for (int i = 0; i < param.num_nodes; i++) {
nodes_[i].SetLeaf(0.0f);
nodes_[i].SetParent(kInvalidNodeId);
}
}
/**
* \brief Constructor that initializes the tree model with shape.
*/
explicit RegTree(bst_target_t n_targets, bst_feature_t n_features) : RegTree{} {
param.num_feature = n_features;
param.size_leaf_vector = n_targets;
if (n_targets > 1) {
this->p_mt_tree_.reset(new MultiTargetTree{&param});
}
}
/*! \brief get node given nid */
Node& operator[](int nid) {
return nodes_[nid];
@ -335,17 +376,17 @@ class RegTree : public Model {
}
/*! \brief get const reference to nodes */
const std::vector<Node>& GetNodes() const { return nodes_; }
XGBOOST_NODISCARD const std::vector<Node>& GetNodes() const { return nodes_; }
/*! \brief get const reference to stats */
const std::vector<RTreeNodeStat>& GetStats() const { return stats_; }
XGBOOST_NODISCARD const std::vector<RTreeNodeStat>& GetStats() const { return stats_; }
/*! \brief get node statistics given nid */
RTreeNodeStat& Stat(int nid) {
return stats_[nid];
}
/*! \brief get node statistics given nid */
const RTreeNodeStat& Stat(int nid) const {
XGBOOST_NODISCARD const RTreeNodeStat& Stat(int nid) const {
return stats_[nid];
}
@ -398,7 +439,7 @@ class RegTree : public Model {
*
* \param b The other tree.
*/
bool Equal(const RegTree& b) const;
XGBOOST_NODISCARD bool Equal(const RegTree& b) const;
/**
* \brief Expands a leaf node into two additional leaf nodes.
@ -424,6 +465,11 @@ class RegTree : public Model {
float right_sum,
bst_node_t leaf_right_child = kInvalidNodeId);
void ExpandNode(bst_node_t nidx, bst_feature_t split_index, float split_cond, bool default_left,
linalg::VectorView<float const> base_weight,
linalg::VectorView<float const> left_weight,
linalg::VectorView<float const> right_weight);
/**
* \brief Expands a leaf node with categories
*
@ -445,15 +491,27 @@ class RegTree : public Model {
bst_float right_leaf_weight, bst_float loss_change, float sum_hess,
float left_sum, float right_sum);
bool HasCategoricalSplit() const {
XGBOOST_NODISCARD bool HasCategoricalSplit() const {
return !split_categories_.empty();
}
/**
* \brief Whether this is a multi-target tree.
*/
XGBOOST_NODISCARD bool IsMultiTarget() const { return static_cast<bool>(p_mt_tree_); }
XGBOOST_NODISCARD bst_target_t NumTargets() const { return param.size_leaf_vector; }
XGBOOST_NODISCARD auto GetMultiTargetTree() const {
CHECK(IsMultiTarget());
return p_mt_tree_.get();
}
/*!
* \brief get current depth
* \param nid node id
*/
int GetDepth(int nid) const {
XGBOOST_NODISCARD std::int32_t GetDepth(bst_node_t nid) const {
if (IsMultiTarget()) {
return this->p_mt_tree_->Depth(nid);
}
int depth = 0;
while (!nodes_[nid].IsRoot()) {
++depth;
@ -461,12 +519,16 @@ class RegTree : public Model {
}
return depth;
}
void SetLeaf(bst_node_t nidx, linalg::VectorView<float const> weight) {
CHECK(IsMultiTarget());
return this->p_mt_tree_->SetLeaf(nidx, weight);
}
/*!
* \brief get maximum depth
* \param nid node id
*/
int MaxDepth(int nid) const {
XGBOOST_NODISCARD int MaxDepth(int nid) const {
if (nodes_[nid].IsLeaf()) return 0;
return std::max(MaxDepth(nodes_[nid].LeftChild())+1,
MaxDepth(nodes_[nid].RightChild())+1);
@ -480,13 +542,13 @@ class RegTree : public Model {
}
/*! \brief number of extra nodes besides the root */
int NumExtraNodes() const {
XGBOOST_NODISCARD int NumExtraNodes() const {
return param.num_nodes - 1 - param.num_deleted;
}
/* \brief Count number of leaves in tree. */
bst_node_t GetNumLeaves() const;
bst_node_t GetNumSplitNodes() const;
XGBOOST_NODISCARD bst_node_t GetNumLeaves() const;
XGBOOST_NODISCARD bst_node_t GetNumSplitNodes() const;
/*!
* \brief dense feature vector that can be taken by RegTree
@ -513,20 +575,20 @@ class RegTree : public Model {
* \brief returns the size of the feature vector
* \return the size of the feature vector
*/
size_t Size() const;
XGBOOST_NODISCARD size_t Size() const;
/*!
* \brief get ith value
* \param i feature index.
* \return the i-th feature value
*/
bst_float GetFvalue(size_t i) const;
XGBOOST_NODISCARD bst_float GetFvalue(size_t i) const;
/*!
* \brief check whether i-th entry is missing
* \param i feature index.
* \return whether i-th value is missing.
*/
bool IsMissing(size_t i) const;
bool HasMissing() const;
XGBOOST_NODISCARD bool IsMissing(size_t i) const;
XGBOOST_NODISCARD bool HasMissing() const;
private:
@ -557,56 +619,123 @@ class RegTree : public Model {
* \param format the format to dump the model in
* \return the string of dumped model
*/
std::string DumpModel(const FeatureMap& fmap,
bool with_stats,
XGBOOST_NODISCARD std::string DumpModel(const FeatureMap& fmap, bool with_stats,
std::string format) const;
/*!
* \brief Get split type for a node.
* \param nidx Index of node.
* \return The type of this split. For leaf node it's always kNumerical.
*/
FeatureType NodeSplitType(bst_node_t nidx) const {
return split_types_.at(nidx);
}
XGBOOST_NODISCARD FeatureType NodeSplitType(bst_node_t nidx) const { return split_types_.at(nidx); }
/*!
* \brief Get split types for all nodes.
*/
std::vector<FeatureType> const &GetSplitTypes() const { return split_types_; }
common::Span<uint32_t const> GetSplitCategories() const { return split_categories_; }
XGBOOST_NODISCARD std::vector<FeatureType> const& GetSplitTypes() const {
return split_types_;
}
XGBOOST_NODISCARD common::Span<uint32_t const> GetSplitCategories() const {
return split_categories_;
}
/*!
* \brief Get the bit storage for categories
*/
common::Span<uint32_t const> NodeCats(bst_node_t nidx) const {
XGBOOST_NODISCARD common::Span<uint32_t const> NodeCats(bst_node_t nidx) const {
auto node_ptr = GetCategoriesMatrix().node_ptr;
auto categories = GetCategoriesMatrix().categories;
auto segment = node_ptr[nidx];
auto node_cats = categories.subspan(segment.beg, segment.size);
return node_cats;
}
auto const& GetSplitCategoriesPtr() const { return split_categories_segments_; }
// The fields of split_categories_segments_[i] are set such that
// the range split_categories_[beg:(beg+size)] stores the bitset for
// the matching categories for the i-th node.
struct Segment {
size_t beg {0};
size_t size {0};
};
XGBOOST_NODISCARD auto const& GetSplitCategoriesPtr() const { return split_categories_segments_; }
/**
* \brief CSR-like matrix for categorical splits.
*
* The fields of split_categories_segments_[i] are set such that the range
* categories[beg:(beg+size)] stores the bitset of matching categories for the
* i-th node.
*/
struct CategoricalSplitMatrix {
struct Segment {
std::size_t beg{0};
std::size_t size{0};
};
common::Span<FeatureType const> split_type;
common::Span<uint32_t const> categories;
common::Span<Segment const> node_ptr;
};
CategoricalSplitMatrix GetCategoriesMatrix() const {
XGBOOST_NODISCARD CategoricalSplitMatrix GetCategoriesMatrix() const {
CategoricalSplitMatrix view;
view.split_type = common::Span<FeatureType const>(this->GetSplitTypes());
view.categories = this->GetSplitCategories();
view.node_ptr = common::Span<Segment const>(split_categories_segments_);
view.node_ptr = common::Span<CategoricalSplitMatrix::Segment const>(split_categories_segments_);
return view;
}
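A sketch of consuming the view for a single node; `tree`, `nidx`, and `category` are hypothetical inputs, and the bit test spells out the bitset layout described above:

auto cats = tree.GetCategoriesMatrix();
if (cats.split_type[nidx] == xgboost::FeatureType::kCategorical) {
  auto seg = cats.node_ptr[nidx];
  auto bits = cats.categories.subspan(seg.beg, seg.size);
  auto c = static_cast<uint32_t>(category);
  // Go left when the category's bit is set in this node's bitset.
  bool go_left = (bits[c / 32] >> (c % 32)) & 1U;
}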
XGBOOST_NODISCARD bst_feature_t SplitIndex(bst_node_t nidx) const {
if (IsMultiTarget()) {
return this->p_mt_tree_->SplitIndex(nidx);
}
return (*this)[nidx].SplitIndex();
}
XGBOOST_NODISCARD float SplitCond(bst_node_t nidx) const {
if (IsMultiTarget()) {
return this->p_mt_tree_->SplitCond(nidx);
}
return (*this)[nidx].SplitCond();
}
XGBOOST_NODISCARD bool DefaultLeft(bst_node_t nidx) const {
if (IsMultiTarget()) {
return this->p_mt_tree_->DefaultLeft(nidx);
}
return (*this)[nidx].DefaultLeft();
}
XGBOOST_NODISCARD bool IsRoot(bst_node_t nidx) const {
if (IsMultiTarget()) {
return nidx == kRoot;
}
return (*this)[nidx].IsRoot();
}
XGBOOST_NODISCARD bool IsLeaf(bst_node_t nidx) const {
if (IsMultiTarget()) {
return this->p_mt_tree_->IsLeaf(nidx);
}
return (*this)[nidx].IsLeaf();
}
XGBOOST_NODISCARD bst_node_t Parent(bst_node_t nidx) const {
if (IsMultiTarget()) {
return this->p_mt_tree_->Parent(nidx);
}
return (*this)[nidx].Parent();
}
XGBOOST_NODISCARD bst_node_t LeftChild(bst_node_t nidx) const {
if (IsMultiTarget()) {
return this->p_mt_tree_->LeftChild(nidx);
}
return (*this)[nidx].LeftChild();
}
XGBOOST_NODISCARD bst_node_t RightChild(bst_node_t nidx) const {
if (IsMultiTarget()) {
return this->p_mt_tree_->RightChild(nidx);
}
return (*this)[nidx].RightChild();
}
XGBOOST_NODISCARD bool IsLeftChild(bst_node_t nidx) const {
if (IsMultiTarget()) {
CHECK_NE(nidx, kRoot);
auto p = this->p_mt_tree_->Parent(nidx);
return nidx == this->p_mt_tree_->LeftChild(p);
}
return (*this)[nidx].IsLeftChild();
}
XGBOOST_NODISCARD bst_node_t Size() const {
if (IsMultiTarget()) {
return this->p_mt_tree_->Size();
}
return this->nodes_.size();
}
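These dispatching accessors let one traversal loop serve both scalar and multi-target trees. A sketch for numerical splits only; `row` stands for a loaded RegTree::FVec:

xgboost::bst_node_t nidx = xgboost::RegTree::kRoot;
while (!tree.IsLeaf(nidx)) {
  auto fidx = tree.SplitIndex(nidx);
  if (row.IsMissing(fidx)) {
    nidx = tree.DefaultLeft(nidx) ? tree.LeftChild(nidx) : tree.RightChild(nidx);
  } else {
    nidx = row.GetFvalue(fidx) < tree.SplitCond(nidx) ? tree.LeftChild(nidx)
                                                      : tree.RightChild(nidx);
  }
}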
private:
template <bool typed>
void LoadCategoricalSplit(Json const& in);
@ -622,8 +751,9 @@ class RegTree : public Model {
// Categories for each internal node.
std::vector<uint32_t> split_categories_;
// Ptr to split categories of each node.
std::vector<Segment> split_categories_segments_;
std::vector<CategoricalSplitMatrix::Segment> split_categories_segments_;
// ptr to multi-target tree with vector leaf.
CopyUniquePtr<MultiTargetTree> p_mt_tree_;
// allocate a new node,
// !!!!!! NOTE: may cause BUG here, nodes.resize
bst_node_t AllocNode() {
@ -703,5 +833,10 @@ inline bool RegTree::FVec::IsMissing(size_t i) const {
inline bool RegTree::FVec::HasMissing() const {
return has_missing_;
}
// Suffix for error messages about operations that do not yet support multi-target trees.
inline StringView MTNotImplemented() {
return " support for multi-target tree is not yet implemented.";
}
} // namespace xgboost
#endif // XGBOOST_TREE_MODEL_H_
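Intended use of MTNotImplemented (a sketch; `Slice` is just an illustrative operation name): stream the caller's name first so the message reads naturally:

CHECK(!tree.IsMultiTarget()) << "Slice" << xgboost::MTNotImplemented();
// -> "Slice support for multi-target tree is not yet implemented."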

include/xgboost/tree_updater.h

@ -1,5 +1,5 @@
/*!
* Copyright 2014-2022 by XGBoost Contributors
/**
* Copyright 2014-2023 by XGBoost Contributors
* \file tree_updater.h
* \brief General primitive for tree learning,
* Updating a collection of trees given the information.
@ -9,19 +9,17 @@
#define XGBOOST_TREE_UPDATER_H_
#include <dmlc/registry.h>
#include <xgboost/base.h>
#include <xgboost/context.h>
#include <xgboost/data.h>
#include <xgboost/host_device_vector.h>
#include <xgboost/linalg.h>
#include <xgboost/model.h>
#include <xgboost/task.h>
#include <xgboost/tree_model.h>
#include <xgboost/base.h> // for Args, GradientPair
#include <xgboost/data.h> // DMatrix
#include <xgboost/host_device_vector.h> // for HostDeviceVector
#include <xgboost/linalg.h> // for VectorView
#include <xgboost/model.h> // for Configurable
#include <xgboost/span.h> // for Span
#include <xgboost/tree_model.h> // for RegTree
#include <functional>
#include <string>
#include <utility>
#include <vector>
#include <functional> // for function
#include <string> // for string
#include <vector> // for vector
namespace xgboost {
namespace tree {
@ -30,8 +28,9 @@ struct TrainParam;
class Json;
struct Context;
struct ObjInfo;
/*!
/**
* \brief interface of tree update module, that performs update of a tree.
*/
class TreeUpdater : public Configurable {
@ -53,12 +52,12 @@ class TreeUpdater : public Configurable {
* used for modifying existing trees (like `prune`). Return true if it can modify
* existing trees.
*/
virtual bool CanModifyTree() const { return false; }
[[nodiscard]] virtual bool CanModifyTree() const { return false; }
/*!
* \brief Whether the out_position in `Update` is valid. This determines whether adaptive
* tree can be used.
*/
virtual bool HasNodePosition() const { return false; }
[[nodiscard]] virtual bool HasNodePosition() const { return false; }
/**
* \brief perform update to the tree models
*
@ -91,14 +90,15 @@ class TreeUpdater : public Configurable {
return false;
}
virtual char const* Name() const = 0;
[[nodiscard]] virtual char const* Name() const = 0;
/*!
/**
* \brief Create a tree updater given name
* \param name Name of the tree updater.
* \param ctx A global runtime parameter
* \param task Information about the objective.
*/
static TreeUpdater* Create(const std::string& name, Context const* ctx, ObjInfo task);
static TreeUpdater* Create(const std::string& name, Context const* ctx, ObjInfo const* task);
};
/*!
@ -106,7 +106,7 @@ class TreeUpdater : public Configurable {
*/
struct TreeUpdaterReg
: public dmlc::FunctionRegEntryBase<
TreeUpdaterReg, std::function<TreeUpdater*(Context const* ctx, ObjInfo task)>> {};
TreeUpdaterReg, std::function<TreeUpdater*(Context const* ctx, ObjInfo const* task)>> {};
/*!
* \brief Macro to register tree updater.

pom.xml (JVM packages)

@ -51,7 +51,7 @@
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>3.2.4</version>
<version>3.3.4</version>
</dependency>
</dependencies>

plugin/federated/README.md

@ -15,7 +15,6 @@ mkdir build
cd build
cmake .. -GNinja \
-DPLUGIN_FEDERATED=ON \
-DBUILD_WITH_CUDA_CUB=ON \
-DUSE_CUDA=ON\
-DUSE_NCCL=ON
ninja

@ -1 +1 @@
Subproject commit bec752a4f35be8d15836f8643d78134019fbbdaf
Subproject commit dced1881e4aa163ba86e1c236d4b6cdb9892d783


@ -13,6 +13,7 @@
#include "xgboost/c_api.h"
#include "xgboost/data.h" // DMatrix
#include "xgboost/feature_map.h" // for FeatureMap
#include "xgboost/json.h"
#include "xgboost/learner.h"
#include "xgboost/linalg.h" // ArrayInterfaceHandler

src/common/error_msg.h (new file)

@ -0,0 +1,24 @@
/**
* Copyright 2023 by XGBoost contributors
*
* \brief Common error message for various checks.
*/
#ifndef XGBOOST_COMMON_ERROR_MSG_H_
#define XGBOOST_COMMON_ERROR_MSG_H_
#include "xgboost/string_view.h" // for StringView
namespace xgboost::error {
constexpr StringView GroupWeight() {
return "Size of weight must equal to the number of query groups when ranking group is used.";
}
constexpr StringView GroupSize() {
return "Invalid query group structure. The number of rows obtained from group doesn't equal to ";
}
constexpr StringView LabelScoreSize() {
return "The size of label doesn't match the size of prediction.";
}
} // namespace xgboost::error
#endif // XGBOOST_COMMON_ERROR_MSG_H_
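The helpers are meant to be streamed into logging checks; a sketch, with `weights`, `n_groups`, `labels`, and `predts` as hypothetical variables:

CHECK_EQ(weights.Size(), n_groups) << xgboost::error::GroupWeight();
CHECK_EQ(labels.Size(), predts.Size()) << xgboost::error::LabelScoreSize();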

src/common/host_device_vector.cc

@ -1,5 +1,5 @@
/*!
* Copyright 2017 XGBoost contributors
/**
* Copyright 2017-2023 by XGBoost contributors
*/
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
@ -179,7 +179,6 @@ template class HostDeviceVector<FeatureType>;
template class HostDeviceVector<Entry>;
template class HostDeviceVector<uint64_t>; // bst_row_t
template class HostDeviceVector<uint32_t>; // bst_feature_t
template class HostDeviceVector<RegTree::Segment>;
#if defined(__APPLE__) || defined(__EMSCRIPTEN__)
/*

src/common/host_device_vector.cu

@ -1,7 +1,6 @@
/*!
* Copyright 2017 XGBoost contributors
/**
* Copyright 2017-2023 by XGBoost contributors
*/
#include <thrust/fill.h>
#include <thrust/device_ptr.h>
@ -457,7 +456,7 @@ template class HostDeviceVector<Entry>;
template class HostDeviceVector<uint64_t>; // bst_row_t
template class HostDeviceVector<uint32_t>; // bst_feature_t
template class HostDeviceVector<RegTree::Node>;
template class HostDeviceVector<RegTree::Segment>;
template class HostDeviceVector<RegTree::CategoricalSplitMatrix::Segment>;
template class HostDeviceVector<RTreeNodeStat>;
#if defined(__APPLE__)

src/common/ranking_utils.cc

@ -3,15 +3,28 @@
*/
#include "ranking_utils.h"
#include <cstdint> // std::uint32_t
#include <sstream> // std::ostringstream
#include <string> // std::string,std::sscanf
#include <algorithm> // for copy_n, max, min, none_of, all_of
#include <cstddef> // for size_t
#include <cstdio> // for sscanf
#include <exception> // for exception
#include <functional> // for greater
#include <iterator> // for reverse_iterator
#include <string> // for char_traits, string
#include "xgboost/string_view.h" // StringView
#include "algorithm.h" // for ArgSort
#include "linalg_op.h" // for cbegin, cend
#include "optional_weight.h" // for MakeOptionalWeights
#include "threading_utils.h" // for ParallelFor
#include "xgboost/base.h" // for bst_group_t
#include "xgboost/context.h" // for Context
#include "xgboost/data.h" // for MetaInfo
#include "xgboost/linalg.h" // for All, TensorView, Range, Tensor, Vector
#include "xgboost/logging.h" // for Error, LogCheck_EQ, CHECK_EQ
namespace xgboost {
namespace ltr {
std::string MakeMetricName(StringView name, StringView param, std::uint32_t* topn, bool* minus) {
namespace xgboost::ltr {
DMLC_REGISTER_PARAMETER(LambdaRankParam);
std::string ParseMetricName(StringView name, StringView param, position_t* topn, bool* minus) {
std::string out_name;
if (!param.empty()) {
std::ostringstream os;
@ -30,5 +43,18 @@ std::string MakeMetricName(StringView name, StringView param, std::uint32_t* top
}
return out_name;
}
} // namespace ltr
} // namespace xgboost
std::string MakeMetricName(StringView name, position_t topn, bool minus) {
std::ostringstream ss;
if (topn == LambdaRankParam::NotSet()) {
ss << name;
} else {
ss << name << "@" << topn;
}
if (minus) {
ss << "-";
}
std::string out_name = ss.str();
return out_name;
}
} // namespace xgboost::ltr
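A sketch of the round trip between the two helpers, assuming the conventional `ndcg@3-` spelling:

xgboost::ltr::position_t topn{0};
bool minus{false};
auto name = xgboost::ltr::ParseMetricName("ndcg", "3-", &topn, &minus);
// name == "ndcg@3-", topn == 3, minus == true
CHECK_EQ(xgboost::ltr::MakeMetricName("ndcg", topn, minus), name);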

src/common/ranking_utils.h

@ -3,17 +3,131 @@
*/
#ifndef XGBOOST_COMMON_RANKING_UTILS_H_
#define XGBOOST_COMMON_RANKING_UTILS_H_
#include <algorithm> // for min
#include <cmath> // for log2, fabs, floor
#include <cstddef> // for size_t
#include <cstdint> // for uint32_t, uint8_t, int32_t
#include <limits> // for numeric_limits
#include <string> // for char_traits, string
#include <vector> // for vector
#include <cstddef> // std::size_t
#include <cstdint> // std::uint32_t
#include <string> // std::string
#include "./math.h" // for CloseTo
#include "dmlc/parameter.h" // for FieldEntry, DMLC_DECLARE_FIELD
#include "error_msg.h" // for GroupWeight, GroupSize
#include "xgboost/base.h" // for XGBOOST_DEVICE, bst_group_t
#include "xgboost/context.h" // for Context
#include "xgboost/data.h" // for MetaInfo
#include "xgboost/host_device_vector.h" // for HostDeviceVector
#include "xgboost/linalg.h" // for Vector, VectorView, Tensor
#include "xgboost/logging.h" // for LogCheck_EQ, CHECK_EQ, CHECK
#include "xgboost/parameter.h" // for XGBoostParameter
#include "xgboost/span.h" // for Span
#include "xgboost/string_view.h" // for StringView
#include "xgboost/string_view.h" // StringView
namespace xgboost {
namespace ltr {
namespace xgboost::ltr {
/**
* \brief Construct name for ranking metric given parameters.
* \brief Relevance degree
*/
using rel_degree_t = std::uint32_t; // NOLINT
/**
* \brief top-k position
*/
using position_t = std::uint32_t; // NOLINT
enum class PairMethod : std::int32_t {
kTopK = 0,
kMean = 1,
};
} // namespace xgboost::ltr
DECLARE_FIELD_ENUM_CLASS(xgboost::ltr::PairMethod);
namespace xgboost::ltr {
struct LambdaRankParam : public XGBoostParameter<LambdaRankParam> {
private:
static constexpr position_t DefaultK() { return 32; }
static constexpr position_t DefaultSamplePairs() { return 1; }
protected:
// pairs
// should be accessed by getter for auto configuration.
// nolint so that we can keep the string name.
PairMethod lambdarank_pair_method{PairMethod::kMean}; // NOLINT
std::size_t lambdarank_num_pair_per_sample{NotSet()}; // NOLINT
public:
static constexpr position_t NotSet() { return std::numeric_limits<position_t>::max(); }
// unbiased
bool lambdarank_unbiased{false};
double lambdarank_bias_norm{2.0};
// ndcg
bool ndcg_exp_gain{true};
bool operator==(LambdaRankParam const& that) const {
return lambdarank_pair_method == that.lambdarank_pair_method &&
lambdarank_num_pair_per_sample == that.lambdarank_num_pair_per_sample &&
lambdarank_unbiased == that.lambdarank_unbiased &&
lambdarank_bias_norm == that.lambdarank_bias_norm && ndcg_exp_gain == that.ndcg_exp_gain;
}
bool operator!=(LambdaRankParam const& that) const { return !(*this == that); }
[[nodiscard]] double Regularizer() const { return 1.0 / (1.0 + this->lambdarank_bias_norm); }
/**
* \brief Get number of pairs for each sample
*/
[[nodiscard]] position_t NumPair() const {
if (lambdarank_num_pair_per_sample == NotSet()) {
switch (lambdarank_pair_method) {
case PairMethod::kMean:
return DefaultSamplePairs();
case PairMethod::kTopK:
return DefaultK();
}
} else {
return lambdarank_num_pair_per_sample;
}
LOG(FATAL) << "Unreachable.";
return 0;
}
[[nodiscard]] bool HasTruncation() const { return lambdarank_pair_method == PairMethod::kTopK; }
// Used for evaluation metric and cache initialization; iterate through the top-k or the whole list.
[[nodiscard]] auto TopK() const {
if (HasTruncation()) {
return NumPair();
} else {
return NotSet();
}
}
DMLC_DECLARE_PARAMETER(LambdaRankParam) {
DMLC_DECLARE_FIELD(lambdarank_pair_method)
.set_default(PairMethod::kMean)
.add_enum("mean", PairMethod::kMean)
.add_enum("topk", PairMethod::kTopK)
.describe("Method for constructing pairs.");
DMLC_DECLARE_FIELD(lambdarank_num_pair_per_sample)
.set_default(NotSet())
.set_lower_bound(1)
.describe("Number of pairs for each sample in the list.");
DMLC_DECLARE_FIELD(lambdarank_unbiased)
.set_default(false)
.describe("Unbiased lambda mart. Use IPW to debias click position");
DMLC_DECLARE_FIELD(lambdarank_bias_norm)
.set_default(2.0)
.set_lower_bound(0.0)
.describe("Lp regularization for unbiased lambdarank.");
DMLC_DECLARE_FIELD(ndcg_exp_gain)
.set_default(true)
.describe("When set to true, the label gain is 2^rel - 1, otherwise it's rel.");
}
};
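A sketch of how `NumPair()` resolves its default when `lambdarank_num_pair_per_sample` is left unset:

xgboost::ltr::LambdaRankParam p;
p.UpdateAllowUnknown(xgboost::Args{{"lambdarank_pair_method", "topk"}});
// p.HasTruncation() == true and p.NumPair() == 32 (DefaultK());
// with "mean" the default would be 1 pair per sample (DefaultSamplePairs()).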
/**
* \brief Parse name for ranking metric given parameters.
*
* \param [in] name Null terminated string for metric name
* \param [in] param Null terminated string for parameter like the `3-` in `ndcg@3-`.
@ -23,7 +137,11 @@ namespace ltr {
*
* \return The name of the metric.
*/
std::string MakeMetricName(StringView name, StringView param, std::uint32_t* topn, bool* minus);
} // namespace ltr
} // namespace xgboost
std::string ParseMetricName(StringView name, StringView param, position_t* topn, bool* minus);
/**
* \brief Construct the metric name from already-parsed components (the inverse of ParseMetricName).
*/
std::string MakeMetricName(StringView name, position_t topn, bool minus);
} // namespace xgboost::ltr
#endif // XGBOOST_COMMON_RANKING_UTILS_H_

src/common/threading_utils.cuh

@ -49,20 +49,18 @@ XGBOOST_DEVICE inline std::size_t DiscreteTrapezoidArea(std::size_t n, std::size
* with h <= n
*/
template <typename U>
inline size_t
SegmentedTrapezoidThreads(xgboost::common::Span<U> group_ptr,
xgboost::common::Span<size_t> out_group_threads_ptr,
size_t h) {
std::size_t SegmentedTrapezoidThreads(xgboost::common::Span<U> group_ptr,
xgboost::common::Span<std::size_t> out_group_threads_ptr,
std::size_t h) {
CHECK_GE(group_ptr.size(), 1);
CHECK_EQ(group_ptr.size(), out_group_threads_ptr.size());
dh::LaunchN(
group_ptr.size(), [=] XGBOOST_DEVICE(size_t idx) {
dh::LaunchN(group_ptr.size(), [=] XGBOOST_DEVICE(std::size_t idx) {
if (idx == 0) {
out_group_threads_ptr[0] = 0;
return;
}
size_t cnt = static_cast<size_t>(group_ptr[idx] - group_ptr[idx - 1]);
std::size_t cnt = static_cast<std::size_t>(group_ptr[idx] - group_ptr[idx - 1]);
out_group_threads_ptr[idx] = DiscreteTrapezoidArea(cnt, h);
});
dh::InclusiveSum(out_group_threads_ptr.data(), out_group_threads_ptr.data(),
@ -70,12 +68,10 @@ SegmentedTrapezoidThreads(xgboost::common::Span<U> group_ptr,
size_t total = 0;
#if defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipMemcpy(
&total, out_group_threads_ptr.data() + out_group_threads_ptr.size() - 1,
dh::safe_cuda(hipMemcpy(&total, out_group_threads_ptr.data() + out_group_threads_ptr.size() - 1,
sizeof(total), hipMemcpyDeviceToHost));
#elif defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaMemcpy(
&total, out_group_threads_ptr.data() + out_group_threads_ptr.size() - 1,
dh::safe_cuda(cudaMemcpy(&total, out_group_threads_ptr.data() + out_group_threads_ptr.size() - 1,
sizeof(total), cudaMemcpyDeviceToHost));
#endif
@ -85,8 +81,8 @@ SegmentedTrapezoidThreads(xgboost::common::Span<U> group_ptr,
/**
* Called inside kernel to obtain coordinate from trapezoid grid.
*/
XGBOOST_DEVICE inline void UnravelTrapeziodIdx(size_t i_idx, size_t n,
size_t *out_i, size_t *out_j) {
XGBOOST_DEVICE inline void UnravelTrapeziodIdx(std::size_t i_idx, std::size_t n, std::size_t *out_i,
std::size_t *out_j) {
auto &i = *out_i;
auto &j = *out_j;
double idx = static_cast<double>(i_idx);

src/gbm/gbtree.cc

@ -340,7 +340,7 @@ void GBTree::InitUpdater(Args const& cfg) {
// create new updaters
for (const std::string& pstr : ups) {
std::unique_ptr<TreeUpdater> up(
TreeUpdater::Create(pstr.c_str(), ctx_, model_.learner_model_param->task));
TreeUpdater::Create(pstr.c_str(), ctx_, &model_.learner_model_param->task));
up->Configure(cfg);
updaters_.push_back(std::move(up));
}
@ -448,7 +448,7 @@ void GBTree::LoadConfig(Json const& in) {
LOG(WARNING) << "Changing updater from `grow_gpu_hist` to `grow_quantile_histmaker`.";
}
std::unique_ptr<TreeUpdater> up{
TreeUpdater::Create(name, ctx_, model_.learner_model_param->task)};
TreeUpdater::Create(name, ctx_, &model_.learner_model_param->task)};
up->LoadConfig(kv.second);
updaters_.push_back(std::move(up));
}

src/learner.cc

@ -6,54 +6,67 @@
*/
#include "xgboost/learner.h"
#include <dmlc/any.h>
#include <dmlc/io.h>
#include <dmlc/parameter.h>
#include <dmlc/thread_local.h>
#include <dmlc/io.h> // for Stream
#include <dmlc/parameter.h> // for FieldEntry, DMLC_DECLARE_FIELD, Parameter, DMLC...
#include <dmlc/thread_local.h> // for ThreadLocalStore
#include <algorithm>
#include <array>
#include <atomic>
#include <iomanip>
#include <limits> // std::numeric_limits
#include <memory>
#include <mutex>
#include <sstream>
#include <stack>
#include <string>
#include <utility> // for as_const
#include <vector>
#include <algorithm> // for equal, max, transform, sort, find_if, all_of
#include <array> // for array
#include <atomic> // for atomic
#include <cctype> // for isalpha, isspace
#include <cmath> // for isnan, isinf
#include <cstdint> // for int32_t, uint32_t, int64_t, uint64_t
#include <cstdlib> // for atoi
#include <cstring> // for memcpy, size_t, memset
#include <functional> // for less
#include <iomanip> // for operator<<, setiosflags
#include <iterator> // for back_insert_iterator, distance, back_inserter
#include <limits> // for numeric_limits
#include <memory> // for allocator, unique_ptr, shared_ptr, operator==
#include <mutex> // for mutex, lock_guard
#include <set> // for set
#include <sstream> // for operator<<, basic_ostream, basic_ostream::opera...
#include <stack> // for stack
#include <string> // for basic_string, char_traits, operator<, string
#include <system_error> // for errc
#include <tuple> // for get
#include <unordered_map> // for operator!=, unordered_map
#include <utility> // for pair, as_const, move, swap
#include <vector> // for vector
#include "collective/communicator-inl.h"
#include "common/api_entry.h" // XGBAPIThreadLocalEntry
#include "common/charconv.h"
#include "common/common.h"
#include "common/io.h"
#include "common/observer.h"
#include "common/random.h"
#include "common/threading_utils.h"
#include "common/timer.h"
#include "common/version.h"
#include "xgboost/base.h"
#include "xgboost/c_api.h"
#include "xgboost/context.h" // Context
#include "xgboost/data.h"
#include "xgboost/feature_map.h"
#include "xgboost/gbm.h"
#include "xgboost/host_device_vector.h"
#include "xgboost/json.h"
#include "xgboost/logging.h"
#include "xgboost/metric.h"
#include "xgboost/model.h"
#include "xgboost/objective.h"
#include "xgboost/parameter.h"
#include "xgboost/predictor.h"
#include "collective/communicator-inl.h" // for Allreduce, Broadcast, GetRank, IsDistributed
#include "collective/communicator.h" // for Operation
#include "common/api_entry.h" // for XGBAPIThreadLocalEntry
#include "common/charconv.h" // for to_chars, to_chars_result, NumericLimits, from_...
#include "common/common.h" // for ToString, Split
#include "common/io.h" // for PeekableInStream, ReadAll, FixedSizeStream, Mem...
#include "common/observer.h" // for TrainingObserver
#include "common/random.h" // for GlobalRandom
#include "common/timer.h" // for Monitor
#include "common/version.h" // for Version
#include "dmlc/endian.h" // for ByteSwap, DMLC_IO_NO_ENDIAN_SWAP
#include "xgboost/base.h" // for Args, bst_float, GradientPair, bst_feature_t
#include "xgboost/context.h" // for Context
#include "xgboost/data.h" // for DMatrix, MetaInfo
#include "xgboost/gbm.h" // for GradientBooster
#include "xgboost/global_config.h" // for GlobalConfiguration, GlobalConfigThreadLocalStore
#include "xgboost/host_device_vector.h" // for HostDeviceVector
#include "xgboost/json.h" // for Json, get, Object, String, IsA, Array, ToJson
#include "xgboost/linalg.h" // for Tensor, TensorView
#include "xgboost/logging.h" // for CHECK, LOG, CHECK_EQ
#include "xgboost/metric.h" // for Metric
#include "xgboost/objective.h" // for ObjFunction
#include "xgboost/parameter.h" // for DECLARE_FIELD_ENUM_CLASS, XGBoostParameter
#include "xgboost/predictor.h" // for PredictionContainer, PredictionCacheEntry
#include "xgboost/string_view.h" // for operator<<, StringView
#include "xgboost/task.h" // for ObjInfo
namespace {
const char* kMaxDeltaStepDefaultValue = "0.7";
} // anonymous namespace
DECLARE_FIELD_ENUM_CLASS(xgboost::MultiStrategy);
namespace xgboost {
Learner::~Learner() = default;
namespace {
@ -86,8 +99,10 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
/*! \brief the version of XGBoost. */
std::uint32_t major_version;
std::uint32_t minor_version;
uint32_t num_target{1};
/**
* \brief Number of target variables.
*/
bst_target_t num_target;
/**
* \brief Whether we should calculate the base score from training data.
*
@ -113,7 +128,7 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
}
// Skip other legacy fields.
Json ToJson() const {
[[nodiscard]] Json ToJson() const {
Json obj{Object{}};
char floats[NumericLimits<float>::kToCharsSize];
auto ret = to_chars(floats, floats + NumericLimits<float>::kToCharsSize, base_score);
@ -163,7 +178,7 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
from_chars(str.c_str(), str.c_str() + str.size(), base_score);
}
LearnerModelParamLegacy ByteSwap() const {
[[nodiscard]] LearnerModelParamLegacy ByteSwap() const {
LearnerModelParamLegacy x = *this;
dmlc::ByteSwap(&x.base_score, sizeof(x.base_score), 1);
dmlc::ByteSwap(&x.num_feature, sizeof(x.num_feature), 1);
@ -226,35 +241,38 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
DMLC_DECLARE_FIELD(num_feature)
.set_default(0)
.describe(
"Number of features in training data,"
" this parameter will be automatically detected by learner.");
"Number of features in training data, this parameter will be automatically detected by "
"learner.");
DMLC_DECLARE_FIELD(num_class).set_default(0).set_lower_bound(0).describe(
"Number of class option for multi-class classifier. "
" By default equals 0 and corresponds to binary classifier.");
DMLC_DECLARE_FIELD(num_target)
.set_default(1)
.set_lower_bound(1)
.describe("Number of target for multi-target regression.");
.describe("Number of output targets. Can be set automatically if not specified.");
DMLC_DECLARE_FIELD(boost_from_average)
.set_default(true)
.describe("Whether we should calculate the base score from training data.");
}
};
LearnerModelParam::LearnerModelParam(LearnerModelParamLegacy const& user_param, ObjInfo t)
: num_feature{user_param.num_feature}, task{t} {
auto n_classes = std::max(static_cast<uint32_t>(user_param.num_class), 1u);
auto n_targets = user_param.num_target;
num_output_group = std::max(n_classes, n_targets);
// For version < 1.6, n_targets == 0
CHECK(n_classes <= 1 || n_targets <= 1)
<< "Multi-class multi-output is not yet supported. n_classes:" << n_classes
<< ", n_targets:" << n_targets;
LearnerModelParam::LearnerModelParam(LearnerModelParamLegacy const& user_param, ObjInfo t,
MultiStrategy multi_strategy)
: num_feature{user_param.num_feature},
num_output_group{
std::max(static_cast<std::uint32_t>(user_param.num_class), user_param.num_target)},
task{t},
multi_strategy{multi_strategy} {
if (user_param.num_class > 1 && user_param.num_target > 1) {
LOG(FATAL) << "multi-target-multi-class is not yet supported. Output classes:"
<< user_param.num_class << ", output targets:" << user_param.num_target;
}
}
LearnerModelParam::LearnerModelParam(Context const* ctx, LearnerModelParamLegacy const& user_param,
linalg::Tensor<float, 1> base_margin, ObjInfo t)
: LearnerModelParam{user_param, t} {
linalg::Tensor<float, 1> base_margin, ObjInfo t,
MultiStrategy multi_strategy)
: LearnerModelParam{user_param, t, multi_strategy} {
std::swap(base_score_, base_margin);
// Make sure read access everywhere for thread-safe prediction.
std::as_const(base_score_).HostView();
@ -297,6 +315,7 @@ void LearnerModelParam::Copy(LearnerModelParam const& that) {
num_feature = that.num_feature;
num_output_group = that.num_output_group;
task = that.task;
multi_strategy = that.multi_strategy;
}
struct LearnerTrainParam : public XGBoostParameter<LearnerTrainParam> {
@ -306,18 +325,26 @@ struct LearnerTrainParam : public XGBoostParameter<LearnerTrainParam> {
// specified by users. Move them to model parameter once we can get rid of binary IO.
std::string booster;
std::string objective;
// This is a training parameter and is not saved (nor loaded) in the model.
MultiStrategy multi_strategy{MultiStrategy::kComposite};
// declare parameters
DMLC_DECLARE_PARAMETER(LearnerTrainParam) {
DMLC_DECLARE_FIELD(disable_default_eval_metric)
.set_default(false)
.describe("Flag to disable default metric. Set to >0 to disable");
DMLC_DECLARE_FIELD(booster)
.set_default("gbtree")
.describe("Gradient booster used for training.");
DMLC_DECLARE_FIELD(booster).set_default("gbtree").describe(
"Gradient booster used for training.");
DMLC_DECLARE_FIELD(objective)
.set_default("reg:squarederror")
.describe("Objective function used for obtaining gradient.");
DMLC_DECLARE_FIELD(multi_strategy)
.add_enum("composite", MultiStrategy::kComposite)
.add_enum("monolithic", MultiStrategy::kMonolithic)
.set_default(MultiStrategy::kComposite)
        .describe(
            "Strategy used for training multi-target models. `monolithic` means building a "
            "single tree for all targets.");
}
};
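
As a usage sketch, the new multi_strategy knob goes through the normal parameter path; this mirrors the MultiStrategy.Configure test added later in this commit (the function name is illustrative, and the DMatrix construction is elided):

#include <xgboost/learner.h>  // for Learner
#include <xgboost/logging.h>  // for CHECK_EQ

#include <memory>  // for shared_ptr, unique_ptr

void ConfigureMonolithic(std::shared_ptr<xgboost::DMatrix> p_fmat) {
  std::unique_ptr<xgboost::Learner> learner{xgboost::Learner::Create({p_fmat})};
  // One tree per boosting round covering all targets, instead of one tree
  // per target under the default "composite" strategy.
  learner->SetParams(
      xgboost::Args{{"multi_strategy", "monolithic"}, {"num_target", "2"}});
  learner->Configure();
  CHECK_EQ(learner->Groups(), 2);  // two output targets
}
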
@ -379,8 +406,10 @@ class LearnerConfiguration : public Learner {
// transform to margin
h_base_score(0) = obj_->ProbToMargin(mparam_.base_score);
CHECK(tparam_.GetInitialised());
// move it to model param, which is shared with all other components.
learner_model_param_ = LearnerModelParam(Ctx(), mparam_, std::move(base_score), task);
learner_model_param_ =
LearnerModelParam(Ctx(), mparam_, std::move(base_score), task, tparam_.multi_strategy);
CHECK(learner_model_param_.Initialized());
CHECK_NE(learner_model_param_.BaseScore(Ctx()).Size(), 0);
}
@ -748,7 +777,6 @@ class LearnerConfiguration : public Learner {
<< "0 feature is supplied. Are you using raw Booster interface?";
// Remove these once binary IO is gone.
cfg_["num_feature"] = common::ToString(mparam_.num_feature);
cfg_["num_class"] = common::ToString(mparam_.num_class);
}
void ConfigureGBM(LearnerTrainParam const& old, Args const& args) {
@ -779,9 +807,17 @@ class LearnerConfiguration : public Learner {
if (obj_ == nullptr || tparam_.objective != old.objective) {
obj_.reset(ObjFunction::Create(tparam_.objective, &ctx_));
}
bool has_nc {cfg_.find("num_class") != cfg_.cend()};
// Inject num_class into configuration.
// FIXME(jiamingy): Remove the duplicated parameter in softmax
cfg_["num_class"] = common::ToString(mparam_.num_class);
auto& args = *p_args;
args = {cfg_.cbegin(), cfg_.cend()}; // renew
obj_->Configure(args);
if (!has_nc) {
cfg_.erase("num_class");
}
}
void ConfigureMetrics(Args const& args) {
@ -805,7 +841,7 @@ class LearnerConfiguration : public Learner {
void ConfigureTargets() {
CHECK(this->obj_);
auto const& cache = prediction_container_.Container();
size_t n_targets = 1;
bst_target_t n_targets = 1;
for (auto const& d : cache) {
if (n_targets == 1) {
n_targets = this->obj_->Targets(d.first.ptr->Info());
@ -814,7 +850,8 @@ class LearnerConfiguration : public Learner {
CHECK(n_targets == t || 1 == t) << "Inconsistent labels.";
}
}
if (mparam_.num_target != 1) {
if (mparam_.num_target > 1) {
CHECK(n_targets == 1 || n_targets == mparam_.num_target)
<< "Inconsistent configuration of num_target. Configuration result from input data:"
<< n_targets << ", configuration from parameter:" << mparam_.num_target;
@ -974,9 +1011,6 @@ class LearnerIO : public LearnerConfiguration {
if (!DMLC_IO_NO_ENDIAN_SWAP) {
mparam_ = mparam_.ByteSwap();
}
if (mparam_.num_target == 0) {
mparam_.num_target = 1;
}
CHECK(fi->Read(&tparam_.objective)) << "BoostLearner: wrong model format";
CHECK(fi->Read(&tparam_.booster)) << "BoostLearner: wrong model format";
@ -1030,7 +1064,7 @@ class LearnerIO : public LearnerConfiguration {
: obj_->ProbToMargin(mparam_.base_score)},
{1},
Context::kCpuId},
obj_->Task());
obj_->Task(), tparam_.multi_strategy);
if (attributes_.find("objective") != attributes_.cend()) {
auto obj_str = attributes_.at("objective");
@ -1058,7 +1092,6 @@ class LearnerIO : public LearnerConfiguration {
mparam_.major_version = std::get<0>(Version::Self());
mparam_.minor_version = std::get<1>(Version::Self());
cfg_["num_class"] = common::ToString(mparam_.num_class);
cfg_["num_feature"] = common::ToString(mparam_.num_feature);
auto n = tparam_.__DICT__();
@ -1071,6 +1104,8 @@ class LearnerIO : public LearnerConfiguration {
// JSON serialization format.
void SaveModel(dmlc::Stream* fo) const override {
this->CheckModelInitialized();
CHECK(!this->learner_model_param_.IsVectorLeaf())
<< "Please use JSON/UBJ format for model serialization with multi-output models.";
LearnerModelParamLegacy mparam = mparam_; // make a copy to potentially modify
std::vector<std::pair<std::string, std::string> > extra_attr;

View File

@ -234,7 +234,7 @@ struct EvalRank : public MetricNoCache, public EvalRankConfig {
protected:
explicit EvalRank(const char* name, const char* param) {
this->name = ltr::MakeMetricName(name, param, &topn, &minus);
this->name = ltr::ParseMetricName(name, param, &topn, &minus);
}
virtual double EvalGroup(PredIndPairContainer *recptr) const = 0;

View File

@ -1,5 +1,5 @@
/*!
* Copyright 2017-2021 by Contributors
/**
* Copyright 2017-2023 by XGBoost Contributors
*/
#include <amd_warp_primitives.h>
#include <GPUTreeShap/gpu_treeshap.h>
@ -32,9 +32,7 @@
#include "xgboost/tree_model.h"
#include "xgboost/tree_updater.h"
namespace xgboost {
namespace predictor {
namespace xgboost::predictor {
DMLC_REGISTRY_FILE_TAG(gpu_predictor);
struct TreeView {
@ -42,12 +40,11 @@ struct TreeView {
common::Span<RegTree::Node const> d_tree;
XGBOOST_DEVICE
TreeView(size_t tree_begin, size_t tree_idx,
common::Span<const RegTree::Node> d_nodes,
TreeView(size_t tree_begin, size_t tree_idx, common::Span<const RegTree::Node> d_nodes,
common::Span<size_t const> d_tree_segments,
common::Span<FeatureType const> d_tree_split_types,
common::Span<uint32_t const> d_cat_tree_segments,
common::Span<RegTree::Segment const> d_cat_node_segments,
common::Span<RegTree::CategoricalSplitMatrix::Segment const> d_cat_node_segments,
common::Span<uint32_t const> d_categories) {
auto begin = d_tree_segments[tree_idx - tree_begin];
auto n_nodes = d_tree_segments[tree_idx - tree_begin + 1] -
@ -262,7 +259,7 @@ PredictLeafKernel(Data data, common::Span<const RegTree::Node> d_nodes,
common::Span<FeatureType const> d_tree_split_types,
common::Span<uint32_t const> d_cat_tree_segments,
common::Span<RegTree::Segment const> d_cat_node_segments,
common::Span<RegTree::CategoricalSplitMatrix::Segment const> d_cat_node_segments,
common::Span<uint32_t const> d_categories,
size_t tree_begin, size_t tree_end, size_t num_features,
@ -297,7 +294,7 @@ PredictKernel(Data data, common::Span<const RegTree::Node> d_nodes,
common::Span<int const> d_tree_group,
common::Span<FeatureType const> d_tree_split_types,
common::Span<uint32_t const> d_cat_tree_segments,
common::Span<RegTree::Segment const> d_cat_node_segments,
common::Span<RegTree::CategoricalSplitMatrix::Segment const> d_cat_node_segments,
common::Span<uint32_t const> d_categories, size_t tree_begin,
size_t tree_end, size_t num_features, size_t num_rows,
size_t entry_start, bool use_shared, int num_group, float missing) {
@ -341,7 +338,7 @@ class DeviceModel {
// Pointer to each tree, segmenting the node array.
HostDeviceVector<uint32_t> categories_tree_segments;
// Pointer to each node, segmenting categories array.
HostDeviceVector<RegTree::Segment> categories_node_segments;
HostDeviceVector<RegTree::CategoricalSplitMatrix::Segment> categories_node_segments;
HostDeviceVector<uint32_t> categories;
size_t tree_beg_; // NOLINT
@ -421,9 +418,9 @@ class DeviceModel {
h_split_cat_segments.push_back(h_categories.size());
}
categories_node_segments =
HostDeviceVector<RegTree::Segment>(h_tree_segments.back(), {}, gpu_id);
std::vector<RegTree::Segment> &h_categories_node_segments =
categories_node_segments = HostDeviceVector<RegTree::CategoricalSplitMatrix::Segment>(
h_tree_segments.back(), {}, gpu_id);
std::vector<RegTree::CategoricalSplitMatrix::Segment>& h_categories_node_segments =
categories_node_segments.HostVector();
for (auto tree_idx = tree_begin; tree_idx < tree_end; ++tree_idx) {
auto const &src_cats_ptr = model.trees.at(tree_idx)->GetSplitCategoriesPtr();
@ -583,10 +580,10 @@ void ExtractPaths(
if (thrust::any_of(dh::tbegin(d_split_types), dh::tend(d_split_types),
common::IsCatOp{})) {
dh::PinnedMemory pinned;
auto h_max_cat = pinned.GetSpan<RegTree::Segment>(1);
auto h_max_cat = pinned.GetSpan<RegTree::CategoricalSplitMatrix::Segment>(1);
auto max_elem_it = dh::MakeTransformIterator<size_t>(
dh::tbegin(d_cat_node_segments),
[] __device__(RegTree::Segment seg) { return seg.size; });
[] __device__(RegTree::CategoricalSplitMatrix::Segment seg) { return seg.size; });
size_t max_cat_it =
thrust::max_element(thrust::device, max_elem_it,
max_elem_it + d_cat_node_segments.size()) -
@ -1095,5 +1092,4 @@ XGBOOST_REGISTER_PREDICTOR(GPUPredictor, "gpu_predictor")
.describe("Make predictions using GPU.")
.set_body([](Context const* ctx) { return new GPUPredictor(ctx); });
} // namespace predictor
} // namespace xgboost
} // namespace xgboost::predictor

View File

@ -3,18 +3,19 @@
*/
#include "xgboost/predictor.h"
#include <dmlc/registry.h>
#include <dmlc/registry.h> // for DMLC_REGISTRY_LINK_TAG
#include <string> // std::string
#include <cstdint> // for int32_t
#include <string> // for string, to_string
#include "../gbm/gbtree.h" // GBTreeModel
#include "xgboost/base.h" // bst_row_t,bst_group_t
#include "xgboost/context.h" // Context
#include "xgboost/data.h" // MetaInfo
#include "xgboost/host_device_vector.h" // HostDeviceVector
#include "xgboost/learner.h" // LearnerModelParam
#include "xgboost/linalg.h" // Tensor
#include "xgboost/logging.h"
#include "../gbm/gbtree_model.h" // for GBTreeModel
#include "xgboost/base.h" // for bst_float, Args, bst_group_t, bst_row_t
#include "xgboost/context.h" // for Context
#include "xgboost/data.h" // for MetaInfo
#include "xgboost/host_device_vector.h" // for HostDeviceVector
#include "xgboost/learner.h" // for LearnerModelParam
#include "xgboost/linalg.h" // for Tensor, TensorView
#include "xgboost/logging.h" // for CHECK_EQ, CHECK_NE, LOG
namespace dmlc {
DMLC_REGISTRY_ENABLE(::xgboost::PredictorReg);
@ -45,15 +46,16 @@ void ValidateBaseMarginShape(linalg::Tensor<float, D> const& margin, bst_row_t n
void Predictor::InitOutPredictions(const MetaInfo& info, HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model) const {
CHECK_NE(model.learner_model_param->num_output_group, 0);
size_t n_classes = model.learner_model_param->num_output_group;
size_t n = n_classes * info.num_row_;
std::size_t n{model.learner_model_param->OutputLength() * info.num_row_};
const HostDeviceVector<bst_float>* base_margin = info.base_margin_.Data();
if (ctx_->gpu_id >= 0) {
out_preds->SetDevice(ctx_->gpu_id);
}
if (!base_margin->Empty()) {
out_preds->Resize(n);
ValidateBaseMarginShape(info.base_margin_, info.num_row_, n_classes);
ValidateBaseMarginShape(info.base_margin_, info.num_row_,
model.learner_model_param->OutputLength());
out_preds->Copy(*base_margin);
} else {
// cannot rely on the Resize to fill as it might skip if the size is already correct.
@ -64,12 +66,10 @@ void Predictor::InitOutPredictions(const MetaInfo& info, HostDeviceVector<bst_fl
}
} // namespace xgboost
namespace xgboost {
namespace predictor {
namespace xgboost::predictor {
// List of files that will be force linked in static links.
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
DMLC_REGISTRY_LINK_TAG(gpu_predictor);
#endif // XGBOOST_USE_CUDA || defined(XGBOOST_USE_HIP)
DMLC_REGISTRY_LINK_TAG(cpu_predictor);
} // namespace predictor
} // namespace xgboost
} // namespace xgboost::predictor

View File

@ -71,10 +71,7 @@ void FitStump(Context const* ctx, HostDeviceVector<GradientPair> const& gpair,
auto n_samples = gpair.Size() / n_targets;
gpair.SetDevice(ctx->gpu_id);
linalg::TensorView<GradientPair const, 2> gpair_t{
ctx->IsCPU() ? gpair.ConstHostSpan() : gpair.ConstDeviceSpan(),
{n_samples, n_targets},
ctx->gpu_id};
auto gpair_t = linalg::MakeTensorView(ctx, &gpair, n_samples, n_targets);
ctx->IsCPU() ? cpu_impl::FitStump(ctx, gpair_t, out->HostView())
: cuda_impl::FitStump(ctx, gpair_t, out->View(ctx->gpu_id));
}

View File

@ -12,7 +12,7 @@
#include "../../common/hist_util.h"
#include "../../data/gradient_index.h"
#include "expand_entry.h"
#include "xgboost/tree_model.h"
#include "xgboost/tree_model.h" // for RegTree
namespace xgboost {
namespace tree {
@ -175,8 +175,8 @@ class HistogramBuilder {
auto this_local = hist_local_worker_[entry.nid];
common::CopyHist(this_local, this_hist, r.begin(), r.end());
if (!(*p_tree)[entry.nid].IsRoot()) {
const size_t parent_id = (*p_tree)[entry.nid].Parent();
if (!p_tree->IsRoot(entry.nid)) {
const size_t parent_id = p_tree->Parent(entry.nid);
const int subtraction_node_id = nodes_for_subtraction_trick[node].nid;
auto parent_hist = this->hist_local_worker_[parent_id];
auto sibling_hist = this->hist_[subtraction_node_id];
@ -213,8 +213,8 @@ class HistogramBuilder {
// Merging histograms from each thread into once
this->buffer_.ReduceHist(node, r.begin(), r.end());
if (!(*p_tree)[entry.nid].IsRoot()) {
auto const parent_id = (*p_tree)[entry.nid].Parent();
if (!p_tree->IsRoot(entry.nid)) {
auto const parent_id = p_tree->Parent(entry.nid);
auto const subtraction_node_id = nodes_for_subtraction_trick[node].nid;
auto parent_hist = this->hist_[parent_id];
auto sibling_hist = this->hist_[subtraction_node_id];
@ -237,10 +237,10 @@ class HistogramBuilder {
common::ParallelFor2d(
space, this->n_threads_, [&](size_t node, common::Range1d r) {
const auto &entry = nodes[node];
if (!((*p_tree)[entry.nid].IsLeftChild())) {
if (!(p_tree->IsLeftChild(entry.nid))) {
auto this_hist = this->hist_[entry.nid];
if (!(*p_tree)[entry.nid].IsRoot()) {
if (!p_tree->IsRoot(entry.nid)) {
const int subtraction_node_id = subtraction_nodes[node].nid;
auto parent_hist = hist_[(*p_tree)[entry.nid].Parent()];
auto sibling_hist = hist_[subtraction_node_id];
@ -285,7 +285,7 @@ class HistogramBuilder {
std::sort(merged_node_ids.begin(), merged_node_ids.end());
int n_left = 0;
for (auto const &nid : merged_node_ids) {
if ((*p_tree)[nid].IsLeftChild()) {
if (p_tree->IsLeftChild(nid)) {
this->hist_.AddHistRow(nid);
(*starting_index) = std::min(nid, (*starting_index));
n_left++;
@ -293,7 +293,7 @@ class HistogramBuilder {
}
}
for (auto const &nid : merged_node_ids) {
if (!((*p_tree)[nid].IsLeftChild())) {
if (!(p_tree->IsLeftChild(nid))) {
this->hist_.AddHistRow(nid);
this->hist_local_worker_.AddHistRow(nid);
}

65
src/tree/io_utils.h Normal file
View File

@ -0,0 +1,65 @@
/**
* Copyright 2023 by XGBoost Contributors
*/
#ifndef XGBOOST_TREE_IO_UTILS_H_
#define XGBOOST_TREE_IO_UTILS_H_
#include <string> // for string
#include <type_traits> // for enable_if_t, is_same, conditional_t
#include <vector> // for vector
#include "xgboost/json.h" // for Json
namespace xgboost {
template <bool typed>
using FloatArrayT = std::conditional_t<typed, F32Array const, Array const>;
template <bool typed>
using U8ArrayT = std::conditional_t<typed, U8Array const, Array const>;
template <bool typed>
using I32ArrayT = std::conditional_t<typed, I32Array const, Array const>;
template <bool typed>
using I64ArrayT = std::conditional_t<typed, I64Array const, Array const>;
template <bool typed, bool feature_is_64>
using IndexArrayT = std::conditional_t<feature_is_64, I64ArrayT<typed>, I32ArrayT<typed>>;
// typed array, not boolean
template <typename JT, typename T>
std::enable_if_t<!std::is_same<T, Json>::value && !std::is_same<JT, Boolean>::value, T> GetElem(
std::vector<T> const& arr, size_t i) {
return arr[i];
}
// typed array boolean
template <typename JT, typename T>
std::enable_if_t<!std::is_same<T, Json>::value && std::is_same<T, uint8_t>::value &&
std::is_same<JT, Boolean>::value,
bool>
GetElem(std::vector<T> const& arr, size_t i) {
return arr[i] == 1;
}
// json array
template <typename JT, typename T>
std::enable_if_t<
std::is_same<T, Json>::value,
std::conditional_t<std::is_same<JT, Integer>::value, int64_t,
std::conditional_t<std::is_same<Boolean, JT>::value, bool, float>>>
GetElem(std::vector<T> const& arr, size_t i) {
if (std::is_same<JT, Boolean>::value && !IsA<Boolean>(arr[i])) {
return get<Integer const>(arr[i]) == 1;
}
return get<JT const>(arr[i]);
}
namespace tree_field {
inline std::string const kLossChg{"loss_changes"};
inline std::string const kSumHess{"sum_hessian"};
inline std::string const kBaseWeight{"base_weights"};
inline std::string const kSplitIdx{"split_indices"};
inline std::string const kSplitCond{"split_conditions"};
inline std::string const kDftLeft{"default_left"};
inline std::string const kParent{"parents"};
inline std::string const kLeft{"left_children"};
inline std::string const kRight{"right_children"};
} // namespace tree_field
} // namespace xgboost
#endif // XGBOOST_TREE_IO_UTILS_H_
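
The GetElem overloads above let callers read a typed array (raw scalars) and an untyped Array (boxed Json values) through the same expression. A small sketch with hypothetical values (the function name is illustrative):

#include <vector>  // for vector

#include "xgboost/json.h"     // for Json, Number, get
#include "xgboost/logging.h"  // for CHECK_EQ

namespace xgboost {
void GetElemSketch() {
  std::vector<float> typed{0.5f, 1.5f};  // what an F32Array holds
  std::vector<Json> untyped{Json{Number{0.5f}}, Json{Number{1.5f}}};
  float a = GetElem<Number>(typed, 0);    // typed overload: returns arr[i]
  float b = GetElem<Number>(untyped, 0);  // json overload: get<Number const>(arr[i])
  CHECK_EQ(a, b);
}
}  // namespace xgboost
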

View File

@ -0,0 +1,220 @@
/**
* Copyright 2023 by XGBoost Contributors
*/
#include "xgboost/multi_target_tree_model.h"
#include <algorithm> // for copy_n
#include <cstddef> // for size_t
#include <cstdint> // for int32_t, uint8_t
#include <limits> // for numeric_limits
#include <string_view> // for string_view
#include <utility> // for move
#include <vector> // for vector
#include "io_utils.h" // for I32ArrayT, FloatArrayT, GetElem, ...
#include "xgboost/base.h" // for bst_node_t, bst_feature_t, bst_target_t
#include "xgboost/json.h" // for Json, get, Object, Number, Integer, ...
#include "xgboost/logging.h"
#include "xgboost/tree_model.h" // for TreeParam
namespace xgboost {
MultiTargetTree::MultiTargetTree(TreeParam const* param)
: param_{param},
left_(1ul, InvalidNodeId()),
right_(1ul, InvalidNodeId()),
parent_(1ul, InvalidNodeId()),
split_index_(1ul, 0),
default_left_(1ul, 0),
split_conds_(1ul, std::numeric_limits<float>::quiet_NaN()),
weights_(param->size_leaf_vector, std::numeric_limits<float>::quiet_NaN()) {
CHECK_GT(param_->size_leaf_vector, 1);
}
template <bool typed, bool feature_is_64>
void LoadModelImpl(Json const& in, std::vector<float>* p_weights, std::vector<bst_node_t>* p_lefts,
std::vector<bst_node_t>* p_rights, std::vector<bst_node_t>* p_parents,
std::vector<float>* p_conds, std::vector<bst_feature_t>* p_fidx,
std::vector<std::uint8_t>* p_dft_left) {
namespace tf = tree_field;
auto get_float = [&](std::string_view name, std::vector<float>* p_out) {
auto& values = get<FloatArrayT<typed>>(get<Object const>(in).find(name)->second);
auto& out = *p_out;
out.resize(values.size());
for (std::size_t i = 0; i < values.size(); ++i) {
out[i] = GetElem<Number>(values, i);
}
};
get_float(tf::kBaseWeight, p_weights);
get_float(tf::kSplitCond, p_conds);
auto get_nidx = [&](std::string_view name, std::vector<bst_node_t>* p_nidx) {
auto& nidx = get<I32ArrayT<typed>>(get<Object const>(in).find(name)->second);
auto& out_nidx = *p_nidx;
out_nidx.resize(nidx.size());
for (std::size_t i = 0; i < nidx.size(); ++i) {
out_nidx[i] = GetElem<Integer>(nidx, i);
}
};
get_nidx(tf::kLeft, p_lefts);
get_nidx(tf::kRight, p_rights);
get_nidx(tf::kParent, p_parents);
auto const& splits = get<IndexArrayT<typed, feature_is_64> const>(in[tf::kSplitIdx]);
p_fidx->resize(splits.size());
auto& out_fidx = *p_fidx;
for (std::size_t i = 0; i < splits.size(); ++i) {
out_fidx[i] = GetElem<Integer>(splits, i);
}
auto const& dft_left = get<U8ArrayT<typed> const>(in[tf::kDftLeft]);
auto& out_dft_l = *p_dft_left;
out_dft_l.resize(dft_left.size());
for (std::size_t i = 0; i < dft_left.size(); ++i) {
out_dft_l[i] = GetElem<Boolean>(dft_left, i);
}
}
void MultiTargetTree::LoadModel(Json const& in) {
namespace tf = tree_field;
bool typed = IsA<F32Array>(in[tf::kBaseWeight]);
bool feature_is_64 = IsA<I64Array>(in[tf::kSplitIdx]);
if (typed && feature_is_64) {
LoadModelImpl<true, true>(in, &weights_, &left_, &right_, &parent_, &split_conds_,
&split_index_, &default_left_);
} else if (typed && !feature_is_64) {
LoadModelImpl<true, false>(in, &weights_, &left_, &right_, &parent_, &split_conds_,
&split_index_, &default_left_);
} else if (!typed && feature_is_64) {
LoadModelImpl<false, true>(in, &weights_, &left_, &right_, &parent_, &split_conds_,
&split_index_, &default_left_);
} else {
LoadModelImpl<false, false>(in, &weights_, &left_, &right_, &parent_, &split_conds_,
&split_index_, &default_left_);
}
}
void MultiTargetTree::SaveModel(Json* p_out) const {
CHECK(p_out);
auto& out = *p_out;
auto n_nodes = param_->num_nodes;
// nodes
I32Array lefts(n_nodes);
I32Array rights(n_nodes);
I32Array parents(n_nodes);
F32Array conds(n_nodes);
U8Array default_left(n_nodes);
F32Array weights(n_nodes * this->NumTarget());
auto save_tree = [&](auto* p_indices_array) {
auto& indices_array = *p_indices_array;
for (bst_node_t nidx = 0; nidx < n_nodes; ++nidx) {
CHECK_LT(nidx, left_.size());
lefts.Set(nidx, left_[nidx]);
CHECK_LT(nidx, right_.size());
rights.Set(nidx, right_[nidx]);
CHECK_LT(nidx, parent_.size());
parents.Set(nidx, parent_[nidx]);
CHECK_LT(nidx, split_index_.size());
indices_array.Set(nidx, split_index_[nidx]);
conds.Set(nidx, split_conds_[nidx]);
default_left.Set(nidx, default_left_[nidx]);
auto in_weight = this->NodeWeight(nidx);
auto weight_out = common::Span<float>(weights.GetArray())
.subspan(nidx * this->NumTarget(), this->NumTarget());
CHECK_EQ(in_weight.Size(), weight_out.size());
std::copy_n(in_weight.Values().data(), in_weight.Size(), weight_out.data());
}
};
namespace tf = tree_field;
if (this->param_->num_feature >
static_cast<bst_feature_t>(std::numeric_limits<std::int32_t>::max())) {
I64Array indices_64(n_nodes);
save_tree(&indices_64);
out[tf::kSplitIdx] = std::move(indices_64);
} else {
I32Array indices_32(n_nodes);
save_tree(&indices_32);
out[tf::kSplitIdx] = std::move(indices_32);
}
out[tf::kBaseWeight] = std::move(weights);
out[tf::kLeft] = std::move(lefts);
out[tf::kRight] = std::move(rights);
out[tf::kParent] = std::move(parents);
out[tf::kSplitCond] = std::move(conds);
out[tf::kDftLeft] = std::move(default_left);
}
void MultiTargetTree::SetLeaf(bst_node_t nidx, linalg::VectorView<float const> weight) {
CHECK(this->IsLeaf(nidx)) << "Collapsing a split node to leaf " << MTNotImplemented();
auto const next_nidx = nidx + 1;
CHECK_EQ(weight.Size(), this->NumTarget());
CHECK_GE(weights_.size(), next_nidx * weight.Size());
auto out_weight = common::Span<float>(weights_).subspan(nidx * weight.Size(), weight.Size());
for (std::size_t i = 0; i < weight.Size(); ++i) {
out_weight[i] = weight(i);
}
}
void MultiTargetTree::Expand(bst_node_t nidx, bst_feature_t split_idx, float split_cond,
bool default_left, linalg::VectorView<float const> base_weight,
linalg::VectorView<float const> left_weight,
linalg::VectorView<float const> right_weight) {
CHECK(this->IsLeaf(nidx));
CHECK_GE(parent_.size(), 1);
CHECK_EQ(parent_.size(), left_.size());
CHECK_EQ(left_.size(), right_.size());
std::size_t n = param_->num_nodes + 2;
CHECK_LT(split_idx, this->param_->num_feature);
left_.resize(n, InvalidNodeId());
right_.resize(n, InvalidNodeId());
parent_.resize(n, InvalidNodeId());
auto left_child = parent_.size() - 2;
auto right_child = parent_.size() - 1;
left_[nidx] = left_child;
right_[nidx] = right_child;
if (nidx != 0) {
CHECK_NE(parent_[nidx], InvalidNodeId());
}
parent_[left_child] = nidx;
parent_[right_child] = nidx;
split_index_.resize(n);
split_index_[nidx] = split_idx;
split_conds_.resize(n);
split_conds_[nidx] = split_cond;
default_left_.resize(n);
default_left_[nidx] = static_cast<std::uint8_t>(default_left);
weights_.resize(n * this->NumTarget());
auto p_weight = this->NodeWeight(nidx);
CHECK_EQ(p_weight.Size(), base_weight.Size());
auto l_weight = this->NodeWeight(left_child);
CHECK_EQ(l_weight.Size(), left_weight.Size());
auto r_weight = this->NodeWeight(right_child);
CHECK_EQ(r_weight.Size(), right_weight.Size());
for (std::size_t i = 0; i < base_weight.Size(); ++i) {
p_weight(i) = base_weight(i);
l_weight(i) = left_weight(i);
r_weight(i) = right_weight(i);
}
}
bst_target_t MultiTargetTree::NumTarget() const { return param_->size_leaf_vector; }
std::size_t MultiTargetTree::Size() const { return parent_.size(); }
} // namespace xgboost
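
MultiTargetTree keeps all node weights in one flat buffer with NumTarget() values per node; this is the indexing that SetLeaf and Expand rely on above. A standalone sketch with hypothetical values (the function name is illustrative):

#include <cstddef>  // for size_t
#include <vector>   // for vector

#include "xgboost/logging.h"  // for CHECK_EQ
#include "xgboost/span.h"     // for Span

void WeightLayoutSketch() {
  // With NumTarget() == 2, node nidx owns weights_[2 * nidx] and
  // weights_[2 * nidx + 1], exactly what SetLeaf's subspan computes.
  std::vector<float> weights{/*node 0*/ 0.1f, 0.2f, /*node 1*/ 0.3f, 0.4f};
  std::size_t n_targets = 2, nidx = 1;
  auto node_weight =
      xgboost::common::Span<float>(weights).subspan(nidx * n_targets, n_targets);
  CHECK_EQ(node_weight[0], 0.3f);
  CHECK_EQ(node_weight[1], 0.4f);
}
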

View File

@ -1,25 +1,27 @@
/*!
* Copyright 2015-2022 by Contributors
/**
* Copyright 2015-2023 by Contributors
* \file tree_model.cc
* \brief model structure for tree
*/
#include <dmlc/registry.h>
#include <dmlc/json.h>
#include <xgboost/tree_model.h>
#include <xgboost/logging.h>
#include <dmlc/registry.h>
#include <xgboost/json.h>
#include <xgboost/tree_model.h>
#include <sstream>
#include <limits>
#include <cmath>
#include <iomanip>
#include <stack>
#include <limits>
#include <sstream>
#include <type_traits>
#include "param.h"
#include "../common/common.h"
#include "../common/categorical.h"
#include "../common/common.h"
#include "../predictor/predict_fn.h"
#include "io_utils.h" // GetElem
#include "param.h"
#include "xgboost/base.h"
#include "xgboost/data.h"
#include "xgboost/logging.h"
namespace xgboost {
// register tree parameter
@ -729,12 +731,9 @@ XGBOOST_REGISTER_TREE_IO(GraphvizGenerator, "dot")
constexpr bst_node_t RegTree::kRoot;
std::string RegTree::DumpModel(const FeatureMap& fmap,
bool with_stats,
std::string format) const {
std::unique_ptr<TreeGenerator> builder {
TreeGenerator::Create(format, fmap, with_stats)
};
std::string RegTree::DumpModel(const FeatureMap& fmap, bool with_stats, std::string format) const {
CHECK(!IsMultiTarget());
std::unique_ptr<TreeGenerator> builder{TreeGenerator::Create(format, fmap, with_stats)};
builder->BuildTree(*this);
std::string result = builder->Str();
@ -742,6 +741,7 @@ std::string RegTree::DumpModel(const FeatureMap& fmap,
}
bool RegTree::Equal(const RegTree& b) const {
CHECK(!IsMultiTarget());
if (NumExtraNodes() != b.NumExtraNodes()) {
return false;
}
@ -758,6 +758,7 @@ bool RegTree::Equal(const RegTree& b) const {
}
bst_node_t RegTree::GetNumLeaves() const {
CHECK(!IsMultiTarget());
bst_node_t leaves { 0 };
auto const& self = *this;
this->WalkTree([&leaves, &self](bst_node_t nidx) {
@ -770,6 +771,7 @@ bst_node_t RegTree::GetNumLeaves() const {
}
bst_node_t RegTree::GetNumSplitNodes() const {
CHECK(!IsMultiTarget());
bst_node_t splits { 0 };
auto const& self = *this;
this->WalkTree([&splits, &self](bst_node_t nidx) {
@ -787,6 +789,7 @@ void RegTree::ExpandNode(bst_node_t nid, unsigned split_index, bst_float split_v
bst_float right_leaf_weight, bst_float loss_change,
float sum_hess, float left_sum, float right_sum,
bst_node_t leaf_right_child) {
CHECK(!IsMultiTarget());
int pleft = this->AllocNode();
int pright = this->AllocNode();
auto &node = nodes_[nid];
@ -807,11 +810,31 @@ void RegTree::ExpandNode(bst_node_t nid, unsigned split_index, bst_float split_v
this->split_types_.at(nid) = FeatureType::kNumerical;
}
void RegTree::ExpandNode(bst_node_t nidx, bst_feature_t split_index, float split_cond,
bool default_left, linalg::VectorView<float const> base_weight,
linalg::VectorView<float const> left_weight,
linalg::VectorView<float const> right_weight) {
CHECK(IsMultiTarget());
CHECK_LT(split_index, this->param.num_feature);
CHECK(this->p_mt_tree_);
CHECK_GT(param.size_leaf_vector, 1);
this->p_mt_tree_->Expand(nidx, split_index, split_cond, default_left, base_weight, left_weight,
right_weight);
split_types_.resize(this->Size(), FeatureType::kNumerical);
split_categories_segments_.resize(this->Size());
this->split_types_.at(nidx) = FeatureType::kNumerical;
this->param.num_nodes = this->p_mt_tree_->Size();
}
void RegTree::ExpandCategorical(bst_node_t nid, bst_feature_t split_index,
common::Span<const uint32_t> split_cat, bool default_left,
bst_float base_weight, bst_float left_leaf_weight,
bst_float right_leaf_weight, bst_float loss_change, float sum_hess,
float left_sum, float right_sum) {
CHECK(!IsMultiTarget());
this->ExpandNode(nid, split_index, std::numeric_limits<float>::quiet_NaN(),
default_left, base_weight,
left_leaf_weight, right_leaf_weight, loss_change, sum_hess,
@ -893,44 +916,17 @@ void RegTree::Save(dmlc::Stream* fo) const {
}
}
}
// typed array, not boolean
template <typename JT, typename T>
std::enable_if_t<!std::is_same<T, Json>::value && !std::is_same<JT, Boolean>::value, T> GetElem(
std::vector<T> const& arr, size_t i) {
return arr[i];
}
// typed array boolean
template <typename JT, typename T>
std::enable_if_t<!std::is_same<T, Json>::value && std::is_same<T, uint8_t>::value &&
std::is_same<JT, Boolean>::value,
bool>
GetElem(std::vector<T> const& arr, size_t i) {
return arr[i] == 1;
}
// json array
template <typename JT, typename T>
std::enable_if_t<
std::is_same<T, Json>::value,
std::conditional_t<std::is_same<JT, Integer>::value, int64_t,
std::conditional_t<std::is_same<Boolean, JT>::value, bool, float>>>
GetElem(std::vector<T> const& arr, size_t i) {
if (std::is_same<JT, Boolean>::value && !IsA<Boolean>(arr[i])) {
return get<Integer const>(arr[i]) == 1;
}
return get<JT const>(arr[i]);
}
template <bool typed>
void RegTree::LoadCategoricalSplit(Json const& in) {
using I64ArrayT = std::conditional_t<typed, I64Array const, Array const>;
using I32ArrayT = std::conditional_t<typed, I32Array const, Array const>;
auto const& categories_segments = get<I64ArrayT<typed>>(in["categories_segments"]);
auto const& categories_sizes = get<I64ArrayT<typed>>(in["categories_sizes"]);
auto const& categories_nodes = get<I32ArrayT<typed>>(in["categories_nodes"]);
auto const& categories = get<I32ArrayT<typed>>(in["categories"]);
auto const& categories_segments = get<I64ArrayT>(in["categories_segments"]);
auto const& categories_sizes = get<I64ArrayT>(in["categories_sizes"]);
auto const& categories_nodes = get<I32ArrayT>(in["categories_nodes"]);
auto const& categories = get<I32ArrayT>(in["categories"]);
size_t cnt = 0;
auto split_type = get<U8ArrayT<typed>>(in["split_type"]);
bst_node_t n_nodes = split_type.size();
std::size_t cnt = 0;
bst_node_t last_cat_node = -1;
if (!categories_nodes.empty()) {
last_cat_node = GetElem<Integer>(categories_nodes, cnt);
@ -938,7 +934,10 @@ void RegTree::LoadCategoricalSplit(Json const& in) {
// `categories_segments' is only available for categorical nodes to prevent overhead for
// numerical node. As a result, we need to track the categorical nodes we have processed
// so far.
for (bst_node_t nidx = 0; nidx < param.num_nodes; ++nidx) {
split_types_.resize(n_nodes, FeatureType::kNumerical);
split_categories_segments_.resize(n_nodes);
for (bst_node_t nidx = 0; nidx < n_nodes; ++nidx) {
split_types_[nidx] = static_cast<FeatureType>(GetElem<Integer>(split_type, nidx));
if (nidx == last_cat_node) {
auto j_begin = GetElem<Integer>(categories_segments, cnt);
auto j_end = GetElem<Integer>(categories_sizes, cnt) + j_begin;
@ -985,15 +984,17 @@ template void RegTree::LoadCategoricalSplit<false>(Json const& in);
void RegTree::SaveCategoricalSplit(Json* p_out) const {
auto& out = *p_out;
CHECK_EQ(this->split_types_.size(), param.num_nodes);
CHECK_EQ(this->GetSplitCategoriesPtr().size(), param.num_nodes);
CHECK_EQ(this->split_types_.size(), this->Size());
CHECK_EQ(this->GetSplitCategoriesPtr().size(), this->Size());
I64Array categories_segments;
I64Array categories_sizes;
I32Array categories; // bst_cat_t = int32_t
I32Array categories_nodes; // bst_node_t = int32_t
U8Array split_type(split_types_.size());
for (size_t i = 0; i < nodes_.size(); ++i) {
split_type.Set(i, static_cast<std::underlying_type_t<FeatureType>>(this->NodeSplitType(i)));
if (this->split_types_[i] == FeatureType::kCategorical) {
categories_nodes.GetArray().emplace_back(i);
auto begin = categories.Size();
@ -1012,66 +1013,49 @@ void RegTree::SaveCategoricalSplit(Json* p_out) const {
}
}
out["split_type"] = std::move(split_type);
out["categories_segments"] = std::move(categories_segments);
out["categories_sizes"] = std::move(categories_sizes);
out["categories_nodes"] = std::move(categories_nodes);
out["categories"] = std::move(categories);
}
template <bool typed, bool feature_is_64,
typename FloatArrayT = std::conditional_t<typed, F32Array const, Array const>,
typename U8ArrayT = std::conditional_t<typed, U8Array const, Array const>,
typename I32ArrayT = std::conditional_t<typed, I32Array const, Array const>,
typename I64ArrayT = std::conditional_t<typed, I64Array const, Array const>,
typename IndexArrayT = std::conditional_t<feature_is_64, I64ArrayT, I32ArrayT>>
bool LoadModelImpl(Json const& in, TreeParam* param, std::vector<RTreeNodeStat>* p_stats,
std::vector<FeatureType>* p_split_types, std::vector<RegTree::Node>* p_nodes,
std::vector<RegTree::Segment>* p_split_categories_segments) {
template <bool typed, bool feature_is_64>
void LoadModelImpl(Json const& in, TreeParam const& param, std::vector<RTreeNodeStat>* p_stats,
std::vector<RegTree::Node>* p_nodes) {
namespace tf = tree_field;
auto& stats = *p_stats;
auto& split_types = *p_split_types;
auto& nodes = *p_nodes;
auto& split_categories_segments = *p_split_categories_segments;
FromJson(in["tree_param"], param);
auto n_nodes = param->num_nodes;
auto n_nodes = param.num_nodes;
CHECK_NE(n_nodes, 0);
// stats
auto const& loss_changes = get<FloatArrayT>(in["loss_changes"]);
auto const& loss_changes = get<FloatArrayT<typed>>(in[tf::kLossChg]);
CHECK_EQ(loss_changes.size(), n_nodes);
auto const& sum_hessian = get<FloatArrayT>(in["sum_hessian"]);
auto const& sum_hessian = get<FloatArrayT<typed>>(in[tf::kSumHess]);
CHECK_EQ(sum_hessian.size(), n_nodes);
auto const& base_weights = get<FloatArrayT>(in["base_weights"]);
auto const& base_weights = get<FloatArrayT<typed>>(in[tf::kBaseWeight]);
CHECK_EQ(base_weights.size(), n_nodes);
// nodes
auto const& lefts = get<I32ArrayT>(in["left_children"]);
auto const& lefts = get<I32ArrayT<typed>>(in[tf::kLeft]);
CHECK_EQ(lefts.size(), n_nodes);
auto const& rights = get<I32ArrayT>(in["right_children"]);
auto const& rights = get<I32ArrayT<typed>>(in[tf::kRight]);
CHECK_EQ(rights.size(), n_nodes);
auto const& parents = get<I32ArrayT>(in["parents"]);
auto const& parents = get<I32ArrayT<typed>>(in[tf::kParent]);
CHECK_EQ(parents.size(), n_nodes);
auto const& indices = get<IndexArrayT>(in["split_indices"]);
auto const& indices = get<IndexArrayT<typed, feature_is_64>>(in[tf::kSplitIdx]);
CHECK_EQ(indices.size(), n_nodes);
auto const& conds = get<FloatArrayT>(in["split_conditions"]);
auto const& conds = get<FloatArrayT<typed>>(in[tf::kSplitCond]);
CHECK_EQ(conds.size(), n_nodes);
auto const& default_left = get<U8ArrayT>(in["default_left"]);
auto const& default_left = get<U8ArrayT<typed>>(in[tf::kDftLeft]);
CHECK_EQ(default_left.size(), n_nodes);
bool has_cat = get<Object const>(in).find("split_type") != get<Object const>(in).cend();
std::remove_const_t<std::remove_reference_t<decltype(get<U8ArrayT const>(in["split_type"]))>>
split_type;
if (has_cat) {
split_type = get<U8ArrayT const>(in["split_type"]);
}
// Initialization
stats = std::remove_reference_t<decltype(stats)>(n_nodes);
nodes = std::remove_reference_t<decltype(nodes)>(n_nodes);
split_types = std::remove_reference_t<decltype(split_types)>(n_nodes);
split_categories_segments = std::remove_reference_t<decltype(split_categories_segments)>(n_nodes);
static_assert(std::is_integral<decltype(GetElem<Integer>(lefts, 0))>::value);
static_assert(std::is_floating_point<decltype(GetElem<Number>(loss_changes, 0))>::value);
CHECK_EQ(n_nodes, split_categories_segments.size());
// Set node
for (int32_t i = 0; i < n_nodes; ++i) {
@ -1088,41 +1072,46 @@ bool LoadModelImpl(Json const& in, TreeParam* param, std::vector<RTreeNodeStat>*
float cond{GetElem<Number>(conds, i)};
bool dft_left{GetElem<Boolean>(default_left, i)};
n = RegTree::Node{left, right, parent, ind, cond, dft_left};
if (has_cat) {
split_types[i] = static_cast<FeatureType>(GetElem<Integer>(split_type, i));
}
}
return has_cat;
}
void RegTree::LoadModel(Json const& in) {
bool has_cat{false};
bool typed = IsA<F32Array>(in["loss_changes"]);
bool feature_is_64 = IsA<I64Array>(in["split_indices"]);
if (typed && feature_is_64) {
has_cat = LoadModelImpl<true, true>(in, &param, &stats_, &split_types_, &nodes_,
&split_categories_segments_);
} else if (typed && !feature_is_64) {
has_cat = LoadModelImpl<true, false>(in, &param, &stats_, &split_types_, &nodes_,
&split_categories_segments_);
} else if (!typed && feature_is_64) {
has_cat = LoadModelImpl<false, true>(in, &param, &stats_, &split_types_, &nodes_,
&split_categories_segments_);
} else {
has_cat = LoadModelImpl<false, false>(in, &param, &stats_, &split_types_, &nodes_,
&split_categories_segments_);
}
namespace tf = tree_field;
bool typed = IsA<I32Array>(in[tf::kParent]);
auto const& in_obj = get<Object const>(in);
// basic properties
FromJson(in["tree_param"], &param);
// categorical splits
bool has_cat = in_obj.find("split_type") != in_obj.cend();
if (has_cat) {
if (typed) {
this->LoadCategoricalSplit<true>(in);
} else {
this->LoadCategoricalSplit<false>(in);
}
}
// multi-target
if (param.size_leaf_vector > 1) {
this->p_mt_tree_.reset(new MultiTargetTree{&param});
this->GetMultiTargetTree()->LoadModel(in);
return;
}
bool feature_is_64 = IsA<I64Array>(in["split_indices"]);
if (typed && feature_is_64) {
LoadModelImpl<true, true>(in, param, &stats_, &nodes_);
} else if (typed && !feature_is_64) {
LoadModelImpl<true, false>(in, param, &stats_, &nodes_);
} else if (!typed && feature_is_64) {
LoadModelImpl<false, true>(in, param, &stats_, &nodes_);
} else {
LoadModelImpl<false, false>(in, param, &stats_, &nodes_);
}
if (!has_cat) {
this->split_categories_segments_.resize(this->param.num_nodes);
this->split_types_.resize(this->param.num_nodes);
std::fill(split_types_.begin(), split_types_.end(), FeatureType::kNumerical);
}
@ -1144,16 +1133,26 @@ void RegTree::LoadModel(Json const& in) {
}
void RegTree::SaveModel(Json* p_out) const {
auto& out = *p_out;
// basic properties
out["tree_param"] = ToJson(param);
// categorical splits
this->SaveCategoricalSplit(p_out);
// multi-target
if (this->IsMultiTarget()) {
CHECK_GT(param.size_leaf_vector, 1);
this->GetMultiTargetTree()->SaveModel(p_out);
return;
}
/* Here we treat leaf nodes and internal nodes equally. Some information, like the
 * child node id, doesn't make sense for a leaf node, but we have to save it anyway
 * to avoid creating a huge map. One difficulty is that XGBoost deletes nodes created
 * by the pruner, and the pruner can be used inside another updater, so leaves are
 * not necessarily at the end of the node array.
*/
auto& out = *p_out;
CHECK_EQ(param.num_nodes, static_cast<int>(nodes_.size()));
CHECK_EQ(param.num_nodes, static_cast<int>(stats_.size()));
out["tree_param"] = ToJson(param);
CHECK_EQ(get<String>(out["tree_param"]["num_nodes"]), std::to_string(param.num_nodes));
auto n_nodes = param.num_nodes;
@ -1167,12 +1166,12 @@ void RegTree::SaveModel(Json* p_out) const {
I32Array rights(n_nodes);
I32Array parents(n_nodes);
F32Array conds(n_nodes);
U8Array default_left(n_nodes);
U8Array split_type(n_nodes);
CHECK_EQ(this->split_types_.size(), param.num_nodes);
namespace tf = tree_field;
auto save_tree = [&](auto* p_indices_array) {
auto& indices_array = *p_indices_array;
for (bst_node_t i = 0; i < n_nodes; ++i) {
@ -1188,33 +1187,28 @@ void RegTree::SaveModel(Json* p_out) const {
indices_array.Set(i, n.SplitIndex());
conds.Set(i, n.SplitCond());
default_left.Set(i, static_cast<uint8_t>(!!n.DefaultLeft()));
split_type.Set(i, static_cast<uint8_t>(this->NodeSplitType(i)));
}
};
if (this->param.num_feature > static_cast<bst_feature_t>(std::numeric_limits<int32_t>::max())) {
I64Array indices_64(n_nodes);
save_tree(&indices_64);
out["split_indices"] = std::move(indices_64);
out[tf::kSplitIdx] = std::move(indices_64);
} else {
I32Array indices_32(n_nodes);
save_tree(&indices_32);
out["split_indices"] = std::move(indices_32);
out[tf::kSplitIdx] = std::move(indices_32);
}
this->SaveCategoricalSplit(&out);
out[tf::kLossChg] = std::move(loss_changes);
out[tf::kSumHess] = std::move(sum_hessian);
out[tf::kBaseWeight] = std::move(base_weights);
out["split_type"] = std::move(split_type);
out["loss_changes"] = std::move(loss_changes);
out["sum_hessian"] = std::move(sum_hessian);
out["base_weights"] = std::move(base_weights);
out[tf::kLeft] = std::move(lefts);
out[tf::kRight] = std::move(rights);
out[tf::kParent] = std::move(parents);
out["left_children"] = std::move(lefts);
out["right_children"] = std::move(rights);
out["parents"] = std::move(parents);
out["split_conditions"] = std::move(conds);
out["default_left"] = std::move(default_left);
out[tf::kSplitCond] = std::move(conds);
out[tf::kDftLeft] = std::move(default_left);
}
void RegTree::CalculateContributionsApprox(const RegTree::FVec &feat,

View File

@ -1,20 +1,20 @@
/*!
* Copyright 2015-2022 by XGBoost Contributors
/**
* Copyright 2015-2023 by XGBoost Contributors
* \file tree_updater.cc
* \brief Registry of tree updaters.
*/
#include "xgboost/tree_updater.h"
#include <dmlc/registry.h>
#include "xgboost/tree_updater.h"
#include "xgboost/host_device_vector.h"
#include <string> // for string
namespace dmlc {
DMLC_REGISTRY_ENABLE(::xgboost::TreeUpdaterReg);
} // namespace dmlc
namespace xgboost {
TreeUpdater* TreeUpdater::Create(const std::string& name, Context const* ctx, ObjInfo task) {
TreeUpdater* TreeUpdater::Create(const std::string& name, Context const* ctx, ObjInfo const* task) {
auto* e = ::dmlc::Registry< ::xgboost::TreeUpdaterReg>::Get()->Find(name);
if (e == nullptr) {
LOG(FATAL) << "Unknown tree updater " << name;
@ -22,11 +22,9 @@ TreeUpdater* TreeUpdater::Create(const std::string& name, Context const* ctx, Ob
auto p_updater = (e->body)(ctx, task);
return p_updater;
}
} // namespace xgboost
namespace xgboost {
namespace tree {
namespace xgboost::tree {
// List of files that will be force linked in static links.
DMLC_REGISTRY_LINK_TAG(updater_colmaker);
DMLC_REGISTRY_LINK_TAG(updater_refresh);
@ -38,4 +36,3 @@ DMLC_REGISTRY_LINK_TAG(updater_sync);
DMLC_REGISTRY_LINK_TAG(updater_gpu_hist);
#endif // XGBOOST_USE_CUDA, XGBOOST_USE_HIP
} // namespace tree
} // namespace xgboost

View File

@ -14,14 +14,15 @@
#include "driver.h"
#include "hist/evaluate_splits.h"
#include "hist/histogram.h"
#include "hist/sampler.h" // SampleGradient
#include "hist/sampler.h" // for SampleGradient
#include "param.h"
#include "xgboost/base.h"
#include "xgboost/data.h"
#include "xgboost/json.h"
#include "xgboost/linalg.h"
#include "xgboost/task.h" // for ObjInfo
#include "xgboost/tree_model.h"
#include "xgboost/tree_updater.h"
#include "xgboost/tree_updater.h" // for TreeUpdater
namespace xgboost::tree {
@ -40,12 +41,12 @@ auto BatchSpec(TrainParam const &p, common::Span<float> hess) {
class GloablApproxBuilder {
protected:
TrainParam const* param_;
TrainParam const *param_;
std::shared_ptr<common::ColumnSampler> col_sampler_;
HistEvaluator<CPUExpandEntry> evaluator_;
HistogramBuilder<CPUExpandEntry> histogram_builder_;
Context const *ctx_;
ObjInfo const task_;
ObjInfo const *const task_;
std::vector<CommonRowPartitioner> partitioner_;
// Pointer to last updated tree, used for update prediction cache.
@ -63,7 +64,8 @@ class GloablApproxBuilder {
bst_bin_t n_total_bins = 0;
partitioner_.clear();
// Generating the GHistIndexMatrix is quite slow; is there a way to speed it up?
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(*param_, hess, task_))) {
for (auto const &page :
p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(*param_, hess, *task_))) {
if (n_total_bins == 0) {
n_total_bins = page.cut.TotalBins();
feature_values_ = page.cut;
@ -157,7 +159,7 @@ class GloablApproxBuilder {
void LeafPartition(RegTree const &tree, common::Span<float const> hess,
std::vector<bst_node_t> *p_out_position) {
monitor_->Start(__func__);
if (!task_.UpdateTreeLeaf()) {
if (!task_->UpdateTreeLeaf()) {
return;
}
for (auto const &part : partitioner_) {
@ -168,8 +170,8 @@ class GloablApproxBuilder {
public:
explicit GloablApproxBuilder(TrainParam const *param, MetaInfo const &info, Context const *ctx,
std::shared_ptr<common::ColumnSampler> column_sampler, ObjInfo task,
common::Monitor *monitor)
std::shared_ptr<common::ColumnSampler> column_sampler,
ObjInfo const *task, common::Monitor *monitor)
: param_{param},
col_sampler_{std::move(column_sampler)},
evaluator_{ctx, param_, info, col_sampler_},
@ -256,10 +258,11 @@ class GlobalApproxUpdater : public TreeUpdater {
DMatrix *cached_{nullptr};
std::shared_ptr<common::ColumnSampler> column_sampler_ =
std::make_shared<common::ColumnSampler>();
ObjInfo task_;
ObjInfo const *task_;
public:
explicit GlobalApproxUpdater(Context const *ctx, ObjInfo task) : TreeUpdater(ctx), task_{task} {
explicit GlobalApproxUpdater(Context const *ctx, ObjInfo const *task)
: TreeUpdater(ctx), task_{task} {
monitor_.Init(__func__);
}
@ -317,5 +320,7 @@ XGBOOST_REGISTER_TREE_UPDATER(GlobalHistMaker, "grow_histmaker")
.describe(
"Tree constructor that uses approximate histogram construction "
"for each node.")
.set_body([](Context const *ctx, ObjInfo task) { return new GlobalApproxUpdater(ctx, task); });
.set_body([](Context const *ctx, ObjInfo const *task) {
return new GlobalApproxUpdater(ctx, task);
});
} // namespace xgboost::tree

View File

@ -603,5 +603,5 @@ class ColMaker: public TreeUpdater {
XGBOOST_REGISTER_TREE_UPDATER(ColMaker, "grow_colmaker")
.describe("Grow tree with parallelization over columns.")
.set_body([](Context const *ctx, ObjInfo) { return new ColMaker(ctx); });
.set_body([](Context const *ctx, auto) { return new ColMaker(ctx); });
} // namespace xgboost::tree

View File

@ -16,6 +16,8 @@
#include "../common/bitfield.h"
#include "../common/categorical.h"
#include "../common/cuda_context.cuh" // CUDAContext
#if defined(XGBOOST_USE_CUDA)
#include "../common/device_helpers.cuh"
#elif defined(XGBOOST_USE_HIP)
@ -26,7 +28,6 @@
#include "../common/io.h"
#include "../common/timer.h"
#include "../data/ellpack_page.cuh"
#include "../common/cuda_context.cuh" // CUDAContext
#include "constraints.cuh"
#include "driver.h"
#include "gpu_hist/evaluate_splits.cuh"
@ -45,11 +46,10 @@
#include "xgboost/json.h"
#include "xgboost/parameter.h"
#include "xgboost/span.h"
#include "xgboost/task.h"
#include "xgboost/task.h" // for ObjInfo
#include "xgboost/tree_model.h"
namespace xgboost {
namespace tree {
namespace xgboost::tree {
#if !defined(GTEST_TEST)
DMLC_REGISTRY_FILE_TAG(updater_gpu_hist);
#endif // !defined(GTEST_TEST)
@ -112,12 +112,12 @@ class DeviceHistogramStorage {
nidx_map_.clear();
overflow_nidx_map_.clear();
}
bool HistogramExists(int nidx) const {
[[nodiscard]] bool HistogramExists(int nidx) const {
return nidx_map_.find(nidx) != nidx_map_.cend() ||
overflow_nidx_map_.find(nidx) != overflow_nidx_map_.cend();
}
int Bins() const { return n_bins_; }
size_t HistogramSize() const { return n_bins_ * kNumItemsInGradientSum; }
[[nodiscard]] int Bins() const { return n_bins_; }
[[nodiscard]] size_t HistogramSize() const { return n_bins_ * kNumItemsInGradientSum; }
dh::device_vector<typename GradientSumT::ValueT>& Data() { return data_; }
void AllocateHistograms(const std::vector<int>& new_nidxs) {
@ -489,7 +489,7 @@ struct GPUHistMakerDevice {
dh::caching_device_vector<FeatureType> d_split_types;
dh::caching_device_vector<uint32_t> d_categories;
dh::caching_device_vector<RegTree::Segment> d_categories_segments;
dh::caching_device_vector<RegTree::CategoricalSplitMatrix::Segment> d_categories_segments;
if (!categories.empty()) {
dh::CopyToD(h_split_types, &d_split_types);
@ -502,11 +502,10 @@ struct GPUHistMakerDevice {
p_out_position);
}
void FinalisePositionInPage(EllpackPageImpl const *page,
const common::Span<RegTree::Node> d_nodes,
common::Span<FeatureType const> d_feature_types,
common::Span<uint32_t const> categories,
common::Span<RegTree::Segment> categories_segments,
void FinalisePositionInPage(
EllpackPageImpl const* page, const common::Span<RegTree::Node> d_nodes,
common::Span<FeatureType const> d_feature_types, common::Span<uint32_t const> categories,
common::Span<RegTree::CategoricalSplitMatrix::Segment> categories_segments,
HostDeviceVector<bst_node_t>* p_out_position) {
auto d_matrix = page->GetDeviceAccessor(ctx_->gpu_id);
auto d_gpair = this->gpair;
@ -755,8 +754,9 @@ struct GPUHistMakerDevice {
return root_entry;
}
void UpdateTree(HostDeviceVector<GradientPair>* gpair_all, DMatrix* p_fmat, ObjInfo task,
RegTree* p_tree, collective::DeviceCommunicator* communicator,
void UpdateTree(HostDeviceVector<GradientPair>* gpair_all, DMatrix* p_fmat,
ObjInfo const* task, RegTree* p_tree,
collective::DeviceCommunicator* communicator,
HostDeviceVector<bst_node_t>* p_out_position) {
auto& tree = *p_tree;
// Process maximum 32 nodes at a time
@ -806,7 +806,7 @@ struct GPUHistMakerDevice {
}
monitor.Start("FinalisePosition");
this->FinalisePosition(p_tree, p_fmat, task, p_out_position);
this->FinalisePosition(p_tree, p_fmat, *task, p_out_position);
monitor.Stop("FinalisePosition");
}
};
@ -815,7 +815,7 @@ class GPUHistMaker : public TreeUpdater {
using GradientSumT = GradientPairPrecise;
public:
explicit GPUHistMaker(Context const* ctx, ObjInfo task)
explicit GPUHistMaker(Context const* ctx, ObjInfo const* task)
: TreeUpdater(ctx), task_{task} {};
void Configure(const Args& args) override {
// Used in test to count how many configurations are performed
@ -947,8 +947,8 @@ class GPUHistMaker : public TreeUpdater {
std::unique_ptr<GPUHistMakerDevice<GradientSumT>> maker; // NOLINT
char const* Name() const override { return "grow_gpu_hist"; }
bool HasNodePosition() const override { return true; }
[[nodiscard]] char const* Name() const override { return "grow_gpu_hist"; }
[[nodiscard]] bool HasNodePosition() const override { return true; }
private:
bool initialised_{false};
@ -957,7 +957,7 @@ class GPUHistMaker : public TreeUpdater {
DMatrix* p_last_fmat_{nullptr};
RegTree const* p_last_tree_{nullptr};
ObjInfo task_;
ObjInfo const* task_{nullptr};
common::Monitor monitor_;
};
@ -965,8 +965,8 @@ class GPUHistMaker : public TreeUpdater {
#if !defined(GTEST_TEST)
XGBOOST_REGISTER_TREE_UPDATER(GPUHistMaker, "grow_gpu_hist")
.describe("Grow tree with GPU.")
.set_body([](Context const* ctx, ObjInfo task) { return new GPUHistMaker(ctx, task); });
.set_body([](Context const* ctx, ObjInfo const* task) {
return new GPUHistMaker(ctx, task);
});
#endif // !defined(GTEST_TEST)
} // namespace tree
} // namespace xgboost
} // namespace xgboost::tree

View File

@ -18,7 +18,7 @@ DMLC_REGISTRY_FILE_TAG(updater_prune);
/*! \brief pruner that prunes a tree after growing finishes */
class TreePruner : public TreeUpdater {
public:
explicit TreePruner(Context const* ctx, ObjInfo task) : TreeUpdater(ctx) {
explicit TreePruner(Context const* ctx, ObjInfo const* task) : TreeUpdater(ctx) {
syncher_.reset(TreeUpdater::Create("sync", ctx_, task));
pruner_monitor_.Init("TreePruner");
}
@ -90,5 +90,7 @@ class TreePruner : public TreeUpdater {
XGBOOST_REGISTER_TREE_UPDATER(TreePruner, "prune")
.describe("Pruner that prune the tree according to statistics.")
.set_body([](Context const* ctx, ObjInfo task) { return new TreePruner(ctx, task); });
.set_body([](Context const* ctx, ObjInfo const* task) {
return new TreePruner{ctx, task};
});
} // namespace xgboost::tree

View File

@ -35,7 +35,7 @@ void QuantileHistMaker::Update(TrainParam const *param, HostDeviceVector<Gradien
// build tree
const size_t n_trees = trees.size();
if (!pimpl_) {
pimpl_.reset(new Builder(n_trees, param, dmat, task_, ctx_));
pimpl_.reset(new Builder(n_trees, param, dmat, *task_, ctx_));
}
size_t t_idx{0};
@ -287,6 +287,8 @@ void QuantileHistMaker::Builder::InitData(DMatrix *fmat, const RegTree &tree,
XGBOOST_REGISTER_TREE_UPDATER(QuantileHistMaker, "grow_quantile_histmaker")
.describe("Grow tree using quantized histogram.")
.set_body([](Context const *ctx, ObjInfo task) { return new QuantileHistMaker(ctx, task); });
.set_body([](Context const *ctx, ObjInfo const *task) {
return new QuantileHistMaker(ctx, task);
});
} // namespace tree
} // namespace xgboost

View File

@ -43,7 +43,8 @@ inline BatchParam HistBatch(TrainParam const* param) {
/*! \brief construct a tree using quantized feature values */
class QuantileHistMaker: public TreeUpdater {
public:
explicit QuantileHistMaker(Context const* ctx, ObjInfo task) : TreeUpdater(ctx), task_{task} {}
explicit QuantileHistMaker(Context const* ctx, ObjInfo const* task)
: TreeUpdater(ctx), task_{task} {}
void Configure(const Args&) override {}
void Update(TrainParam const* param, HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
@ -125,7 +126,7 @@ class QuantileHistMaker: public TreeUpdater {
protected:
std::unique_ptr<Builder> pimpl_;
ObjInfo task_;
ObjInfo const* task_;
};
} // namespace xgboost::tree

View File

@ -142,5 +142,5 @@ class TreeRefresher : public TreeUpdater {
XGBOOST_REGISTER_TREE_UPDATER(TreeRefresher, "refresh")
.describe("Refresher that refreshes the weight and statistics according to data.")
.set_body([](Context const *ctx, ObjInfo) { return new TreeRefresher(ctx); });
.set_body([](Context const *ctx, auto) { return new TreeRefresher(ctx); });
} // namespace xgboost::tree

View File

@ -1,5 +1,5 @@
/**
* Copyright 2014-2013 by XBGoost Contributors
 * Copyright 2014-2023 by XGBoost Contributors
* \file updater_sync.cc
* \brief synchronize the tree in all distributed nodes
*/
@ -53,5 +53,5 @@ class TreeSyncher : public TreeUpdater {
XGBOOST_REGISTER_TREE_UPDATER(TreeSyncher, "sync")
.describe("Syncher that synchronize the tree in all distributed nodes.")
.set_body([](Context const* ctx, ObjInfo) { return new TreeSyncher(ctx); });
.set_body([](Context const* ctx, auto) { return new TreeSyncher(ctx); });
} // namespace xgboost::tree

View File

@ -20,7 +20,7 @@ command_wrapper="tests/ci_build/ci_build.sh rmm docker --build-arg "`
echo "--- Build libxgboost from the source"
$command_wrapper tests/ci_build/build_via_cmake.sh --conda-env=gpu_test -DUSE_CUDA=ON \
-DUSE_NCCL=ON -DPLUGIN_RMM=ON -DBUILD_WITH_CUDA_CUB=ON ${arch_flag}
-DUSE_NCCL=ON -DPLUGIN_RMM=ON ${arch_flag}
echo "-- Stash C++ test executable (testxgboost)"
buildkite-agent artifact upload build/testxgboost

View File

@ -1,38 +1,69 @@
/**
* Copyright 2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <gtest/gtest.h> // for Test, AssertionResult, Message, TestPartR...
#include <gtest/gtest.h> // for ASSERT_NEAR, ASSERT_T...
#include <xgboost/base.h> // for Args
#include <xgboost/context.h> // for Context
#include <xgboost/string_view.h> // for StringView
#include <cstdint> // std::uint32_t
#include <cstdint> // for uint32_t
#include <utility> // for pair
#include "../../../src/common/ranking_utils.h"
#include "../../../src/common/ranking_utils.h" // for LambdaRankParam, ParseMetricName, MakeMet...
namespace xgboost {
namespace ltr {
TEST(RankingUtils, MakeMetricName) {
namespace xgboost::ltr {
TEST(RankingUtils, LambdaRankParam) {
// Make sure no memory is shared between dmlc parameter instances.
LambdaRankParam p0;
p0.UpdateAllowUnknown(Args{{"lambdarank_num_pair_per_sample", "3"}});
ASSERT_EQ(p0.NumPair(), 3);
LambdaRankParam p1;
p1.UpdateAllowUnknown(Args{{"lambdarank_num_pair_per_sample", "8"}});
ASSERT_EQ(p0.NumPair(), 3);
ASSERT_EQ(p1.NumPair(), 8);
p0.UpdateAllowUnknown(Args{{"lambdarank_num_pair_per_sample", "17"}});
ASSERT_EQ(p0.NumPair(), 17);
ASSERT_EQ(p1.NumPair(), 8);
}
TEST(RankingUtils, ParseMetricName) {
std::uint32_t topn{32};
bool minus{false};
auto name = MakeMetricName("ndcg", "3-", &topn, &minus);
auto name = ParseMetricName("ndcg", "3-", &topn, &minus);
ASSERT_EQ(name, "ndcg@3-");
ASSERT_EQ(topn, 3);
ASSERT_TRUE(minus);
name = MakeMetricName("ndcg", "6", &topn, &minus);
name = ParseMetricName("ndcg", "6", &topn, &minus);
ASSERT_EQ(topn, 6);
ASSERT_TRUE(minus); // unchanged
minus = false;
name = MakeMetricName("ndcg", "-", &topn, &minus);
name = ParseMetricName("ndcg", "-", &topn, &minus);
ASSERT_EQ(topn, 6); // unchanged
ASSERT_TRUE(minus);
name = MakeMetricName("ndcg", nullptr, &topn, &minus);
name = ParseMetricName("ndcg", nullptr, &topn, &minus);
ASSERT_EQ(topn, 6); // unchanged
ASSERT_TRUE(minus); // unchanged
name = MakeMetricName("ndcg", StringView{}, &topn, &minus);
name = ParseMetricName("ndcg", StringView{}, &topn, &minus);
ASSERT_EQ(topn, 6); // unchanged
ASSERT_TRUE(minus); // unchanged
}
} // namespace ltr
} // namespace xgboost
TEST(RankingUtils, MakeMetricName) {
auto name = MakeMetricName("map", LambdaRankParam::NotSet(), true);
ASSERT_EQ(name, "map-");
name = MakeMetricName("map", LambdaRankParam::NotSet(), false);
ASSERT_EQ(name, "map");
name = MakeMetricName("map", 2, true);
ASSERT_EQ(name, "map@2-");
name = MakeMetricName("map", 2, false);
ASSERT_EQ(name, "map@2");
}
} // namespace xgboost::ltr

View File

@ -24,6 +24,7 @@
#include "../../src/data/array_interface.h"
#include "../../src/gbm/gbtree_model.h"
#include "filesystem.h" // dmlc::TemporaryDirectory
#include "xgboost/linalg.h"
#if defined(__CUDACC__)
#define DeclareUnifiedTest(name) GPU ## name
@ -461,7 +462,7 @@ inline LearnerModelParam MakeMP(bst_feature_t n_features, float base_score, uint
int32_t device = Context::kCpuId) {
size_t shape[1]{1};
LearnerModelParam mparam(n_features, linalg::Tensor<float, 1>{{base_score}, shape, device},
n_groups);
n_groups, 1, MultiStrategy::kComposite);
return mparam;
}

View File

@ -2,24 +2,26 @@
* Copyright 2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/base.h> // bst_target_t
#include <xgboost/data.h> // DMatrix
#include <xgboost/json.h> // Json,Object,Number,get
#include <xgboost/learner.h> // Learner
#include <xgboost/base.h> // for Args, bst_target_t
#include <xgboost/data.h> // for DMatrix, MetaInfo
#include <xgboost/json.h> // for Json, get, Object, String
#include <xgboost/learner.h> // for Learner
#include <cstddef> // size_t
#include <memory> // shared_ptr,unique_ptr
#include <numeric>
#include <string> // stod
#include <vector>
#include <algorithm> // for copy
#include <cstddef> // for size_t
#include <memory> // for shared_ptr, allocator, __shared_ptr_access
#include <numeric> // for accumulate
#include <string> // for stod, string
#include <vector> // for vector
#include "../../src/common/linalg_op.h" // cbegin,cend
#include "../../src/common/stats.h" // Median
#include "helpers.h" // RandomDataGenerator
#include "xgboost/linalg.h"
#include "../../src/common/linalg_op.h" // for begin, cbegin, cend
#include "../../src/common/stats.h" // for Median
#include "../../src/common/transform_iterator.h" // for IndexTransformIter
#include "helpers.h" // for RandomDataGenerator
#include "xgboost/host_device_vector.h" // for HostDeviceVector
#include "xgboost/linalg.h" // for Tensor, All, TensorView, Vector
namespace xgboost {
class TestL1MultiTarget : public ::testing::Test {
std::shared_ptr<DMatrix> Xy_;
std::shared_ptr<DMatrix> Xyw_;
@ -117,4 +119,16 @@ TEST_F(TestL1MultiTarget, Approx) { this->RunTest("approx"); }
#if defined(XGBOOST_USE_CUDA)
TEST_F(TestL1MultiTarget, GpuHist) { this->RunTest("gpu_hist"); }
#endif // defined(XGBOOST_USE_CUDA)
TEST(MultiStrategy, Configure) {
auto p_fmat = RandomDataGenerator{12ul, 3ul, 0.0}.GenerateDMatrix();
p_fmat->Info().labels.Reshape(p_fmat->Info().num_row_, 2);
std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};
learner->SetParams(Args{{"multi_strategy", "monolithic"}, {"num_target", "2"}});
learner->Configure();
ASSERT_EQ(learner->Groups(), 2);
learner->SetParams(Args{{"multi_strategy", "monolithic"}, {"num_target", "0"}});
ASSERT_THROW({ learner->Configure(); }, dmlc::Error);
}
} // namespace xgboost
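
The new Configure test fixes two behaviours: under the monolithic strategy the number of output groups tracks num_target, and a zero target count must fail configuration. A minimal sketch of the second rule, assuming a hypothetical free function; the real check sits inside the learner's configuration path and reports through dmlc::Error, which is what ASSERT_THROW above expects:

#include <cstdint>    // for int32_t
#include <stdexcept>  // for invalid_argument

// Hypothetical stand-in for the validation Configure() performs: once a
// multi-target strategy is requested, at least one target is required.
void ValidateNumTarget(std::int32_t num_target) {
  if (num_target < 1) {
    throw std::invalid_argument{"num_target must be >= 1"};
  }
}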

View File

@ -170,8 +170,8 @@ void TestHistogramIndexImpl() {
// Build two matrices and a histogram maker for each.
Context ctx(CreateEmptyGenericParam(0));
tree::GPUHistMaker hist_maker{&ctx, ObjInfo{ObjInfo::kRegression}},
hist_maker_ext{&ctx, ObjInfo{ObjInfo::kRegression}};
ObjInfo task{ObjInfo::kRegression};
tree::GPUHistMaker hist_maker{&ctx, &task}, hist_maker_ext{&ctx, &task};
std::unique_ptr<DMatrix> hist_maker_dmat(
CreateSparsePageDMatrixWithRC(kNRows, kNCols, 0, true));
@ -240,7 +240,8 @@ void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
param.UpdateAllowUnknown(args);
Context ctx(CreateEmptyGenericParam(0));
tree::GPUHistMaker hist_maker{&ctx,ObjInfo{ObjInfo::kRegression}};
ObjInfo task{ObjInfo::kRegression};
tree::GPUHistMaker hist_maker{&ctx, &task};
std::vector<HostDeviceVector<bst_node_t>> position(1);
hist_maker.Update(&param, gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position},
@ -385,8 +386,8 @@ TEST(GpuHist, ExternalMemoryWithSampling) {
TEST(GpuHist, ConfigIO) {
Context ctx(CreateEmptyGenericParam(0));
std::unique_ptr<TreeUpdater> updater{
TreeUpdater::Create("grow_gpu_hist", &ctx, ObjInfo{ObjInfo::kRegression})};
ObjInfo task{ObjInfo::kRegression};
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_gpu_hist", &ctx, &task)};
updater->Configure(Args{});
Json j_updater { Object() };
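
A pattern repeated across the updater tests in this commit: TreeUpdater::Create and the GPUHistMaker constructor now take ObjInfo by pointer rather than by value, so every call site first declares a named task object. A minimal sketch of the new call shape; the lifetime caveat is an assumption read off the signature, since the updater can retain the pointer:

#include <xgboost/base.h>          // for Args
#include <xgboost/context.h>       // for Context
#include <xgboost/task.h>          // for ObjInfo
#include <xgboost/tree_updater.h>  // for TreeUpdater

#include <memory>  // for unique_ptr

int main() {
  xgboost::Context ctx;
  // The task is a named object rather than a temporary: the updater holds a
  // pointer to it, so it must outlive the updater.
  xgboost::ObjInfo task{xgboost::ObjInfo::kRegression};
  std::unique_ptr<xgboost::TreeUpdater> up{
      xgboost::TreeUpdater::Create("grow_histmaker", &ctx, &task)};
  up->Configure(xgboost::Args{});
  return 0;
}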

View File

@ -37,13 +37,13 @@ TEST(GrowHistMaker, InteractionConstraint)
auto p_gradients = GenerateGradients(kRows);
Context ctx;
ObjInfo task{ObjInfo::kRegression};
{
// With constraints
RegTree tree;
tree.param.num_feature = kCols;
std::unique_ptr<TreeUpdater> updater{
TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)};
TrainParam param;
param.UpdateAllowUnknown(
Args{{"interaction_constraints", "[[0, 1]]"}, {"num_feature", std::to_string(kCols)}});
@ -61,8 +61,7 @@ TEST(GrowHistMaker, InteractionConstraint)
RegTree tree;
tree.param.num_feature = kCols;
std::unique_ptr<TreeUpdater> updater{
TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)};
std::vector<HostDeviceVector<bst_node_t>> position(1);
TrainParam param;
param.Init(Args{});
@ -81,8 +80,8 @@ void TestColumnSplit(int32_t rows, int32_t cols, RegTree const& expected_tree) {
auto p_dmat = GenerateDMatrix(rows, cols);
auto p_gradients = GenerateGradients(rows);
Context ctx;
std::unique_ptr<TreeUpdater> updater{
TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
ObjInfo task{ObjInfo::kRegression};
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)};
std::vector<HostDeviceVector<bst_node_t>> position(1);
std::unique_ptr<DMatrix> sliced{
@ -110,12 +109,12 @@ TEST(GrowHistMaker, ColumnSplit) {
RegTree expected_tree;
expected_tree.param.num_feature = kCols;
ObjInfo task{ObjInfo::kRegression};
{
auto p_dmat = GenerateDMatrix(kRows, kCols);
auto p_gradients = GenerateGradients(kRows);
Context ctx;
std::unique_ptr<TreeUpdater> updater{
TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)};
std::vector<HostDeviceVector<bst_node_t>> position(1);
TrainParam param;
param.Init(Args{});

View File

@ -0,0 +1,48 @@
/**
* Copyright 2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/context.h> // for Context
#include <xgboost/multi_target_tree_model.h>
#include <xgboost/tree_model.h> // for RegTree
namespace xgboost {
TEST(MultiTargetTree, JsonIO) {
bst_target_t n_targets{3};
bst_feature_t n_features{4};
RegTree tree{n_targets, n_features};
ASSERT_TRUE(tree.IsMultiTarget());
linalg::Vector<float> base_weight{{1.0f, 2.0f, 3.0f}, {3ul}, Context::kCpuId};
linalg::Vector<float> left_weight{{2.0f, 3.0f, 4.0f}, {3ul}, Context::kCpuId};
linalg::Vector<float> right_weight{{3.0f, 4.0f, 5.0f}, {3ul}, Context::kCpuId};
tree.ExpandNode(RegTree::kRoot, /*split_idx=*/1, 0.5f, true, base_weight.HostView(),
left_weight.HostView(), right_weight.HostView());
ASSERT_EQ(tree.param.num_nodes, 3);
ASSERT_EQ(tree.param.size_leaf_vector, 3);
ASSERT_EQ(tree.GetMultiTargetTree()->Size(), 3);
ASSERT_EQ(tree.Size(), 3);
Json jtree{Object{}};
tree.SaveModel(&jtree);
auto check_jtree = [](Json jtree, RegTree const& tree) {
ASSERT_EQ(get<String const>(jtree["tree_param"]["num_nodes"]),
std::to_string(tree.param.num_nodes));
ASSERT_EQ(get<F32Array const>(jtree["base_weights"]).size(),
tree.param.num_nodes * tree.param.size_leaf_vector);
ASSERT_EQ(get<I32Array const>(jtree["parents"]).size(), tree.param.num_nodes);
ASSERT_EQ(get<I32Array const>(jtree["left_children"]).size(), tree.param.num_nodes);
ASSERT_EQ(get<I32Array const>(jtree["right_children"]).size(), tree.param.num_nodes);
};
check_jtree(jtree, tree);
RegTree loaded;
loaded.LoadModel(jtree);
ASSERT_TRUE(loaded.IsMultiTarget());
ASSERT_EQ(loaded.param.num_nodes, 3);
Json jtree1{Object{}};
loaded.SaveModel(&jtree1);
check_jtree(jtree1, tree);
}
} // namespace xgboost
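
The size assertion on base_weights (num_nodes * size_leaf_vector) implies the serialized weights form a flattened row-major [node][target] array. A small sketch of that indexing under that layout assumption (a hypothetical helper, not part of the tree model API):

#include <cassert>  // for assert
#include <cstddef>  // for size_t
#include <vector>   // for vector

// Assumed layout of the flattened "base_weights" field checked above: the
// weights for node i occupy [i * size_leaf_vector, (i + 1) * size_leaf_vector).
float NodeWeight(std::vector<float> const& base_weights, std::size_t size_leaf_vector,
                 std::size_t nidx, std::size_t target) {
  return base_weights[nidx * size_leaf_vector + target];
}

int main() {
  // 3 nodes x 3 targets, mirroring the tree built in the test above.
  std::vector<float> base_weights{1, 2, 3, 2, 3, 4, 3, 4, 5};
  assert(NodeWeight(base_weights, 3, 1, 2) == 4.0f);  // node 1, target 2
  return 0;
}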

View File

@ -2,22 +2,25 @@
* Copyright 2023 by XGBoost contributors
*/
#include <gtest/gtest.h>
#include <xgboost/task.h>
#include <xgboost/tree_updater.h>
#include <xgboost/context.h> // for Context
#include <xgboost/task.h> // for ObjInfo
#include <xgboost/tree_updater.h> // for TreeUpdater
#include <memory> // for unique_ptr
namespace xgboost {
TEST(Updater, HasNodePosition) {
Context ctx;
ObjInfo task{ObjInfo::kRegression, true, true};
std::unique_ptr<TreeUpdater> up{TreeUpdater::Create("grow_histmaker", &ctx, task)};
std::unique_ptr<TreeUpdater> up{TreeUpdater::Create("grow_histmaker", &ctx, &task)};
ASSERT_TRUE(up->HasNodePosition());
up.reset(TreeUpdater::Create("grow_quantile_histmaker", &ctx, task));
up.reset(TreeUpdater::Create("grow_quantile_histmaker", &ctx, &task));
ASSERT_TRUE(up->HasNodePosition());
#if defined(XGBOOST_USE_CUDA)
ctx.gpu_id = 0;
up.reset(TreeUpdater::Create("grow_gpu_hist", &ctx, task));
up.reset(TreeUpdater::Create("grow_gpu_hist", &ctx, &task));
ASSERT_TRUE(up->HasNodePosition());
#endif // defined(XGBOOST_USE_CUDA)
}

View File

@ -9,6 +9,7 @@
#include "../../../src/tree/param.h" // for TrainParam
#include "../helpers.h"
#include "xgboost/task.h" // for ObjInfo
namespace xgboost {
@ -71,8 +72,8 @@ class TestPredictionCache : public ::testing::Test {
ctx.gpu_id = Context::kCpuId;
}
std::unique_ptr<TreeUpdater> updater{
TreeUpdater::Create(updater_name, &ctx, ObjInfo{ObjInfo::kRegression})};
ObjInfo task{ObjInfo::kRegression};
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create(updater_name, &ctx, &task)};
RegTree tree;
std::vector<RegTree *> trees{&tree};
auto gpair = GenerateRandomGradients(n_samples_);

View File

@ -39,8 +39,8 @@ TEST(Updater, Prune) {
TrainParam param;
param.UpdateAllowUnknown(cfg);
std::unique_ptr<TreeUpdater> pruner(
TreeUpdater::Create("prune", &ctx, ObjInfo{ObjInfo::kRegression}));
ObjInfo task{ObjInfo::kRegression};
std::unique_ptr<TreeUpdater> pruner(TreeUpdater::Create("prune", &ctx, &task));
// loss_chg < min_split_loss;
std::vector<HostDeviceVector<bst_node_t>> position(trees.size());

View File

@ -1,8 +1,9 @@
/**
* Copyright 2018-2013 by XGBoost Contributors
* Copyright 2018-2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/host_device_vector.h>
#include <xgboost/task.h> // for ObjInfo
#include <xgboost/tree_updater.h>
#include <memory>
@ -12,9 +13,7 @@
#include "../../../src/tree/param.h" // for TrainParam
#include "../helpers.h"
namespace xgboost {
namespace tree {
namespace xgboost::tree {
TEST(Updater, Refresh) {
bst_row_t constexpr kRows = 8;
bst_feature_t constexpr kCols = 16;
@ -33,8 +32,9 @@ TEST(Updater, Refresh) {
auto ctx = CreateEmptyGenericParam(GPUIDX);
tree.param.UpdateAllowUnknown(cfg);
std::vector<RegTree*> trees{&tree};
std::unique_ptr<TreeUpdater> refresher(
TreeUpdater::Create("refresh", &ctx, ObjInfo{ObjInfo::kRegression}));
ObjInfo task{ObjInfo::kRegression};
std::unique_ptr<TreeUpdater> refresher(TreeUpdater::Create("refresh", &ctx, &task));
tree.ExpandNode(0, 2, 0.2f, false, 0.0, 0.2f, 0.8f, 0.0f, 0.0f,
/*left_sum=*/0.0f, /*right_sum=*/0.0f);
@ -57,6 +57,4 @@ TEST(Updater, Refresh) {
ASSERT_NEAR(0, tree.Stat(1).loss_chg, kEps);
ASSERT_NEAR(0, tree.Stat(2).loss_chg, kEps);
}
} // namespace tree
} // namespace xgboost
} // namespace xgboost::tree

View File

@ -477,7 +477,7 @@ TEST(Tree, JsonIO) {
auto tparam = j_tree["tree_param"];
ASSERT_EQ(get<String>(tparam["num_feature"]), "0");
ASSERT_EQ(get<String>(tparam["num_nodes"]), "3");
ASSERT_EQ(get<String>(tparam["size_leaf_vector"]), "0");
ASSERT_EQ(get<String>(tparam["size_leaf_vector"]), "1");
ASSERT_EQ(get<I32Array const>(j_tree["left_children"]).size(), 3ul);
ASSERT_EQ(get<I32Array const>(j_tree["right_children"]).size(), 3ul);

View File

@ -2,9 +2,13 @@
* Copyright 2020-2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/context.h> // for Context
#include <xgboost/task.h> // for ObjInfo
#include <xgboost/tree_model.h>
#include <xgboost/tree_updater.h>
#include <memory> // for unique_ptr
#include "../../../src/tree/param.h" // for TrainParam
#include "../helpers.h"
@ -26,12 +30,12 @@ class UpdaterTreeStatTest : public ::testing::Test {
void RunTest(std::string updater) {
tree::TrainParam param;
ObjInfo task{ObjInfo::kRegression};
param.Init(Args{});
Context ctx(updater == "grow_gpu_hist" ? CreateEmptyGenericParam(0)
: CreateEmptyGenericParam(Context::kCpuId));
auto up = std::unique_ptr<TreeUpdater>{
TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kRegression})};
auto up = std::unique_ptr<TreeUpdater>{TreeUpdater::Create(updater, &ctx, &task)};
up->Configure(Args{});
RegTree tree;
tree.param.num_feature = kCols;
@ -74,18 +78,18 @@ class UpdaterEtaTest : public ::testing::Test {
}
void RunTest(std::string updater) {
ObjInfo task{ObjInfo::kClassification};
Context ctx(updater == "grow_gpu_hist" ? CreateEmptyGenericParam(0)
: CreateEmptyGenericParam(Context::kCpuId));
float eta = 0.4;
auto up_0 = std::unique_ptr<TreeUpdater>{
TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kClassification})};
auto up_0 = std::unique_ptr<TreeUpdater>{TreeUpdater::Create(updater, &ctx, &task)};
up_0->Configure(Args{});
tree::TrainParam param0;
param0.Init(Args{{"eta", std::to_string(eta)}});
auto up_1 = std::unique_ptr<TreeUpdater>{
TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kClassification})};
auto up_1 = std::unique_ptr<TreeUpdater>{TreeUpdater::Create(updater, &ctx, &task)};
up_1->Configure(Args{{"eta", "1.0"}});
tree::TrainParam param1;
param1.Init(Args{{"eta", "1.0"}});
@ -153,11 +157,11 @@ class TestMinSplitLoss : public ::testing::Test {
{"gamma", std::to_string(gamma)}};
tree::TrainParam param;
param.UpdateAllowUnknown(args);
ObjInfo task{ObjInfo::kRegression};
Context ctx(updater == "grow_gpu_hist" ? CreateEmptyGenericParam(0)
: CreateEmptyGenericParam(Context::kCpuId));
auto up = std::unique_ptr<TreeUpdater>{
TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kRegression})};
auto up = std::unique_ptr<TreeUpdater>{TreeUpdater::Create(updater, &ctx, &task)};
up->Configure({});
RegTree tree;