initial merge

2023-03-25 04:31:55 +01:00
parent d97be6f396 cff50fe3ef
commit 7fbc561e17
146 changed files with 6730 additions and 4082 deletions
--- a/tests/buildkite/test-cpp-gpu.sh
+++ b/tests/buildkite/test-cpp-gpu.sh
@@ -12,13 +12,12 @@ tests/ci_build/ci_build.sh gpu nvidia-docker \
  --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
  build/testxgboost

-# Disabled until https://github.com/dmlc/xgboost/issues/8619 is resolved
-# echo "--- Run Google Tests with CUDA, using a GPU, RMM enabled"
-# rm -rfv build/
-# buildkite-agent artifact download "build/testxgboost" . --step build-cuda-with-rmm
-# chmod +x build/testxgboost
-# tests/ci_build/ci_build.sh rmm nvidia-docker \
-#   --build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
-#   --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION bash -c \
-#   --build-arg NCCL_VERSION_ARG=$NCCL_VERSION bash -c \
-#   "source activate gpu_test && build/testxgboost --use-rmm-pool"
+echo "--- Run Google Tests with CUDA, using a GPU, RMM enabled"
+rm -rfv build/
+buildkite-agent artifact download "build/testxgboost" . --step build-cuda-with-rmm
+chmod +x build/testxgboost
+tests/ci_build/ci_build.sh rmm nvidia-docker \
+  --build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
+  --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
+  --build-arg NCCL_VERSION_ARG=$NCCL_VERSION bash -c \
+  "source activate gpu_test && build/testxgboost --use-rmm-pool"
--- a/tests/ci_build/lint_python.py
+++ b/tests/ci_build/lint_python.py
@@ -3,7 +3,7 @@ import os
 import subprocess
 import sys
 from multiprocessing import Pool, cpu_count
-from typing import Dict, Optional, Tuple
+from typing import Dict, Tuple

 from pylint import epylint
 from test_utils import PY_PACKAGE, ROOT, cd, print_time, record_time
@@ -15,8 +15,11 @@ SRCPATH = os.path.normpath(


@record_time
-def run_black(rel_path: str) -> bool:
-    cmd = ["black", "-q", "--check", rel_path]
+def run_black(rel_path: str, fix: bool) -> bool:
+    if fix:
+        cmd = ["black", "-q", rel_path]
+    else:
+        cmd = ["black", "-q", "--check", rel_path]
    ret = subprocess.run(cmd).returncode
    if ret != 0:
        subprocess.run(["black", "--version"])
@@ -31,8 +34,11 @@ Please run the following command on your machine to address the formatting error


@record_time
-def run_isort(rel_path: str) -> bool:
-    cmd = ["isort", f"--src={SRCPATH}", "--check", "--profile=black", rel_path]
+def run_isort(rel_path: str, fix: bool) -> bool:
+    if fix:
+        cmd = ["isort", f"--src={SRCPATH}", "--profile=black", rel_path]
+    else:
+        cmd = ["isort", f"--src={SRCPATH}", "--check", "--profile=black", rel_path]
    ret = subprocess.run(cmd).returncode
    if ret != 0:
        subprocess.run(["isort", "--version"])
@@ -132,7 +138,7 @@ def run_pylint() -> bool:
 def main(args: argparse.Namespace) -> None:
    if args.format == 1:
        black_results = [
-            run_black(path)
+            run_black(path, args.fix)
            for path in [
                # core
                "python-package/",
@@ -166,7 +172,7 @@ def main(args: argparse.Namespace) -> None:
            sys.exit(-1)

        isort_results = [
-            run_isort(path)
+            run_isort(path, args.fix)
            for path in [
                # core
                "python-package/",
@@ -230,6 +236,11 @@ if __name__ == "__main__":
    parser.add_argument("--format", type=int, choices=[0, 1], default=1)
    parser.add_argument("--type-check", type=int, choices=[0, 1], default=1)
    parser.add_argument("--pylint", type=int, choices=[0, 1], default=1)
+    parser.add_argument(
+        "--fix",
+        action="store_true",
+        help="Fix the formatting issues instead of emitting an error.",
+    )
    args = parser.parse_args()
    try:
        main(args)
--- a/tests/cpp/collective/test_nccl_device_communicator.cu
+++ b/tests/cpp/collective/test_nccl_device_communicator.cu
@@ -1,10 +1,12 @@
-/*!
- * Copyright 2022 XGBoost contributors
+/**
+ * Copyright 2022-2023, XGBoost contributors
 */
 #ifdef XGBOOST_USE_NCCL

 #include <gtest/gtest.h>

+#include <string>  // for string
+
 #include "../../../src/collective/nccl_device_communicator.cuh"

 namespace xgboost {
@@ -20,7 +22,15 @@ TEST(NcclDeviceCommunicatorSimpleTest, ThrowOnInvalidCommunicator) {
  EXPECT_THROW(construct(), dmlc::Error);
 }

+TEST(NcclDeviceCommunicatorSimpleTest, SystemError) {
+  try {
+    dh::safe_nccl(ncclSystemError);
+  } catch (dmlc::Error const& e) {
+    auto str = std::string{e.what()};
+    ASSERT_TRUE(str.find("environment variables") != std::string::npos);
+  }
+}
 }  // namespace collective
 }  // namespace xgboost

-#endif
+#endif  // XGBOOST_USE_NCCL
--- a/tests/cpp/common/test_partition_builder.cc
+++ b/tests/cpp/common/test_partition_builder.cc
@@ -1,79 +1,79 @@
-#include <gtest/gtest.h>
-#include <vector>
-#include <string>
-#include <utility>
-
-#include "../../../src/common/row_set.h"
-#include "../../../src/common/partition_builder.h"
-#include "../helpers.h"
-
-namespace xgboost {
-namespace common {
-
-TEST(PartitionBuilder, BasicTest) {
-  constexpr size_t kBlockSize = 16;
-  constexpr size_t kNodes = 5;
-  constexpr size_t kTasks = 3 + 5 + 10 + 1 + 2;
-
-  std::vector<size_t> tasks = { 3, 5, 10, 1, 2 };
-
-  PartitionBuilder<kBlockSize> builder;
-  builder.Init(kTasks, kNodes, [&](size_t i) {
-    return tasks[i];
-  });
-
-  std::vector<size_t> rows_for_left_node = { 2, 12, 0, 16, 8 };
-
-  for(size_t nid = 0; nid < kNodes; ++nid) {
-    size_t value_left = 0;
-    size_t value_right = 0;
-
-    size_t left_total = tasks[nid] * rows_for_left_node[nid];
-
-    for(size_t j = 0; j < tasks[nid]; ++j) {
-      size_t begin = kBlockSize*j;
-      size_t end = kBlockSize*(j+1);
-      const size_t id = builder.GetTaskIdx(nid, begin);
-      builder.AllocateForTask(id);
-
-      auto left  = builder.GetLeftBuffer(nid, begin, end);
-      auto right = builder.GetRightBuffer(nid, begin, end);
-
-      size_t n_left   = rows_for_left_node[nid];
-      size_t n_right = kBlockSize - rows_for_left_node[nid];
-
-      for(size_t i = 0; i < n_left; i++) {
-        left[i] = value_left++;
-      }
-
-      for(size_t i = 0; i < n_right; i++) {
-        right[i] = left_total + value_right++;
-      }
-
-      builder.SetNLeftElems(nid, begin, n_left);
-      builder.SetNRightElems(nid, begin, n_right);
-    }
-  }
-  builder.CalculateRowOffsets();
-
-  std::vector<size_t> v(*std::max_element(tasks.begin(), tasks.end()) * kBlockSize);
-
-  for(size_t nid = 0; nid < kNodes; ++nid) {
-
-    for(size_t j = 0; j < tasks[nid]; ++j) {
-      builder.MergeToArray(nid, kBlockSize*j, v.data());
-    }
-
-    for(size_t j = 0; j < tasks[nid] * kBlockSize; ++j) {
-      ASSERT_EQ(v[j], j);
-    }
-    size_t n_left  = builder.GetNLeftElems(nid);
-    size_t n_right = builder.GetNRightElems(nid);
-
-    ASSERT_EQ(n_left, rows_for_left_node[nid] * tasks[nid]);
-    ASSERT_EQ(n_right, (kBlockSize - rows_for_left_node[nid]) * tasks[nid]);
-  }
-}
-
-}  // namespace common
-}  // namespace xgboost
+/**
+ * Copyright 2020-2023 by XGBoost contributors
+ */
+#include <gtest/gtest.h>
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "../../../src/common/partition_builder.h"
+#include "../../../src/common/row_set.h"
+#include "../helpers.h"
+
+namespace xgboost::common {
+// Exercises PartitionBuilder with five nodes that each own a different number of blocks:
+// every block gets known left/right values, the blocks are merged back into one array and
+// the result is checked to be the sequence 0, 1, 2, ...
+TEST(PartitionBuilder, BasicTest) {
+  constexpr size_t kBlockSize = 16;
+  constexpr size_t kNodes = 5;
+  // Total number of blocks over all nodes; equals the sum of `tasks` below.
+  constexpr size_t kTasks = 3 + 5 + 10 + 1 + 2;
+
+  // Number of blocks handled for each node.
+  std::vector<size_t> tasks = { 3, 5, 10, 1, 2 };
+
+  PartitionBuilder<kBlockSize> builder;
+  builder.Init(kTasks, kNodes, [&](size_t i) {
+    return tasks[i];
+  });
+
+  // For each node, how many rows of every block are written to the left buffer; the
+  // remaining rows of the block go to the right buffer.
+  std::vector<size_t> rows_for_left_node = { 2, 12, 0, 16, 8 };
+
+  for(size_t nid = 0; nid < kNodes; ++nid) {
+    // Running counters shared by all blocks of this node.
+    size_t value_left = 0;
+    size_t value_right = 0;
+
+    // Number of left rows over all blocks of this node; right values start from here.
+    size_t left_total = tasks[nid] * rows_for_left_node[nid];
+
+    for(size_t j = 0; j < tasks[nid]; ++j) {
+      size_t begin = kBlockSize*j;
+      size_t end = kBlockSize*(j+1);
+      const size_t id = builder.GetTaskIdx(nid, begin);
+      builder.AllocateForTask(id);
+
+      auto left  = builder.GetLeftBuffer(nid, begin, end);
+      auto right = builder.GetRightBuffer(nid, begin, end);
+
+      size_t n_left   = rows_for_left_node[nid];
+      size_t n_right = kBlockSize - rows_for_left_node[nid];
+
+      // Left rows count 0, 1, 2, ... across the blocks of this node.
+      for(size_t i = 0; i < n_left; i++) {
+        left[i] = value_left++;
+      }
+
+      // Right rows continue the sequence after the last left value.
+      for(size_t i = 0; i < n_right; i++) {
+        right[i] = left_total + value_right++;
+      }
+
+      builder.SetNLeftElems(nid, begin, n_left);
+      builder.SetNRightElems(nid, begin, n_right);
+    }
+  }
+  builder.CalculateRowOffsets();
+
+  // Scratch output sized for the node with the most blocks; reused for every node.
+  std::vector<size_t> v(*std::max_element(tasks.begin(), tasks.end()) * kBlockSize);
+
+  for(size_t nid = 0; nid < kNodes; ++nid) {
+
+    for(size_t j = 0; j < tasks[nid]; ++j) {
+      builder.MergeToArray(nid, kBlockSize*j, v.data());
+    }
+
+    // Given the values written above, the merged array must read 0, 1, 2, ...
+    for(size_t j = 0; j < tasks[nid] * kBlockSize; ++j) {
+      ASSERT_EQ(v[j], j);
+    }
+    size_t n_left  = builder.GetNLeftElems(nid);
+    size_t n_right = builder.GetNRightElems(nid);
+
+    // Per-node totals are the per-block counts times the number of blocks.
+    ASSERT_EQ(n_left, rows_for_left_node[nid] * tasks[nid]);
+    ASSERT_EQ(n_right, (kBlockSize - rows_for_left_node[nid]) * tasks[nid]);
+  }
+}
+}  // namespace xgboost::common
--- a/tests/cpp/common/test_ranking_utils.cc
+++ b/tests/cpp/common/test_ranking_utils.cc
@@ -1,16 +1,25 @@
 /**
 * Copyright 2023 by XGBoost Contributors
 */
-#include <gtest/gtest.h>                        // for Test, AssertionResult, Message, TestPartR...
-#include <gtest/gtest.h>                        // for ASSERT_NEAR, ASSERT_T...
-#include <xgboost/base.h>                       // for Args
+#include "test_ranking_utils.h"
+
+#include <gtest/gtest.h>
+#include <xgboost/base.h>                       // for Args, bst_group_t, kRtEps
 #include <xgboost/context.h>                    // for Context
+#include <xgboost/data.h>                       // for MetaInfo, DMatrix
+#include <xgboost/host_device_vector.h>         // for HostDeviceVector
+#include <xgboost/logging.h>                    // for Error
 #include <xgboost/string_view.h>                // for StringView

+#include <cstddef>                              // for size_t
 #include <cstdint>                              // for uint32_t
-#include <utility>                              // for pair
+#include <numeric>                              // for iota
+#include <utility>                              // for move
+#include <vector>                               // for vector

+#include "../../../src/common/numeric.h"        // for Iota
 #include "../../../src/common/ranking_utils.h"  // for LambdaRankParam, ParseMetricName, MakeMet...
+#include "../helpers.h"                         // for EmptyDMatrix

 namespace xgboost::ltr {
 TEST(RankingUtils, LambdaRankParam) {
@@ -66,4 +75,138 @@ TEST(RankingUtils, MakeMetricName) {
  name = MakeMetricName("map", 2, false);
  ASSERT_EQ(name, "map@2");
 }
+
+// Checks RankingCache::SortedIdx: for strictly increasing predictions the returned indices
+// must run from n - 1 down to 0.
+void TestRankingCache(Context const* ctx) {
+  auto p_fmat = EmptyDMatrix();
+  MetaInfo& info = p_fmat->Info();
+
+  // 16 rows with alternating 0/1 labels.
+  info.num_row_ = 16;
+  info.labels.Reshape(info.num_row_);
+  auto& labels = info.labels.Data()->HostVector();
+  std::size_t row = 0;
+  for (auto& label : labels) {
+    label = row % 2;
+    ++row;
+  }
+
+  LambdaRankParam param;
+  param.UpdateAllowUnknown(Args{});
+  RankingCache cache{ctx, info, param};
+
+  // Predictions are 0, 1, 2, ... in row order.
+  HostDeviceVector<float> predt(info.num_row_, 0);
+  auto& h_predt = predt.HostVector();
+  std::iota(h_predt.begin(), h_predt.end(), 0.0f);
+  predt.SetDevice(ctx->gpu_id);
+
+  // Hand over the span that matches the device of the context.
+  auto predt_span = ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan();
+  auto rank_idx = cache.SortedIdx(ctx, predt_span);
+
+  auto const n_ranked = rank_idx.size();
+  for (std::size_t i = 0; i < n_ranked; ++i) {
+    ASSERT_EQ(rank_idx[i], n_ranked - i - 1);
+  }
+}
+
+// Runs TestRankingCache with a default-constructed Context (the CPU case, going by the
+// test name).
+TEST(RankingCache, InitFromCPU) {
+  Context ctx;
+  TestRankingCache(&ctx);
+}
+
+// Exercises NDCGCache construction for several label/group configurations. Declared in
+// test_ranking_utils.h so that the GPU test can call it with a CUDA context.
+// The scoped sections below build on each other: `info` and `param` are mutated in place.
+void TestNDCGCache(Context const* ctx) {
+  auto p_fmat = EmptyDMatrix();
+  MetaInfo& info = p_fmat->Info();
+  LambdaRankParam param;
+  param.UpdateAllowUnknown(Args{});
+
+  {
+    // empty
+    // Even without data the group pointer is expected to hold two entries.
+    NDCGCache cache{ctx, info, param};
+    ASSERT_EQ(cache.DataGroupPtr(ctx).size(), 2);
+  }
+
+  // A single query group covering three rows.
+  info.num_row_ = 3;
+  info.group_ptr_ = {static_cast<bst_group_t>(0), static_cast<bst_group_t>(info.num_row_)};
+
+  {
+    auto fail = [&]() { NDCGCache cache{ctx, info, param}; };
+    // empty label
+    ASSERT_THROW(fail(), dmlc::Error);
+    info.labels = linalg::Matrix<float>{{0.0f, 0.1f, 0.2f}, {3}, Context::kCpuId};
+    // invalid label
+    // NOTE(review): presumably rejected because the labels are fractional - confirm against
+    // the NDCGCache implementation.
+    ASSERT_THROW(fail(), dmlc::Error);
+    // Scale the labels by 10 (nominally 0, 1, 2) and switch off the exponential gain.
+    auto h_labels = info.labels.HostView();
+    for (std::size_t i = 0; i < h_labels.Size(); ++i) {
+      h_labels(i) *= 10;
+    }
+    param.UpdateAllowUnknown(Args{{"ndcg_exp_gain", "false"}});
+    NDCGCache cache{ctx, info, param};
+    // The result is read back through a separate default-constructed context.
+    Context cpuctx;
+    auto inv_idcg = cache.InvIDCG(&cpuctx);
+    ASSERT_EQ(inv_idcg.Size(), 1);
+    // IDCG with linear gain for labels {2, 1, 0}: 2 / log2(2) + 1 / log2(3) = 2.63093.
+    ASSERT_NEAR(1.0 / inv_idcg(0), 2.63093, kRtEps);
+  }
+
+  {
+    param.UpdateAllowUnknown(Args{{"lambdarank_unbiased", "false"}});
+
+    // Replace the labels with 0, 1, ..., 31 and stretch the single group to 32 rows.
+    std::vector<float> h_data(32);
+
+    common::Iota(ctx, h_data.begin(), h_data.end(), 0.0f);
+    info.labels.Reshape(h_data.size());
+    info.num_row_ = h_data.size();
+    info.group_ptr_.back() = info.num_row_;
+    info.labels.Data()->HostVector() = std::move(h_data);
+
+    {
+      NDCGCache cache{ctx, info, param};
+      Context cpuctx;
+      auto inv_idcg = cache.InvIDCG(&cpuctx);
+      ASSERT_NEAR(inv_idcg(0), 0.00551782, kRtEps);
+    }
+
+    param.UpdateAllowUnknown(
+        Args{{"lambdarank_num_pair_per_sample", "3"}, {"lambdarank_pair_method", "topk"}});
+    {
+      NDCGCache cache{ctx, info, param};
+      Context cpuctx;
+      auto inv_idcg = cache.InvIDCG(&cpuctx);
+      // Consistent with an IDCG truncated to the top 3 labels:
+      // 31 + 30 / log2(3) + 29 / 2 ~= 64.43, whose inverse is ~0.01552.
+      ASSERT_NEAR(inv_idcg(0), 0.01552123, kRtEps);
+    }
+  }
+}
+
+// Runs the NDCGCache checks with a default-constructed Context.
+TEST(NDCGCache, InitFromCPU) {
+  Context ctx;
+  TestNDCGCache(&ctx);
+}
+
+// Exercises MAPCache construction: labels 0..31 must be rejected, while a 0/1 label vector
+// yields a cache whose buffers have one entry per row. Declared in test_ranking_utils.h so
+// that the GPU test can call it as well.
+void TestMAPCache(Context const* ctx) {
+  auto p_fmat = EmptyDMatrix();
+  MetaInfo& info = p_fmat->Info();
+  LambdaRankParam param;
+  param.UpdateAllowUnknown(Args{});
+
+  // Start with the labels 0, 1, ..., 31.
+  std::vector<float> iota_labels(32);
+  common::Iota(ctx, iota_labels.begin(), iota_labels.end(), 0.0f);
+  info.labels.Reshape(iota_labels.size());
+  info.num_row_ = iota_labels.size();
+  info.labels.Data()->HostVector() = std::move(iota_labels);
+
+  auto make_cache = [&]() { return std::make_shared<MAPCache>(ctx, info, param); };
+  // binary label
+  ASSERT_THROW(make_cache(), dmlc::Error);
+
+  // Swap in a 0/1 label vector with a single positive entry; construction now succeeds.
+  std::vector<float> binary_labels(32, 0.0f);
+  binary_labels[1] = 1.0f;
+  info.labels.Data()->HostVector() = binary_labels;
+  auto p_cache = make_cache();
+
+  ASSERT_EQ(p_cache->Acc(ctx).size(), info.num_row_);
+  ASSERT_EQ(p_cache->NumRelevant(ctx).size(), info.num_row_);
+}
+
+// Runs the MAPCache checks on a Context that is explicitly initialised with empty
+// arguments (the other CPU tests in this file use the context as constructed).
+TEST(MAPCache, InitFromCPU) {
+  Context ctx;
+  ctx.Init(Args{});
+  TestMAPCache(&ctx);
+}
 }  // namespace xgboost::ltr
--- a/tests/cpp/common/test_ranking_utils.cu
+++ b/tests/cpp/common/test_ranking_utils.cu
@@ -0,0 +1,104 @@
+/**
+ * Copyright 2023 by XGBoost Contributors
+ */
+#include <gtest/gtest.h>
+#include <xgboost/base.h>                          // for Args, XGBOOST_DEVICE, bst_group_t, kRtEps
+#include <xgboost/context.h>                       // for Context
+#include <xgboost/linalg.h>                        // for MakeTensorView, Vector
+
+#include <cstddef>                                 // for size_t
+#include <memory>                                  // for shared_ptr
+#include <numeric>                                 // for iota
+#include <vector>                                  // for vector
+
+#include "../../../src/common/algorithm.cuh"       // for SegmentedSequence
+#include "../../../src/common/cuda_context.cuh"    // for CUDAContext
+#include "../../../src/common/device_helpers.cuh"  // for device_vector, ToSpan
+#include "../../../src/common/ranking_utils.cuh"   // for CalcQueriesInvIDCG
+#include "../../../src/common/ranking_utils.h"     // for LambdaRankParam, RankingCache
+#include "../helpers.h"                            // for EmptyDMatrix
+#include "test_ranking_utils.h"                    // for TestNDCGCache
+#include "xgboost/data.h"                          // for MetaInfo
+#include "xgboost/host_device_vector.h"            // for HostDeviceVector
+
+namespace xgboost::ltr {
+// Checks cuda_impl::CalcQueriesInvIDCG on 5 equally sized query groups of 32 samples each;
+// every group must produce the same inverse IDCG.
+void TestCalcQueriesInvIDCG() {
+  Context ctx;
+  ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
+  std::size_t n_groups = 5, n_samples_per_group = 32;
+
+  dh::device_vector<float> scores(n_samples_per_group * n_groups);
+  dh::device_vector<bst_group_t> group_ptr(n_groups + 1);
+  auto d_group_ptr = dh::ToSpan(group_ptr);
+  // Group boundaries at 0, 32, 64, ...; written on the stream of the context.
+  dh::LaunchN(d_group_ptr.size(), ctx.CUDACtx()->Stream(),
+              [=] XGBOOST_DEVICE(std::size_t i) { d_group_ptr[i] = i * n_samples_per_group; });
+
+  auto d_scores = dh::ToSpan(scores);
+  // NOTE(review): presumably fills each group with 0, 1, ..., 31 - confirm against
+  // common::SegmentedSequence.
+  common::SegmentedSequence(&ctx, d_group_ptr, d_scores);
+
+  // One output value per group, allocated on the same device as the context.
+  linalg::Vector<double> inv_IDCG({n_groups}, ctx.gpu_id);
+
+  ltr::LambdaRankParam p;
+  p.UpdateAllowUnknown(Args{{"ndcg_exp_gain", "false"}});
+
+  cuda_impl::CalcQueriesInvIDCG(&ctx, linalg::MakeTensorView(&ctx, d_scores, d_scores.size()),
+                                dh::ToSpan(group_ptr), inv_IDCG.View(ctx.gpu_id), p);
+  for (std::size_t i = 0; i < n_groups; ++i) {
+    double inv_idcg = inv_IDCG(i);
+    // Same expected value as the 32-label case in TestNDCGCache (test_ranking_utils.cc).
+    ASSERT_NEAR(inv_idcg, 0.00551782, kRtEps);
+  }
+}
+
+// Thin gtest entry point; the body lives in a free function.
+TEST(RankingUtils, CalcQueriesInvIDCG) { TestCalcQueriesInvIDCG(); }
+
+namespace {
+// Device-side variant of the ranking cache check: the sorted indices are copied into a host
+// vector before being compared with the expected n - 1, ..., 0 order.
+void TestRankingCache(Context const* ctx) {
+  auto p_fmat = EmptyDMatrix();
+  MetaInfo& info = p_fmat->Info();
+
+  // 16 rows with alternating 0/1 labels.
+  info.num_row_ = 16;
+  info.labels.Reshape(info.num_row_);
+  auto& labels = info.labels.Data()->HostVector();
+  std::size_t row = 0;
+  for (auto& label : labels) {
+    label = row % 2;
+    ++row;
+  }
+
+  LambdaRankParam param;
+  param.UpdateAllowUnknown(Args{});
+  RankingCache cache{ctx, info, param};
+
+  // Predictions are 0, 1, 2, ... in row order.
+  HostDeviceVector<float> predt(info.num_row_, 0);
+  auto& h_predt = predt.HostVector();
+  std::iota(h_predt.begin(), h_predt.end(), 0.0f);
+  predt.SetDevice(ctx->gpu_id);
+
+  // Hand over the span that matches the device of the context.
+  auto predt_span = ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan();
+  auto rank_idx = cache.SortedIdx(ctx, predt_span);
+
+  // Bring the indices to the host so that they can be inspected with gtest assertions.
+  std::vector<std::size_t> h_rank_idx(rank_idx.size());
+  dh::CopyDeviceSpanToVector(&h_rank_idx, rank_idx);
+  auto const n_ranked = rank_idx.size();
+  for (std::size_t i = 0; i < n_ranked; ++i) {
+    ASSERT_EQ(h_rank_idx[i], h_rank_idx.size() - i - 1);
+  }
+}
+}  // namespace
+
+// Runs the ranking cache check with the context configured for gpu_id 0.
+TEST(RankingCache, InitFromGPU) {
+  Context ctx;
+  ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
+  TestRankingCache(&ctx);
+}
+
+// Runs the NDCGCache checks declared in test_ranking_utils.h with the context configured
+// for gpu_id 0.
+TEST(NDCGCache, InitFromGPU) {
+  Context ctx;
+  ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
+  TestNDCGCache(&ctx);
+}
+
+// Runs the MAPCache checks declared in test_ranking_utils.h with the context configured
+// for gpu_id 0.
+TEST(MAPCache, InitFromGPU) {
+  Context ctx;
+  ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
+  TestMAPCache(&ctx);
+}
+}  // namespace xgboost::ltr
--- a/tests/cpp/common/test_ranking_utils.h
+++ b/tests/cpp/common/test_ranking_utils.h
@@ -0,0 +1,11 @@
+/**
+ * Copyright 2023 by XGBoost Contributors
+ */
+#pragma once
+#include <xgboost/context.h>  // for Context
+
+namespace xgboost::ltr {
+// NDCGCache checks defined in test_ranking_utils.cc; called by both the CPU and the GPU test
+// with their respective contexts.
+void TestNDCGCache(Context const* ctx);
+
+// MAPCache checks defined in test_ranking_utils.cc; called by both the CPU and the GPU test
+// with their respective contexts.
+void TestMAPCache(Context const* ctx);
+}  // namespace xgboost::ltr
--- a/tests/cpp/data/test_data.cc
+++ b/tests/cpp/data/test_data.cc
@@ -112,31 +112,12 @@ TEST(SparsePage, SortIndices) {
 }

 TEST(DMatrix, Uri) {
-  size_t constexpr kRows {16};
-  size_t constexpr kCols {8};
-  std::vector<float> data (kRows * kCols);
-
-  for (size_t i = 0; i < kRows * kCols; ++i) {
-    data[i] = i;
-  }
+  auto constexpr kRows {16};
+  auto constexpr kCols {8};

  dmlc::TemporaryDirectory tmpdir;
-  std::string path = tmpdir.path + "/small.csv";
-
-  std::ofstream fout(path);
-  size_t i = 0;
-  for (size_t r = 0; r < kRows; ++r) {
-    for (size_t c = 0; c < kCols; ++c) {
-      fout << data[i];
-      i++;
-      if (c != kCols - 1) {
-        fout << ",";
-      }
-    }
-    fout << "\n";
-  }
-  fout.flush();
-  fout.close();
+  auto const path = tmpdir.path + "/small.csv";
+  CreateTestCSV(path, kRows, kCols);

  std::unique_ptr<DMatrix> dmat;
  // FIXME(trivialfis): Enable the following test by restricting csv parser in dmlc-core.
--- a/tests/cpp/data/test_file_iterator.cc
+++ b/tests/cpp/data/test_file_iterator.cc
@@ -1,8 +1,9 @@
-/*!
- * Copyright 2021 XGBoost contributors
+/**
+ * Copyright 2021-2023 XGBoost contributors
 */
 #include <gtest/gtest.h>

+#include <any>  // for any_cast
 #include <memory>

 #include "../../../src/data/adapter.h"
@@ -11,15 +12,14 @@
 #include "../filesystem.h"  // dmlc::TemporaryDirectory
 #include "../helpers.h"

-namespace xgboost {
-namespace data {
+namespace xgboost::data {
 TEST(FileIterator, Basic) {
  auto check_n_features = [](FileIterator *iter) {
    size_t n_features = 0;
    iter->Reset();
    while (iter->Next()) {
      auto proxy = MakeProxy(iter->Proxy());
-      auto csr = dmlc::get<std::shared_ptr<CSRArrayAdapter>>(proxy->Adapter());
+      auto csr = std::any_cast<std::shared_ptr<CSRArrayAdapter>>(proxy->Adapter());
      n_features = std::max(n_features, csr->NumColumns());
    }
    ASSERT_EQ(n_features, 5);
@@ -42,5 +42,4 @@ TEST(FileIterator, Basic) {
    check_n_features(&iter);
  }
 }
-}  // namespace data
-}  // namespace xgboost
+}  // namespace xgboost::data
--- a/tests/cpp/data/test_proxy_dmatrix.cu
+++ b/tests/cpp/data/test_proxy_dmatrix.cu
@@ -1,23 +1,24 @@
+/**
+ * Copyright 2020-2023 XGBoost contributors
+ */
 #include <gtest/gtest.h>
 #include <xgboost/host_device_vector.h>
+
+#include <any>  // for any_cast
 #include <memory>
-#include "../helpers.h"

 #include "../../../src/data/device_adapter.cuh"
 #include "../../../src/data/proxy_dmatrix.h"
+#include "../helpers.h"

-namespace xgboost {
-namespace data {
+namespace xgboost::data {
 TEST(ProxyDMatrix, DeviceData) {
  constexpr size_t kRows{100}, kCols{100};
  HostDeviceVector<float> storage;
-  auto data = RandomDataGenerator(kRows, kCols, 0.5)
-                  .Device(0)
-                  .GenerateArrayInterface(&storage);
+  auto data = RandomDataGenerator(kRows, kCols, 0.5).Device(0).GenerateArrayInterface(&storage);
  std::vector<HostDeviceVector<float>> label_storage(1);
-  auto labels = RandomDataGenerator(kRows, 1, 0)
-                    .Device(0)
-                    .GenerateColumnarArrayInterface(&label_storage);
+  auto labels =
+      RandomDataGenerator(kRows, 1, 0).Device(0).GenerateColumnarArrayInterface(&label_storage);

  DMatrixProxy proxy;
  proxy.SetCUDAArray(data.c_str());
@@ -25,23 +26,16 @@ TEST(ProxyDMatrix, DeviceData) {

  ASSERT_EQ(proxy.Adapter().type(), typeid(std::shared_ptr<CupyAdapter>));
  ASSERT_EQ(proxy.Info().labels.Size(), kRows);
-  ASSERT_EQ(dmlc::get<std::shared_ptr<CupyAdapter>>(proxy.Adapter())->NumRows(),
-            kRows);
-  ASSERT_EQ(
-      dmlc::get<std::shared_ptr<CupyAdapter>>(proxy.Adapter())->NumColumns(),
-      kCols);
+  ASSERT_EQ(std::any_cast<std::shared_ptr<CupyAdapter>>(proxy.Adapter())->NumRows(), kRows);
+  ASSERT_EQ(std::any_cast<std::shared_ptr<CupyAdapter>>(proxy.Adapter())->NumColumns(), kCols);

  std::vector<HostDeviceVector<float>> columnar_storage(kCols);
  data = RandomDataGenerator(kRows, kCols, 0)
-                    .Device(0)
-                    .GenerateColumnarArrayInterface(&columnar_storage);
+             .Device(0)
+             .GenerateColumnarArrayInterface(&columnar_storage);
  proxy.SetCUDAArray(data.c_str());
  ASSERT_EQ(proxy.Adapter().type(), typeid(std::shared_ptr<CudfAdapter>));
-  ASSERT_EQ(dmlc::get<std::shared_ptr<CudfAdapter>>(proxy.Adapter())->NumRows(),
-            kRows);
-  ASSERT_EQ(
-      dmlc::get<std::shared_ptr<CudfAdapter>>(proxy.Adapter())->NumColumns(),
-      kCols);
+  ASSERT_EQ(std::any_cast<std::shared_ptr<CudfAdapter>>(proxy.Adapter())->NumRows(), kRows);
+  ASSERT_EQ(std::any_cast<std::shared_ptr<CudfAdapter>>(proxy.Adapter())->NumColumns(), kCols);
 }
-}  // namespace data
-}  // namespace xgboost
+}  // namespace xgboost::data
--- a/tests/cpp/gbm/test_gbtree.cc
+++ b/tests/cpp/gbm/test_gbtree.cc
@@ -412,7 +412,7 @@ std::pair<Json, Json> TestModelSlice(std::string booster) {
    j++;
  }

-  // CHECK sliced model doesn't have dependency on old one
+  // CHECK sliced model doesn't have dependency on the old one
  learner.reset();
  CHECK_EQ(sliced->GetNumFeature(), kCols);

--- a/tests/cpp/helpers.cc
+++ b/tests/cpp/helpers.cc
@@ -65,6 +65,29 @@ void CreateBigTestData(const std::string& filename, size_t n_entries, bool zero_
  }
 }

+/**
+ * \brief Write a dense CSV file whose cells hold 0, 1, 2, ... in row-major order.
+ *
+ * \param path Destination of the CSV file.
+ * \param rows Number of lines to write.
+ * \param cols Number of comma separated values on each line.
+ */
+void CreateTestCSV(std::string const& path, size_t rows, size_t cols) {
+  std::ofstream fout(path);
+  // Fail loudly here instead of letting a later parsing step trip over a missing file.
+  CHECK(fout) << "Failed to open " << path << " for writing.";
+
+  size_t value = 0;
+  for (size_t r = 0; r < rows; ++r) {
+    for (size_t c = 0; c < cols; ++c) {
+      if (c != 0) {
+        fout << ",";
+      }
+      // Stream as float so that the textual format matches a float-valued matrix.
+      fout << static_cast<float>(value);
+      ++value;
+    }
+    fout << "\n";
+  }
+  fout.flush();
+  fout.close();
+}
+
 void CheckObjFunctionImpl(std::unique_ptr<xgboost::ObjFunction> const& obj,
                          std::vector<xgboost::bst_float> preds,
                          std::vector<xgboost::bst_float> labels,
@@ -224,19 +247,18 @@ std::string RandomDataGenerator::GenerateArrayInterface(
  return out;
 }

-std::pair<std::vector<std::string>, std::string>
-RandomDataGenerator::GenerateArrayInterfaceBatch(
-    HostDeviceVector<float> *storage, size_t batches) const {
-  this->GenerateDense(storage);
+std::pair<std::vector<std::string>, std::string> MakeArrayInterfaceBatch(
+    HostDeviceVector<float> const* storage, std::size_t n_samples, bst_feature_t n_features,
+    std::size_t batches, std::int32_t device) {
  std::vector<std::string> result(batches);
  std::vector<Json> objects;

-  size_t const rows_per_batch = rows_ / batches;
+  size_t const rows_per_batch = n_samples / batches;

-  auto make_interface = [storage, this](size_t offset, size_t rows) {
+  auto make_interface = [storage, device, n_features](std::size_t offset, std::size_t rows) {
    Json array_interface{Object()};
    array_interface["data"] = std::vector<Json>(2);
-    if (device_ >= 0) {
+    if (device >= 0) {
      array_interface["data"][0] =
          Integer(reinterpret_cast<int64_t>(storage->DevicePointer() + offset));
      array_interface["stream"] = Null{};
@@ -249,22 +271,22 @@ RandomDataGenerator::GenerateArrayInterfaceBatch(

    array_interface["shape"] = std::vector<Json>(2);
    array_interface["shape"][0] = rows;
-    array_interface["shape"][1] = cols_;
+    array_interface["shape"][1] = n_features;

    array_interface["typestr"] = String("<f4");
    array_interface["version"] = 3;
    return array_interface;
  };

-  auto j_interface = make_interface(0, rows_);
+  auto j_interface = make_interface(0, n_samples);
  size_t offset = 0;
  for (size_t i = 0; i < batches - 1; ++i) {
    objects.emplace_back(make_interface(offset, rows_per_batch));
-    offset += rows_per_batch * cols_;
+    offset += rows_per_batch * n_features;
  }

-  size_t const remaining = rows_ - offset / cols_;
-  CHECK_LE(offset, rows_ * cols_);
+  size_t const remaining = n_samples - offset / n_features;
+  CHECK_LE(offset, n_samples * n_features);
  objects.emplace_back(make_interface(offset, remaining));

  for (size_t i = 0; i < batches; ++i) {
@@ -276,6 +298,12 @@ RandomDataGenerator::GenerateArrayInterfaceBatch(
  return {result, interface_str};
 }

+// Fill `storage` through GenerateDense, then describe it as `batches` array interfaces via
+// MakeArrayInterfaceBatch using the dimensions and device of this generator.
+std::pair<std::vector<std::string>, std::string> RandomDataGenerator::GenerateArrayInterfaceBatch(
+    HostDeviceVector<float>* storage, size_t batches) const {
+  this->GenerateDense(storage);
+  return MakeArrayInterfaceBatch(storage, rows_, cols_, batches, device_);
+}
+
 std::string RandomDataGenerator::GenerateColumnarArrayInterface(
    std::vector<HostDeviceVector<float>> *data) const {
  CHECK(data);
@@ -400,11 +428,14 @@ int NumpyArrayIterForTest::Next() {
  return 1;
 }

-std::shared_ptr<DMatrix>
-GetDMatrixFromData(const std::vector<float> &x, int num_rows, int num_columns){
+std::shared_ptr<DMatrix> GetDMatrixFromData(const std::vector<float>& x, std::size_t num_rows,
+                                            bst_feature_t num_columns) {
  data::DenseAdapter adapter(x.data(), num_rows, num_columns);
-  return std::shared_ptr<DMatrix>(new data::SimpleDMatrix(
-      &adapter, std::numeric_limits<float>::quiet_NaN(), 1));
+  auto p_fmat = std::shared_ptr<DMatrix>(
+      new data::SimpleDMatrix(&adapter, std::numeric_limits<float>::quiet_NaN(), 1));
+  CHECK_EQ(p_fmat->Info().num_row_, num_rows);
+  CHECK_EQ(p_fmat->Info().num_col_, num_columns);
+  return p_fmat;
 }

 std::unique_ptr<DMatrix> CreateSparsePageDMatrix(bst_row_t n_samples, bst_feature_t n_features,
@@ -572,12 +603,23 @@ std::unique_ptr<GradientBooster> CreateTrainedGBM(std::string name, Args kwargs,
  return gbm;
 }

-ArrayIterForTest::ArrayIterForTest(float sparsity, size_t rows, size_t cols,
-                                   size_t batches) : rows_{rows}, cols_{cols}, n_batches_{batches} {
+ArrayIterForTest::ArrayIterForTest(float sparsity, size_t rows, size_t cols, size_t batches)
+    : rows_{rows}, cols_{cols}, n_batches_{batches} {
  XGProxyDMatrixCreate(&proxy_);
  rng_.reset(new RandomDataGenerator{rows_, cols_, sparsity});
+  std::tie(batches_, interface_) = rng_->GenerateArrayInterfaceBatch(&data_, n_batches_);
+}
+
+ArrayIterForTest::ArrayIterForTest(Context const* ctx, HostDeviceVector<float> const& data,
+                                   std::size_t n_samples, bst_feature_t n_features,
+                                   std::size_t n_batches)
+    : rows_{n_samples}, cols_{n_features}, n_batches_{n_batches} {
+  XGProxyDMatrixCreate(&proxy_);
+  this->data_.Resize(data.Size());
+  CHECK_EQ(this->data_.Size(), rows_ * cols_ * n_batches);
+  this->data_.Copy(data);
  std::tie(batches_, interface_) =
-      rng_->GenerateArrayInterfaceBatch(&data_, n_batches_);
+      MakeArrayInterfaceBatch(&data_, rows_, cols_, n_batches_, ctx->gpu_id);
 }

 ArrayIterForTest::~ArrayIterForTest() { XGDMatrixFree(proxy_); }
--- a/tests/cpp/helpers.h
+++ b/tests/cpp/helpers.h
@@ -59,6 +59,8 @@ void CreateSimpleTestData(const std::string& filename);
 // 0-based indexing.
 void CreateBigTestData(const std::string& filename, size_t n_entries, bool zero_based = true);

+void CreateTestCSV(std::string const& path, size_t rows, size_t cols);
+
 void CheckObjFunction(std::unique_ptr<xgboost::ObjFunction> const& obj,
                      std::vector<xgboost::bst_float> preds,
                      std::vector<xgboost::bst_float> labels,
@@ -188,7 +190,7 @@ class SimpleRealUniformDistribution {
 };

 template <typename T>
-Json GetArrayInterface(HostDeviceVector<T> *storage, size_t rows, size_t cols) {
+Json GetArrayInterface(HostDeviceVector<T> const* storage, size_t rows, size_t cols) {
  Json array_interface{Object()};
  array_interface["data"] = std::vector<Json>(2);
  if (storage->DeviceCanRead()) {
@@ -318,8 +320,8 @@ GenerateRandomCategoricalSingleColumn(int n, size_t num_categories) {
  return x;
 }

-std::shared_ptr<DMatrix> GetDMatrixFromData(const std::vector<float> &x,
-                                            int num_rows, int num_columns);
+std::shared_ptr<DMatrix> GetDMatrixFromData(const std::vector<float>& x, std::size_t num_rows,
+                                            bst_feature_t num_columns);

 /**
 * \brief Create Sparse Page using data iterator.
@@ -394,7 +396,7 @@ typedef void *DMatrixHandle;  // NOLINT(*);
 class ArrayIterForTest {
 protected:
  HostDeviceVector<float> data_;
-  size_t iter_ {0};
+  size_t iter_{0};
  DMatrixHandle proxy_;
  std::unique_ptr<RandomDataGenerator> rng_;

@@ -418,6 +420,11 @@ class ArrayIterForTest {
  auto Proxy() -> decltype(proxy_) { return proxy_; }

  explicit ArrayIterForTest(float sparsity, size_t rows, size_t cols, size_t batches);
+  /**
+   * \brief Create iterator with user provided data.
+   */
+  explicit ArrayIterForTest(Context const* ctx, HostDeviceVector<float> const& data,
+                            std::size_t n_samples, bst_feature_t n_features, std::size_t n_batches);
  virtual ~ArrayIterForTest();
 };

@@ -433,6 +440,10 @@ class NumpyArrayIterForTest : public ArrayIterForTest {
 public:
  explicit NumpyArrayIterForTest(float sparsity, size_t rows = Rows(), size_t cols = Cols(),
                                 size_t batches = Batches());
+  explicit NumpyArrayIterForTest(Context const* ctx, HostDeviceVector<float> const& data,
+                                 std::size_t n_samples, bst_feature_t n_features,
+                                 std::size_t n_batches)
+      : ArrayIterForTest{ctx, data, n_samples, n_features, n_batches} {}
  int Next() override;
  ~NumpyArrayIterForTest() override = default;
 };
@@ -462,7 +473,7 @@ inline LearnerModelParam MakeMP(bst_feature_t n_features, float base_score, uint
                                int32_t device = Context::kCpuId) {
  size_t shape[1]{1};
  LearnerModelParam mparam(n_features, linalg::Tensor<float, 1>{{base_score}, shape, device},
-                           n_groups, 1, MultiStrategy::kComposite);
+                           n_groups, 1, MultiStrategy::kOneOutputPerTree);
  return mparam;
 }

--- a/tests/cpp/metric/test_rank_metric.cc
+++ b/tests/cpp/metric/test_rank_metric.cc
@@ -1,7 +1,20 @@
-// Copyright by Contributors
-#include <xgboost/metric.h>
+/**
+ * Copyright 2016-2023 by XGBoost Contributors
+ */
+#include <gtest/gtest.h>                 // for Test, EXPECT_NEAR, ASSERT_STREQ
+#include <xgboost/context.h>             // for Context
+#include <xgboost/data.h>                // for MetaInfo, DMatrix
+#include <xgboost/linalg.h>              // for Matrix
+#include <xgboost/metric.h>              // for Metric

-#include "../helpers.h"
+#include <algorithm>                     // for max
+#include <memory>                        // for unique_ptr
+#include <vector>                        // for vector
+
+#include "../helpers.h"                  // for GetMetricEval, CreateEmptyGe...
+#include "xgboost/base.h"                // for bst_float, kRtEps
+#include "xgboost/host_device_vector.h"  // for HostDeviceVector
+#include "xgboost/json.h"                // for Json, String, Object

 #if !defined(__CUDACC__) && !defined(__HIP_PLATFORM_AMD__)
 TEST(Metric, AMS) {
@@ -51,15 +64,17 @@ TEST(Metric, DeclareUnifiedTest(Precision)) {
  delete metric;
 }

+namespace xgboost {
+namespace metric {
 TEST(Metric, DeclareUnifiedTest(NDCG)) {
-  auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
-  xgboost::Metric * metric = xgboost::Metric::Create("ndcg", &ctx);
+  auto ctx = CreateEmptyGenericParam(GPUIDX);
+  Metric * metric = xgboost::Metric::Create("ndcg", &ctx);
  ASSERT_STREQ(metric->Name(), "ndcg");
  EXPECT_ANY_THROW(GetMetricEval(metric, {0, 1}, {}));
-  EXPECT_NEAR(GetMetricEval(metric,
+  ASSERT_NEAR(GetMetricEval(metric,
                            xgboost::HostDeviceVector<xgboost::bst_float>{},
                            {}), 1, 1e-10);
-  EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10);
+  ASSERT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10);
  EXPECT_NEAR(GetMetricEval(metric,
                            {0.1f, 0.9f, 0.1f, 0.9f},
                            {  0,   0,   1,   1}),
@@ -80,7 +95,7 @@ TEST(Metric, DeclareUnifiedTest(NDCG)) {
  EXPECT_NEAR(GetMetricEval(metric,
                            xgboost::HostDeviceVector<xgboost::bst_float>{},
                            {}), 0, 1e-10);
-  EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10);
+  ASSERT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1.f, 1e-10);
  EXPECT_NEAR(GetMetricEval(metric,
                            {0.1f, 0.9f, 0.1f, 0.9f},
                            {  0,   0,   1,   1}),
@@ -91,29 +106,30 @@ TEST(Metric, DeclareUnifiedTest(NDCG)) {
  EXPECT_NEAR(GetMetricEval(metric,
                            xgboost::HostDeviceVector<xgboost::bst_float>{},
                            {}), 0, 1e-10);
-  EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10);
+  EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1.f, 1e-10);
  EXPECT_NEAR(GetMetricEval(metric,
                            {0.1f, 0.9f, 0.1f, 0.9f},
                            {  0,   0,   1,   1}),
-              0.6509f, 0.001f);
+               0.6509f, 0.001f);

  delete metric;
  metric = xgboost::Metric::Create("ndcg@2-", &ctx);
  ASSERT_STREQ(metric->Name(), "ndcg@2-");
-  EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10);
+  EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1.f, 1e-10);
  EXPECT_NEAR(GetMetricEval(metric,
                            {0.1f, 0.9f, 0.1f, 0.9f},
                            {  0,   0,   1,   1}),
-              0.3868f, 0.001f);
+              1.f - 0.3868f, 1.f - 0.001f);

  delete metric;
 }

 TEST(Metric, DeclareUnifiedTest(MAP)) {
  auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
-  xgboost::Metric * metric = xgboost::Metric::Create("map", &ctx);
+  Metric * metric = xgboost::Metric::Create("map", &ctx);
  ASSERT_STREQ(metric->Name(), "map");
-  EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10);
+  EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, kRtEps);
+
  EXPECT_NEAR(GetMetricEval(metric,
                            {0.1f, 0.9f, 0.1f, 0.9f},
                            {  0,   0,   1,   1}),
@@ -125,7 +141,7 @@ TEST(Metric, DeclareUnifiedTest(MAP)) {
  // Rank metric with group info
  EXPECT_NEAR(GetMetricEval(metric,
                            {0.1f, 0.9f, 0.2f, 0.8f, 0.4f, 1.7f},
-                            {2, 7, 1, 0, 5, 0},  // Labels
+                            {1, 1, 1, 0, 1, 0},  // Labels
                            {},  // Weights
                            {0, 2, 5, 6}),  // Group info
              0.8611f, 0.001f);
@@ -154,3 +170,39 @@ TEST(Metric, DeclareUnifiedTest(MAP)) {
              0.25f, 0.001f);
  delete metric;
 }
+
+TEST(Metric, DeclareUnifiedTest(NDCGExpGain)) {
+  Context ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
+  // Start from an empty DMatrix, then fill in labels and a single query group through Info().
+  auto p_fmat = xgboost::RandomDataGenerator{0, 0, 0}.GenerateDMatrix();
+  MetaInfo& info = p_fmat->Info();
+  info.labels = linalg::Matrix<float>{{10.0f, 0.0f, 0.0f, 1.0f, 5.0f}, {5}, ctx.gpu_id};
+  info.num_row_ = info.labels.Shape(0);
+  info.group_ptr_.resize(2);
+  info.group_ptr_[0] = 0;
+  info.group_ptr_[1] = info.num_row_;  // one group spanning every row
+  HostDeviceVector<float> predt{{0.1f, 0.2f, 0.3f, 4.0f, 70.0f}};
+
+  std::unique_ptr<Metric> metric{Metric::Create("ndcg", &ctx)};
+  Json config{Object{}};
+  config["name"] = String{"ndcg"};
+  config["lambdarank_param"] = Object{};
+  config["lambdarank_param"]["ndcg_exp_gain"] = String{"true"};
+  config["lambdarank_param"]["lambdarank_num_pair_per_sample"] = String{"32"};
+  metric->LoadConfig(config);
+
+  auto ndcg = metric->Evaluate(predt, p_fmat);
+  ASSERT_NEAR(ndcg, 0.409738f, kRtEps);  // expected value with ndcg_exp_gain = "true"
+
+  config["lambdarank_param"]["ndcg_exp_gain"] = String{"false"};
+  metric->LoadConfig(config);
+
+  ndcg = metric->Evaluate(predt, p_fmat);
+  ASSERT_NEAR(ndcg, 0.695694f, kRtEps);  // expected value with ndcg_exp_gain = "false"
+
+  predt.HostVector() = info.labels.Data()->HostVector();  // predictions now equal the labels
+  ndcg = metric->Evaluate(predt, p_fmat);
+  ASSERT_NEAR(ndcg, 1.0, kRtEps);
+}
+}  // namespace metric
+}  // namespace xgboost
--- a/tests/cpp/plugin/helpers.cc
+++ b/tests/cpp/plugin/helpers.cc
@@ -1,19 +0,0 @@
-#include <chrono>
-#include <thread>
-#include <random>
-#include <cstdint>
-
-#include "helpers.h"
-
-using namespace std::chrono_literals;
-
-int GenerateRandomPort(int low, int high) {
-  // Ensure unique timestamp by introducing a small artificial delay
-  std::this_thread::sleep_for(100ms);
-  auto timestamp = static_cast<uint64_t>(std::chrono::duration_cast<std::chrono::milliseconds>(
-    std::chrono::system_clock::now().time_since_epoch()).count());
-  std::mt19937_64 rng(timestamp);
-  std::uniform_int_distribution<int> dist(low, high);
-  int port = dist(rng);
-  return port;
-}
--- a/tests/cpp/plugin/helpers.h
+++ b/tests/cpp/plugin/helpers.h
@@ -1,10 +1,104 @@
 /*!
- * Copyright 2022 XGBoost contributors
+ * Copyright 2022-2023 XGBoost contributors
 */
+#pragma once

-#ifndef XGBOOST_TESTS_CPP_PLUGIN_HELPERS_H_
-#define XGBOOST_TESTS_CPP_PLUGIN_HELPERS_H_
+#include <grpcpp/server_builder.h>
+#include <gtest/gtest.h>
+#include <xgboost/json.h>

-int GenerateRandomPort(int low, int high);
+#include <chrono>   // for milliseconds, system_clock, duration_cast
+#include <cstdint>  // for uint64_t
+#include <future>   // for promise
+#include <memory>   // for unique_ptr
+#include <random>   // for mt19937_64, uniform_int_distribution
+#include <string>   // for string, to_string
+#include <thread>   // for thread, sleep_for
+#include <utility>  // for move

-#endif  // XGBOOST_TESTS_CPP_PLUGIN_HELPERS_H_
+#include "../../../plugin/federated/federated_server.h"
+#include "../../../src/collective/communicator-inl.h"
+
+/**
+ * \brief Pick a pseudo-random port in the closed range [low, high].
+ *
+ * The generator is seeded with the wall-clock time in milliseconds; the returned port is
+ * not checked for availability.
+ */
+inline int GenerateRandomPort(int low, int high) {
+  using namespace std::chrono_literals;
+  // Ensure unique timestamp by introducing a small artificial delay
+  std::this_thread::sleep_for(100ms);
+  auto timestamp = static_cast<uint64_t>(std::chrono::duration_cast<std::chrono::milliseconds>(
+                                             std::chrono::system_clock::now().time_since_epoch())
+                                             .count());
+  std::mt19937_64 rng(timestamp);
+  std::uniform_int_distribution<int> dist(low, high);
+  int port = dist(rng);
+  return port;
+}
+
+/** \brief Build a `localhost:<port>` address with a random port in [50000, 60000]. */
+inline std::string GetServerAddress() {
+  int port = GenerateRandomPort(50000, 60000);
+  std::string address = std::string("localhost:") + std::to_string(port);
+  return address;
+}
+
+namespace xgboost {
+
+/**
+ * \brief Test fixture that runs an in-process federated gRPC server for each test.
+ *
+ * SetUp() blocks until the server thread has finished BuildAndStart(), so neither the test
+ * body nor TearDown() can observe `server_` before it has been assigned.
+ */
+class BaseFederatedTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    server_address_ = GetServerAddress();
+    // Fulfilled by the server thread once `server_` has been assigned.
+    std::promise<void> started;
+    auto ready = started.get_future();
+    server_thread_.reset(new std::thread([this, started = std::move(started)]() mutable {
+      grpc::ServerBuilder builder;
+      xgboost::federated::FederatedService service{kWorldSize};
+      builder.AddListeningPort(server_address_, grpc::InsecureServerCredentials());
+      builder.RegisterService(&service);
+      server_ = builder.BuildAndStart();
+      started.set_value();
+      // BuildAndStart() yields a null server on failure, e.g. when the port is taken.
+      if (server_) {
+        server_->Wait();
+      }
+    }));
+    ready.wait();
+    ASSERT_TRUE(server_ != nullptr) << "Failed to start federated server at " << server_address_;
+  }
+
+  void TearDown() override {
+    // Guard both handles: SetUp() may have failed before the server came up.
+    if (server_) {
+      server_->Shutdown();
+    }
+    if (server_thread_ && server_thread_->joinable()) {
+      server_thread_->join();
+    }
+  }
+
+  /** \brief Configure the collective communicator as federated worker `rank` of this server. */
+  void InitCommunicator(int rank) {
+    Json config{JsonObject()};
+    config["xgboost_communicator"] = String("federated");
+    config["federated_server_address"] = String(server_address_);
+    config["federated_world_size"] = kWorldSize;
+    config["federated_rank"] = rank;
+    xgboost::collective::Init(config);
+  }
+
+  static int const kWorldSize{3};
+  std::string server_address_;
+  std::unique_ptr<std::thread> server_thread_;
+  std::unique_ptr<grpc::Server> server_;
+};
+}  // namespace xgboost
--- a/tests/cpp/plugin/test_federated_adapter.cu
+++ b/tests/cpp/plugin/test_federated_adapter.cu
@@ -1,56 +1,20 @@
 /*!
 * Copyright 2022 XGBoost contributors
 */
-#include <grpcpp/server_builder.h>
 #include <gtest/gtest.h>
 #include <thrust/host_vector.h>

+#include <ctime>
 #include <iostream>
 #include <thread>
-#include <ctime>

-#include "./helpers.h"
 #include "../../../plugin/federated/federated_communicator.h"
-#include "../../../plugin/federated/federated_server.h"
 #include "../../../src/collective/device_communicator_adapter.cuh"
+#include "./helpers.h"

-namespace {
+namespace xgboost::collective {

-std::string GetServerAddress() {
-  int port = GenerateRandomPort(50000, 60000);
-  std::string address = std::string("localhost:") + std::to_string(port);
-  return address;
-}
-
-}  // anonymous namespace
-
-namespace xgboost {
-namespace collective {
-
-class FederatedAdapterTest : public ::testing::Test {
- protected:
-  void SetUp() override {
-    server_address_ = GetServerAddress();
-    server_thread_.reset(new std::thread([this] {
-      grpc::ServerBuilder builder;
-      federated::FederatedService service{kWorldSize};
-      builder.AddListeningPort(server_address_, grpc::InsecureServerCredentials());
-      builder.RegisterService(&service);
-      server_ = builder.BuildAndStart();
-      server_->Wait();
-    }));
-  }
-
-  void TearDown() override {
-    server_->Shutdown();
-    server_thread_->join();
-  }
-
-  static int const kWorldSize{2};
-  std::string server_address_;
-  std::unique_ptr<std::thread> server_thread_;
-  std::unique_ptr<grpc::Server> server_;
-};
+class FederatedAdapterTest : public BaseFederatedTest {};  // reuses the shared server fixture

 TEST(FederatedAdapterSimpleTest, ThrowOnInvalidDeviceOrdinal) {
  auto construct = []() { DeviceCommunicatorAdapter adapter{-1, nullptr}; };
@@ -65,20 +29,20 @@ TEST(FederatedAdapterSimpleTest, ThrowOnInvalidCommunicator) {
 TEST_F(FederatedAdapterTest, DeviceAllReduceSum) {
  std::vector<std::thread> threads;
  for (auto rank = 0; rank < kWorldSize; rank++) {
-    threads.emplace_back(std::thread([rank, server_address=server_address_] {
+    threads.emplace_back([rank, server_address = server_address_] {
      FederatedCommunicator comm{kWorldSize, rank, server_address};
      // Assign device 0 to all workers, since we run gtest in a single-GPU machine
      DeviceCommunicatorAdapter adapter{0, &comm};
-      int const count = 3;
+      int count = 3;
      thrust::device_vector<double> buffer(count, 0);
      thrust::sequence(buffer.begin(), buffer.end());
      adapter.AllReduceSum(buffer.data().get(), count);
      thrust::host_vector<double> host_buffer = buffer;
      EXPECT_EQ(host_buffer.size(), count);
      for (auto i = 0; i < count; i++) {
-        EXPECT_EQ(host_buffer[i], i * 2);
+        EXPECT_EQ(host_buffer[i], i * kWorldSize);
      }
-    }));
+    });
  }
  for (auto& thread : threads) {
    thread.join();
@@ -88,7 +52,7 @@ TEST_F(FederatedAdapterTest, DeviceAllReduceSum) {
 TEST_F(FederatedAdapterTest, DeviceAllGatherV) {
  std::vector<std::thread> threads;
  for (auto rank = 0; rank < kWorldSize; rank++) {
-    threads.emplace_back(std::thread([rank, server_address=server_address_] {
+    threads.emplace_back([rank, server_address = server_address_] {
      FederatedCommunicator comm{kWorldSize, rank, server_address};
      // Assign device 0 to all workers, since we run gtest in a single-GPU machine
      DeviceCommunicatorAdapter adapter{0, &comm};
@@ -104,17 +68,16 @@ TEST_F(FederatedAdapterTest, DeviceAllGatherV) {
      EXPECT_EQ(segments[0], 2);
      EXPECT_EQ(segments[1], 3);
      thrust::host_vector<char> host_buffer = receive_buffer;
-      EXPECT_EQ(host_buffer.size(), 5);
-      int expected[] = {0, 1, 0, 1, 2};
-      for (auto i = 0; i < 5; i++) {
+      EXPECT_EQ(host_buffer.size(), 9);
+      int expected[] = {0, 1, 0, 1, 2, 0, 1, 2, 3};
+      for (auto i = 0; i < 9; i++) {
        EXPECT_EQ(host_buffer[i], expected[i]);
      }
-    }));
+    });
  }
  for (auto& thread : threads) {
    thread.join();
  }
 }

-}  // namespace collective
-}  // namespace xgboost
+}  // namespace xgboost::collective
--- a/tests/cpp/plugin/test_federated_communicator.cc
+++ b/tests/cpp/plugin/test_federated_communicator.cc
@@ -2,65 +2,34 @@
 * Copyright 2022 XGBoost contributors
 */
 #include <dmlc/parameter.h>
-#include <grpcpp/server_builder.h>
 #include <gtest/gtest.h>

 #include <iostream>
 #include <thread>
-#include <ctime>

-#include "helpers.h"
 #include "../../../plugin/federated/federated_communicator.h"
-#include "../../../plugin/federated/federated_server.h"
+#include "helpers.h"

-namespace {
+namespace xgboost::collective {

-std::string GetServerAddress() {
-  int port = GenerateRandomPort(50000, 60000);
-  std::string address = std::string("localhost:") + std::to_string(port);
-  return address;
-}
-
-}  // anonymous namespace
-
-namespace xgboost {
-namespace collective {
-
-class FederatedCommunicatorTest : public ::testing::Test {
+class FederatedCommunicatorTest : public BaseFederatedTest {
 public:
-  static void VerifyAllgather(int rank, const std::string& server_address) {
+  static void VerifyAllgather(int rank, const std::string &server_address) {
    FederatedCommunicator comm{kWorldSize, rank, server_address};
    CheckAllgather(comm, rank);
  }

-  static void VerifyAllreduce(int rank, const std::string& server_address) {
+  static void VerifyAllreduce(int rank, const std::string &server_address) {
    FederatedCommunicator comm{kWorldSize, rank, server_address};
    CheckAllreduce(comm);
  }

-  static void VerifyBroadcast(int rank, const std::string& server_address) {
+  static void VerifyBroadcast(int rank, const std::string &server_address) {
    FederatedCommunicator comm{kWorldSize, rank, server_address};
    CheckBroadcast(comm, rank);
  }

 protected:
-  void SetUp() override {
-    server_address_ = GetServerAddress();
-    server_thread_.reset(new std::thread([this] {
-      grpc::ServerBuilder builder;
-      federated::FederatedService service{kWorldSize};
-      builder.AddListeningPort(server_address_, grpc::InsecureServerCredentials());
-      builder.RegisterService(&service);
-      server_ = builder.BuildAndStart();
-      server_->Wait();
-    }));
-  }
-
-  void TearDown() override {
-    server_->Shutdown();
-    server_thread_->join();
-  }
-
  static void CheckAllgather(FederatedCommunicator &comm, int rank) {
    int buffer[kWorldSize] = {0, 0, 0};
    buffer[rank] = rank;
@@ -90,11 +59,6 @@ class FederatedCommunicatorTest : public ::testing::Test {
      EXPECT_EQ(buffer, "hello");
    }
  }
-
-  static int const kWorldSize{3};
-  std::string server_address_;
-  std::unique_ptr<std::thread> server_thread_;
-  std::unique_ptr<grpc::Server> server_;
 };

 TEST(FederatedCommunicatorSimpleTest, ThrowOnWorldSizeTooSmall) {
@@ -161,8 +125,7 @@ TEST(FederatedCommunicatorSimpleTest, IsDistributed) {
 TEST_F(FederatedCommunicatorTest, Allgather) {
  std::vector<std::thread> threads;
  for (auto rank = 0; rank < kWorldSize; rank++) {
-    threads.emplace_back(
-        std::thread(&FederatedCommunicatorTest::VerifyAllgather, rank, server_address_));
+    threads.emplace_back(&FederatedCommunicatorTest::VerifyAllgather, rank, server_address_);
  }
  for (auto &thread : threads) {
    thread.join();
@@ -172,8 +135,7 @@ TEST_F(FederatedCommunicatorTest, Allgather) {
 TEST_F(FederatedCommunicatorTest, Allreduce) {
  std::vector<std::thread> threads;
  for (auto rank = 0; rank < kWorldSize; rank++) {
-    threads.emplace_back(
-        std::thread(&FederatedCommunicatorTest::VerifyAllreduce, rank, server_address_));
+    threads.emplace_back(&FederatedCommunicatorTest::VerifyAllreduce, rank, server_address_);
  }
  for (auto &thread : threads) {
    thread.join();
@@ -183,12 +145,10 @@ TEST_F(FederatedCommunicatorTest, Allreduce) {
 TEST_F(FederatedCommunicatorTest, Broadcast) {
  std::vector<std::thread> threads;
  for (auto rank = 0; rank < kWorldSize; rank++) {
-    threads.emplace_back(
-        std::thread(&FederatedCommunicatorTest::VerifyBroadcast, rank, server_address_));
+    threads.emplace_back(&FederatedCommunicatorTest::VerifyBroadcast, rank, server_address_);
  }
  for (auto &thread : threads) {
    thread.join();
  }
 }
-}  // namespace collective
-}  // namespace xgboost
+}  // namespace xgboost::collective
--- a/tests/cpp/plugin/test_federated_data.cc
+++ b/tests/cpp/plugin/test_federated_data.cc
@@ -0,0 +1,90 @@
+/*!
+ * Copyright 2023 XGBoost contributors
+ */
+#include <dmlc/parameter.h>
+#include <gtest/gtest.h>
+#include <xgboost/data.h>
+
+#include <cstddef>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <thread>
+#include <vector>
+
+#include "../../../plugin/federated/federated_server.h"
+#include "../../../src/collective/communicator-inl.h"
+#include "../filesystem.h"
+#include "../helpers.h"
+#include "helpers.h"
+
+namespace xgboost {
+
+/**
+ * \brief Checks loading per-worker CSV files with column-wise split over the federated
+ *        communicator.
+ */
+class FederatedDataTest : public BaseFederatedTest {
+ public:
+  /**
+   * \brief Load a CSV as worker `rank` with column split and verify the feature indices.
+   *
+   * Worker `r` writes `kBaseCols + r` columns. The expected global column count and this
+   * worker's feature offset are derived from kWorldSize instead of being hard-coded.
+   */
+  void VerifyLoadUri(int rank) {
+    InitCommunicator(rank);
+
+    std::size_t constexpr kRows{16};
+    std::size_t constexpr kBaseCols{8};
+    std::size_t const n_cols = kBaseCols + static_cast<std::size_t>(rank);
+
+    // Columns held by lower ranks (this worker's offset) and by all workers together.
+    std::size_t offset = 0;
+    std::size_t n_total_cols = 0;
+    for (int r = 0; r < kWorldSize; ++r) {
+      std::size_t const n = kBaseCols + static_cast<std::size_t>(r);
+      if (r < rank) {
+        offset += n;
+      }
+      n_total_cols += n;
+    }
+
+    dmlc::TemporaryDirectory tmpdir;
+    std::string path = tmpdir.path + "/small" + std::to_string(rank) + ".csv";
+    CreateTestCSV(path, kRows, n_cols);
+
+    std::unique_ptr<DMatrix> dmat;
+    std::string uri = path + "?format=csv";
+    dmat.reset(DMatrix::Load(uri, false, DataSplitMode::kCol));
+
+    ASSERT_EQ(dmat->Info().num_col_, n_total_cols);
+    ASSERT_EQ(dmat->Info().num_row_, kRows);
+
+    for (auto const& page : dmat->GetBatches<SparsePage>()) {
+      auto entries = page.GetView().data;
+      std::size_t index = 0;
+      for (std::size_t row = 0; row < kRows; row++) {
+        for (std::size_t col = 0; col < n_cols; col++) {
+          EXPECT_EQ(entries[index].index, col + offset);
+          index++;
+        }
+      }
+    }
+
+    xgboost::collective::Finalize();
+  }
+};
+
+TEST_F(FederatedDataTest, LoadUri) {
+  std::vector<std::thread> threads;
+  for (auto rank = 0; rank < kWorldSize; rank++) {
+    // `this` converts to the fixture type, so the gtest-generated class need not be spelled.
+    threads.emplace_back(&FederatedDataTest::VerifyLoadUri, this, rank);
+  }
+  for (auto& thread : threads) {
+    thread.join();
+  }
+}
+}  // namespace xgboost
--- a/tests/cpp/plugin/test_federated_server.cc
+++ b/tests/cpp/plugin/test_federated_server.cc
@@ -1,30 +1,17 @@
 /*!
 * Copyright 2017-2020 XGBoost contributors
 */
-#include <grpcpp/server_builder.h>
 #include <gtest/gtest.h>

-#include <ctime>
 #include <iostream>
 #include <thread>

 #include "federated_client.h"
-#include "federated_server.h"
 #include "helpers.h"

-namespace {
-
-std::string GetServerAddress() {
-  int port = GenerateRandomPort(50000, 60000);
-  std::string address = std::string("localhost:") + std::to_string(port);
-  return address;
-}
-
-}  // anonymous namespace
-
 namespace xgboost {

-class FederatedServerTest : public ::testing::Test {
+class FederatedServerTest : public BaseFederatedTest {
 public:
  static void VerifyAllgather(int rank, const std::string& server_address) {
    federated::FederatedClient client{server_address, rank};
@@ -51,23 +38,6 @@ class FederatedServerTest : public ::testing::Test {
  }

 protected:
-  void SetUp() override {
-    server_address_ = GetServerAddress();
-    server_thread_.reset(new std::thread([this] {
-      grpc::ServerBuilder builder;
-      federated::FederatedService service{kWorldSize};
-      builder.AddListeningPort(server_address_, grpc::InsecureServerCredentials());
-      builder.RegisterService(&service);
-      server_ = builder.BuildAndStart();
-      server_->Wait();
-    }));
-  }
-
-  void TearDown() override {
-    server_->Shutdown();
-    server_thread_->join();
-  }
-
  static void CheckAllgather(federated::FederatedClient& client, int rank) {
    int data[kWorldSize] = {0, 0, 0};
    data[rank] = rank;
@@ -98,17 +68,12 @@ class FederatedServerTest : public ::testing::Test {
    auto reply = client.Broadcast(send_buffer, 0);
    EXPECT_EQ(reply, "hello broadcast") << "rank " << rank;
  }
-
-  static int const kWorldSize{3};
-  std::string server_address_;
-  std::unique_ptr<std::thread> server_thread_;
-  std::unique_ptr<grpc::Server> server_;
 };

 TEST_F(FederatedServerTest, Allgather) {
  std::vector<std::thread> threads;
  for (auto rank = 0; rank < kWorldSize; rank++) {
-    threads.emplace_back(std::thread(&FederatedServerTest::VerifyAllgather, rank, server_address_));
+    threads.emplace_back(&FederatedServerTest::VerifyAllgather, rank, server_address_);
  }
  for (auto& thread : threads) {
    thread.join();
@@ -118,7 +83,7 @@ TEST_F(FederatedServerTest, Allgather) {
 TEST_F(FederatedServerTest, Allreduce) {
  std::vector<std::thread> threads;
  for (auto rank = 0; rank < kWorldSize; rank++) {
-    threads.emplace_back(std::thread(&FederatedServerTest::VerifyAllreduce, rank, server_address_));
+    threads.emplace_back(&FederatedServerTest::VerifyAllreduce, rank, server_address_);
  }
  for (auto& thread : threads) {
    thread.join();
@@ -128,7 +93,7 @@ TEST_F(FederatedServerTest, Allreduce) {
 TEST_F(FederatedServerTest, Broadcast) {
  std::vector<std::thread> threads;
  for (auto rank = 0; rank < kWorldSize; rank++) {
-    threads.emplace_back(std::thread(&FederatedServerTest::VerifyBroadcast, rank, server_address_));
+    threads.emplace_back(&FederatedServerTest::VerifyBroadcast, rank, server_address_);
  }
  for (auto& thread : threads) {
    thread.join();
@@ -138,7 +103,7 @@ TEST_F(FederatedServerTest, Broadcast) {
 TEST_F(FederatedServerTest, Mixture) {
  std::vector<std::thread> threads;
  for (auto rank = 0; rank < kWorldSize; rank++) {
-    threads.emplace_back(std::thread(&FederatedServerTest::VerifyMixture, rank, server_address_));
+    threads.emplace_back(&FederatedServerTest::VerifyMixture, rank, server_address_);
  }
  for (auto& thread : threads) {
    thread.join();
--- a/tests/cpp/predictor/test_cpu_predictor.cc
+++ b/tests/cpp/predictor/test_cpu_predictor.cc
@@ -305,4 +305,10 @@ TEST(CpuPredictor, Sparse) {
  TestSparsePrediction(0.2, "cpu_predictor");
  TestSparsePrediction(0.8, "cpu_predictor");
 }
+
+TEST(CpuPredictor, Multi) {  // vector-leaf prediction check with nthread set to 1
+  Context ctx;
+  ctx.nthread = 1;
+  TestVectorLeafPrediction(&ctx);
+}
 }  // namespace xgboost
--- a/tests/cpp/predictor/test_predictor.cc
+++ b/tests/cpp/predictor/test_predictor.cc
@@ -1,28 +1,34 @@
-/*!
- * Copyright 2020-2021 by Contributors
+/**
+ * Copyright 2020-2023 by XGBoost Contributors
 */
-
 #include "test_predictor.h"

 #include <gtest/gtest.h>
-#include <xgboost/context.h>
-#include <xgboost/data.h>
-#include <xgboost/host_device_vector.h>
-#include <xgboost/predictor.h>
+#include <xgboost/context.h>                      // for Context
+#include <xgboost/data.h>                         // for DMatrix, BatchIterator, BatchSet, MetaInfo
+#include <xgboost/host_device_vector.h>           // for HostDeviceVector
+#include <xgboost/predictor.h>                    // for PredictionCacheEntry, Predictor, Predic...

-#include "../../../src/common/bitfield.h"
-#include "../../../src/common/categorical.h"
-#include "../../../src/common/io.h"
-#include "../../../src/data/adapter.h"
-#include "../../../src/data/proxy_dmatrix.h"
-#include "../helpers.h"
+#include <algorithm>                              // for max
+#include <limits>                                 // for numeric_limits
+#include <unordered_map>                          // for unordered_map
+
+#include "../../../src/common/bitfield.h"         // for LBitField32
+#include "../../../src/data/iterative_dmatrix.h"  // for IterativeDMatrix
+#include "../../../src/data/proxy_dmatrix.h"      // for DMatrixProxy
+#include "../helpers.h"                           // for GetDMatrixFromData, RandomDataGenerator
+#include "xgboost/json.h"                         // for Json, Object, get, String
+#include "xgboost/linalg.h"                       // for MakeVec, Tensor, TensorView, Vector
+#include "xgboost/logging.h"                      // for CHECK
+#include "xgboost/span.h"                         // for operator!=, SpanIterator, Span
+#include "xgboost/tree_model.h"                   // for RegTree

 namespace xgboost {
 TEST(Predictor, PredictionCache) {
  size_t constexpr kRows = 16, kCols = 4;

  PredictionContainer container;
-  DMatrix* m;
+  DMatrix *m;
  // Add a cache that is immediately expired.
  auto add_cache = [&]() {
    auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
@@ -412,4 +418,101 @@ void TestSparsePrediction(float sparsity, std::string predictor) {
    }
  }
 }
+
+void TestVectorLeafPrediction(Context const *ctx) {  // multi-target (vector leaf) prediction
+  std::unique_ptr<Predictor> cpu_predictor =
+      std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", ctx));
+
+  size_t constexpr kRows = 5;
+  size_t constexpr kCols = 5;
+
+  LearnerModelParam mparam{static_cast<bst_feature_t>(kCols),
+                           linalg::Vector<float>{{0.5}, {1}, Context::kCpuId}, 1, 3,
+                           MultiStrategy::kMultiOutputTree};
+
+  std::vector<std::unique_ptr<RegTree>> trees;
+  trees.emplace_back(new RegTree{mparam.LeafLength(), mparam.num_feature});
+
+  std::vector<float> p_w(mparam.LeafLength(), 0.0f);
+  std::vector<float> l_w(mparam.LeafLength(), 1.0f);  // left child: 0.5 + 1.0 == 1.5 expected
+  std::vector<float> r_w(mparam.LeafLength(), 2.0f);  // right child: 0.5 + 2.0 == 2.5 expected
+
+  auto &tree = trees.front();
+  tree->ExpandNode(0, static_cast<bst_feature_t>(1), 2.0, true,
+                   linalg::MakeVec(p_w.data(), p_w.size()), linalg::MakeVec(l_w.data(), l_w.size()),
+                   linalg::MakeVec(r_w.data(), r_w.size()));
+  ASSERT_TRUE(tree->IsMultiTarget());
+  ASSERT_TRUE(mparam.IsVectorLeaf());
+
+  gbm::GBTreeModel model{&mparam, ctx};
+  model.CommitModel(std::move(trees), 0);
+
+  auto run_test = [&](float expected, HostDeviceVector<float> *p_data) {
+    {  // prediction from a DMatrix built by GetDMatrixFromData
+      auto p_fmat = GetDMatrixFromData(p_data->ConstHostVector(), kRows, kCols);
+      PredictionCacheEntry predt_cache;
+      cpu_predictor->InitOutPredictions(p_fmat->Info(), &predt_cache.predictions, model);
+      ASSERT_EQ(predt_cache.predictions.Size(), kRows * mparam.LeafLength());
+      cpu_predictor->PredictBatch(p_fmat.get(), &predt_cache, model, 0, 1);
+      auto const &h_predt = predt_cache.predictions.HostVector();
+      for (auto v : h_predt) {
+        ASSERT_EQ(v, expected);
+      }
+    }
+
+    {
+      // inplace: the same values passed as an array interface through a DMatrixProxy
+      PredictionCacheEntry predt_cache;
+      auto p_fmat = GetDMatrixFromData(p_data->ConstHostVector(), kRows, kCols);
+      cpu_predictor->InitOutPredictions(p_fmat->Info(), &predt_cache.predictions, model);
+      auto arr = GetArrayInterface(p_data, kRows, kCols);
+      std::string str;
+      Json::Dump(arr, &str);
+      auto proxy = std::shared_ptr<DMatrix>(new data::DMatrixProxy{});
+      dynamic_cast<data::DMatrixProxy *>(proxy.get())->SetArrayData(str.data());
+      cpu_predictor->InplacePredict(proxy, model, std::numeric_limits<float>::quiet_NaN(),
+                                    &predt_cache, 0, 1);
+      auto const &h_predt = predt_cache.predictions.HostVector();
+      for (auto v : h_predt) {
+        ASSERT_EQ(v, expected);
+      }
+    }
+
+    {
+      // ghist: predict from an IterativeDMatrix built with NumpyArrayIterForTest
+      PredictionCacheEntry predt_cache;
+      auto &h_data = p_data->HostVector();
+      // give it at least two bins, otherwise the histogram cuts only have min and max values.
+      for (std::size_t i = 0; i < 5; ++i) {  // presumably the first row (kCols == 5)
+        h_data[i] = 1.0;
+      }
+      auto p_fmat = GetDMatrixFromData(p_data->ConstHostVector(), kRows, kCols);
+
+      cpu_predictor->InitOutPredictions(p_fmat->Info(), &predt_cache.predictions, model);
+      // NOTE(review): p_fmat is replaced right below and InitOutPredictions runs again.
+      auto iter = NumpyArrayIterForTest{ctx, *p_data, kRows, static_cast<bst_feature_t>(kCols),
+                                        static_cast<std::size_t>(1)};
+      p_fmat =
+          std::make_shared<data::IterativeDMatrix>(&iter, iter.Proxy(), nullptr, Reset, Next,
+                                                   std::numeric_limits<float>::quiet_NaN(), 0, 256);
+
+      cpu_predictor->InitOutPredictions(p_fmat->Info(), &predt_cache.predictions, model);
+      cpu_predictor->PredictBatch(p_fmat.get(), &predt_cache, model, 0, 1);
+      auto const &h_predt = predt_cache.predictions.HostVector();
+      // the smallest v uses the min_value from histogram cuts, which leads to a left leaf
+      // during prediction.
+      for (std::size_t i = 5; i < h_predt.size(); ++i) {  // NOTE(review): 5 vs LeafLength()?
+        ASSERT_EQ(h_predt[i], expected) << i;
+      }
+    }
+  };
+
+  // all values above the root split condition: go to right
+  HostDeviceVector<float> data(kRows * kCols, model.trees.front()->SplitCond(RegTree::kRoot) + 1.0);
+  run_test(2.5, &data);
+
+  // all values below the root split condition: go to left
+  data.HostVector().assign(data.Size(), model.trees.front()->SplitCond(RegTree::kRoot) - 1.0);
+  run_test(1.5, &data);
+}
 }  // namespace xgboost
--- a/tests/cpp/predictor/test_predictor.h
+++ b/tests/cpp/predictor/test_predictor.h
@@ -1,9 +1,16 @@
+/**
+ * Copyright 2020-2023 by XGBoost Contributors
+ */
 #ifndef XGBOOST_TEST_PREDICTOR_H_
 #define XGBOOST_TEST_PREDICTOR_H_

+#include <xgboost/context.h>  // for Context
 #include <xgboost/predictor.h>
-#include <string>
+
 #include <cstddef>
+#include <string>
+
+#include "../../../src/gbm/gbtree_model.h"  // for GBTreeModel
 #include "../helpers.h"

 namespace xgboost {
@@ -48,7 +55,7 @@ void TestPredictionFromGradientIndex(std::string name, size_t rows, size_t cols,
    PredictionCacheEntry precise_out_predictions;
    predictor->InitOutPredictions(p_dmat->Info(), &precise_out_predictions.predictions, model);
    predictor->PredictBatch(p_dmat.get(), &precise_out_predictions, model, 0);
-    ASSERT_FALSE(p_dmat->PageExists<Page>());
+    CHECK(!p_dmat->PageExists<Page>());
  }
 }

@@ -69,6 +76,8 @@ void TestCategoricalPredictLeaf(StringView name);
 void TestIterationRange(std::string name);

 void TestSparsePrediction(float sparsity, std::string predictor);
+
+void TestVectorLeafPrediction(Context const* ctx);
 }  // namespace xgboost

 #endif  // XGBOOST_TEST_PREDICTOR_H_
--- a/tests/cpp/test_multi_target.cc
+++ b/tests/cpp/test_multi_target.cc
@@ -124,11 +124,11 @@ TEST(MultiStrategy, Configure) {
  auto p_fmat = RandomDataGenerator{12ul, 3ul, 0.0}.GenerateDMatrix();
  p_fmat->Info().labels.Reshape(p_fmat->Info().num_row_, 2);
  std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};
-  learner->SetParams(Args{{"multi_strategy", "monolithic"}, {"num_target", "2"}});
+  learner->SetParams(Args{{"multi_strategy", "multi_output_tree"}, {"num_target", "2"}});
  learner->Configure();
  ASSERT_EQ(learner->Groups(), 2);

-  learner->SetParams(Args{{"multi_strategy", "monolithic"}, {"num_target", "0"}});
+  learner->SetParams(Args{{"multi_strategy", "multi_output_tree"}, {"num_target", "0"}});
  ASSERT_THROW({ learner->Configure(); }, dmlc::Error);
 }
 }  // namespace xgboost
--- a/tests/cpp/tree/gpu_hist/test_evaluate_splits.cu
+++ b/tests/cpp/tree/gpu_hist/test_evaluate_splits.cu
@@ -304,7 +304,7 @@ void TestEvaluateSingleSplit(bool is_categorical) {
  thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};

  // Setup gradients so that second feature gets higher gain
-  auto feature_histogram = ConvertToInteger({          {-0.5, 0.5}, {0.5, 0.5}, {-1.0, 0.5}, {1.0, 0.5}});
+  auto feature_histogram = ConvertToInteger({{-0.5, 0.5}, {0.5, 0.5}, {-1.0, 0.5}, {1.0, 0.5}});

  dh::device_vector<FeatureType> feature_types(feature_set.size(),
                                               FeatureType::kCategorical);
--- a/tests/cpp/tree/hist/test_evaluate_splits.cc
+++ b/tests/cpp/tree/hist/test_evaluate_splits.cc
@@ -1,18 +1,27 @@
 /**
 * Copyright 2021-2023 by XGBoost Contributors
 */
-#include <gtest/gtest.h>
-#include <xgboost/base.h>
-
-#include "../../../../src/common/hist_util.h"
-#include "../../../../src/tree/common_row_partitioner.h"
-#include "../../../../src/tree/hist/evaluate_splits.h"
 #include "../test_evaluate_splits.h"
-#include "../../helpers.h"
-#include "xgboost/context.h"  // Context

-namespace xgboost {
-namespace tree {
+#include <gtest/gtest.h>
+#include <xgboost/base.h>                               // for GradientPairPrecise, Args, Gradie...
+#include <xgboost/context.h>                            // for Context
+#include <xgboost/data.h>                               // for FeatureType, DMatrix, MetaInfo
+#include <xgboost/logging.h>                            // for CHECK_EQ
+#include <xgboost/tree_model.h>                         // for RegTree, RTreeNodeStat
+
+#include <memory>                                       // for make_shared, shared_ptr, addressof
+
+#include "../../../../src/common/hist_util.h"           // for HistCollection, HistogramCuts
+#include "../../../../src/common/random.h"              // for ColumnSampler
+#include "../../../../src/common/row_set.h"             // for RowSetCollection
+#include "../../../../src/data/gradient_index.h"        // for GHistIndexMatrix
+#include "../../../../src/tree/hist/evaluate_splits.h"  // for HistEvaluator
+#include "../../../../src/tree/hist/expand_entry.h"     // for CPUExpandEntry
+#include "../../../../src/tree/param.h"                 // for GradStats, TrainParam
+#include "../../helpers.h"                              // for RandomDataGenerator, AllThreadsFo...
+
+namespace xgboost::tree {
 void TestEvaluateSplits(bool force_read_by_column) {
  Context ctx;
  ctx.nthread = 4;
@@ -87,6 +96,68 @@ TEST(HistEvaluator, Evaluate) {
  TestEvaluateSplits(true);
 }

+TEST(HistMultiEvaluator, Evaluate) {  // root weight + best split on a hand-built histogram
+  Context ctx;
+  ctx.nthread = 1;
+
+  TrainParam param;
+  param.Init(Args{{"min_child_weight", "0"}, {"reg_lambda", "0"}});
+  auto sampler = std::make_shared<common::ColumnSampler>();
+
+  std::size_t n_samples = 3;
+  bst_feature_t n_features = 2;
+  bst_target_t n_targets = 2;
+  bst_bin_t n_bins = 2;
+
+  auto p_fmat =
+      RandomDataGenerator{n_samples, n_features, 0.5}.Targets(n_targets).GenerateDMatrix(true);
+
+  HistMultiEvaluator evaluator{&ctx, p_fmat->Info(), &param, sampler};
+  std::vector<common::HistCollection> histogram(n_targets);
+  linalg::Vector<GradientPairPrecise> root_sum({2}, Context::kCpuId);  // indexed by target
+  for (bst_target_t t{0}; t < n_targets; ++t) {  // every target gets the same histogram
+    auto &hist = histogram[t];
+    hist.Init(n_bins * n_features);  // 2 bins x 2 features = 4 bins
+    hist.AddHistRow(0);
+    hist.AllocateAllData();
+    auto node_hist = hist[0];
+    node_hist[0] = {-0.5, 0.5};
+    node_hist[1] = {2.0, 0.5};
+    node_hist[2] = {0.5, 0.5};
+    node_hist[3] = {1.0, 0.5};
+
+    root_sum(t) += node_hist[0];  // root sum is bins 0 and 1 only: {1.5, 1.0}
+    root_sum(t) += node_hist[1];
+  }
+
+  RegTree tree{n_targets, n_features};
+  auto weight = evaluator.InitRoot(root_sum.HostView());
+  tree.SetLeaf(RegTree::kRoot, weight.HostView());
+  auto w = weight.HostView();
+  ASSERT_EQ(w.Size(), n_targets);
+  ASSERT_EQ(w(0), -1.5);  // presumably -G / H = -1.5 / 1.0, as reg_lambda is 0 -- confirm
+  ASSERT_EQ(w(1), -1.5);
+
+  common::HistogramCuts cuts;
+  cuts.cut_ptrs_ = {0, 2, 4};  // presumably offsets into cut_values_, 2 per feature -- confirm
+  cuts.cut_values_ = {0.5, 1.0, 2.0, 3.0};
+  cuts.min_vals_ = {-0.2, 1.8};
+
+  std::vector<MultiExpandEntry> entries(1, {/*nidx=*/0, /*depth=*/0});  // one candidate: node 0
+
+  std::vector<common::HistCollection const *> ptrs;  // non-owning pointers into `histogram`
+  std::transform(histogram.cbegin(), histogram.cend(), std::back_inserter(ptrs),
+                 [](auto const &h) { return std::addressof(h); });
+
+  evaluator.EvaluateSplits(tree, ptrs, cuts, &entries);
+
+  ASSERT_EQ(entries.front().split.loss_chg, 12.5);
+  ASSERT_EQ(entries.front().split.split_value, 0.5);  // equals cut_values_[0]
+  ASSERT_EQ(entries.front().split.SplitIndex(), 0);
+
+  ASSERT_EQ(sampler->GetFeatureSet(0)->Size(), n_features);
+}
+
 TEST(HistEvaluator, Apply) {
  Context ctx;
  ctx.nthread = 4;
@@ -98,7 +169,8 @@ TEST(HistEvaluator, Apply) {
  auto sampler = std::make_shared<common::ColumnSampler>();
  auto evaluator_ = HistEvaluator<CPUExpandEntry>{&ctx, &param, dmat->Info(), sampler};

-  CPUExpandEntry entry{0, 0, 10.0f};
+  CPUExpandEntry entry{0, 0};
+  entry.split.loss_chg = 10.0f;
  entry.split.left_sum = GradStats{0.4, 0.6f};
  entry.split.right_sum = GradStats{0.5, 0.5f};

@@ -210,12 +282,11 @@ TEST_F(TestCategoricalSplitWithMissing, HistEvaluator) {
  std::vector<CPUExpandEntry> entries(1);
  RegTree tree;
  evaluator.EvaluateSplits(hist, cuts_, info.feature_types.ConstHostSpan(), tree, &entries);
-  auto const& split = entries.front().split;
+  auto const &split = entries.front().split;

  this->CheckResult(split.loss_chg, split.SplitIndex(), split.split_value, split.is_cat,
                    split.DefaultLeft(),
                    GradientPairPrecise{split.left_sum.GetGrad(), split.left_sum.GetHess()},
                    GradientPairPrecise{split.right_sum.GetGrad(), split.right_sum.GetHess()});
 }
-}  // namespace tree
-}  // namespace xgboost
+}  // namespace xgboost::tree
--- a/tests/cpp/tree/hist/test_histogram.cc
+++ b/tests/cpp/tree/hist/test_histogram.cc
@@ -41,10 +41,10 @@ void TestAddHistRows(bool is_distributed) {
  tree.ExpandNode(0, 0, 0, false, 0, 0, 0, 0, 0, 0, 0);
  tree.ExpandNode(tree[0].LeftChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0);
  tree.ExpandNode(tree[0].RightChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0);
-  nodes_for_explicit_hist_build_.emplace_back(3, tree.GetDepth(3), 0.0f);
-  nodes_for_explicit_hist_build_.emplace_back(4, tree.GetDepth(4), 0.0f);
-  nodes_for_subtraction_trick_.emplace_back(5, tree.GetDepth(5), 0.0f);
-  nodes_for_subtraction_trick_.emplace_back(6, tree.GetDepth(6), 0.0f);
+  nodes_for_explicit_hist_build_.emplace_back(3, tree.GetDepth(3));
+  nodes_for_explicit_hist_build_.emplace_back(4, tree.GetDepth(4));
+  nodes_for_subtraction_trick_.emplace_back(5, tree.GetDepth(5));
+  nodes_for_subtraction_trick_.emplace_back(6, tree.GetDepth(6));

  HistogramBuilder<CPUExpandEntry> histogram_builder;
  histogram_builder.Reset(gmat.cut.TotalBins(), {kMaxBins, 0.5}, omp_get_max_threads(), 1,
@@ -98,7 +98,7 @@ void TestSyncHist(bool is_distributed) {
  }

  // level 0
-  nodes_for_explicit_hist_build_.emplace_back(0, tree.GetDepth(0), 0.0f);
+  nodes_for_explicit_hist_build_.emplace_back(0, tree.GetDepth(0));
  histogram.AddHistRows(&starting_index, &sync_count,
                        nodes_for_explicit_hist_build_,
                        nodes_for_subtraction_trick_, &tree);
@@ -108,10 +108,8 @@ void TestSyncHist(bool is_distributed) {
  nodes_for_subtraction_trick_.clear();

  // level 1
-  nodes_for_explicit_hist_build_.emplace_back(tree[0].LeftChild(),
-                                              tree.GetDepth(1), 0.0f);
-  nodes_for_subtraction_trick_.emplace_back(tree[0].RightChild(),
-                                            tree.GetDepth(2), 0.0f);
+  nodes_for_explicit_hist_build_.emplace_back(tree[0].LeftChild(), tree.GetDepth(1));
+  nodes_for_subtraction_trick_.emplace_back(tree[0].RightChild(), tree.GetDepth(2));

  histogram.AddHistRows(&starting_index, &sync_count,
                        nodes_for_explicit_hist_build_,
@@ -123,10 +121,10 @@ void TestSyncHist(bool is_distributed) {
  nodes_for_explicit_hist_build_.clear();
  nodes_for_subtraction_trick_.clear();
  // level 2
-  nodes_for_explicit_hist_build_.emplace_back(3, tree.GetDepth(3), 0.0f);
-  nodes_for_subtraction_trick_.emplace_back(4, tree.GetDepth(4), 0.0f);
-  nodes_for_explicit_hist_build_.emplace_back(5, tree.GetDepth(5), 0.0f);
-  nodes_for_subtraction_trick_.emplace_back(6, tree.GetDepth(6), 0.0f);
+  nodes_for_explicit_hist_build_.emplace_back(3, tree.GetDepth(3));
+  nodes_for_subtraction_trick_.emplace_back(4, tree.GetDepth(4));
+  nodes_for_explicit_hist_build_.emplace_back(5, tree.GetDepth(5));
+  nodes_for_subtraction_trick_.emplace_back(6, tree.GetDepth(6));

  histogram.AddHistRows(&starting_index, &sync_count,
                        nodes_for_explicit_hist_build_,
@@ -256,7 +254,7 @@ void TestBuildHistogram(bool is_distributed, bool force_read_by_column, bool is_
  std::iota(row_indices.begin(), row_indices.end(), 0);
  row_set_collection.Init();

-  CPUExpandEntry node(RegTree::kRoot, tree.GetDepth(0), 0.0f);
+  CPUExpandEntry node{RegTree::kRoot, tree.GetDepth(0)};
  std::vector<CPUExpandEntry> nodes_for_explicit_hist_build;
  nodes_for_explicit_hist_build.push_back(node);
  for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>({kMaxBins, 0.5})) {
@@ -330,7 +328,7 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
  BatchParam batch_param{0, static_cast<int32_t>(kBins)};

  RegTree tree;
-  CPUExpandEntry node(RegTree::kRoot, tree.GetDepth(0), 0.0f);
+  CPUExpandEntry node{RegTree::kRoot, tree.GetDepth(0)};
  std::vector<CPUExpandEntry> nodes_for_explicit_hist_build;
  nodes_for_explicit_hist_build.push_back(node);

@@ -403,7 +401,7 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo

  RegTree tree;
  std::vector<CPUExpandEntry> nodes;
-  nodes.emplace_back(0, tree.GetDepth(0), 0.0f);
+  nodes.emplace_back(0, tree.GetDepth(0));

  common::GHistRow multi_page;
  HistogramBuilder<CPUExpandEntry> multi_build;
--- a/tests/cpp/tree/test_approx.cc
+++ b/tests/cpp/tree/test_approx.cc
@@ -1,5 +1,5 @@
-/*!
- * Copyright 2021-2022, XGBoost contributors.
+/**
+ * Copyright 2021-2023 by XGBoost contributors.
 */
 #include <gtest/gtest.h>

@@ -10,7 +10,6 @@

 namespace xgboost {
 namespace tree {
-
 namespace {
 std::vector<float> GenerateHess(size_t n_samples) {
  auto grad = GenerateRandomGradients(n_samples);
@@ -32,7 +31,8 @@ TEST(Approx, Partitioner) {

  auto const Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
  auto hess = GenerateHess(n_samples);
-  std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
+  std::vector<CPUExpandEntry> candidates{{0, 0}};
+  candidates.front().split.loss_chg = 0.4;

  for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({64, hess, true})) {
    bst_feature_t const split_ind = 0;
@@ -79,7 +79,9 @@ void TestColumnSplitPartitioner(size_t n_samples, size_t base_rowid, std::shared
                                CommonRowPartitioner const& expected_mid_partitioner) {
  auto dmat =
      std::unique_ptr<DMatrix>{Xy->SliceCol(collective::GetWorldSize(), collective::GetRank())};
-  std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
+  std::vector<CPUExpandEntry> candidates{{0, 0}};
+  candidates.front().split.loss_chg = 0.4;
+
  Context ctx;
  ctx.InitAllowUnknown(Args{});
  for (auto const& page : dmat->GetBatches<GHistIndexMatrix>({64, *hess, true})) {
@@ -124,7 +126,8 @@ TEST(Approx, PartitionerColSplit) {
  size_t n_samples = 1024, n_features = 16, base_rowid = 0;
  auto const Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
  auto hess = GenerateHess(n_samples);
-  std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
+  std::vector<CPUExpandEntry> candidates{{0, 0}};
+  candidates.front().split.loss_chg = 0.4;

  float min_value, mid_value;
  Context ctx;
@@ -145,77 +148,5 @@ TEST(Approx, PartitionerColSplit) {
  RunWithInMemoryCommunicator(kWorkers, TestColumnSplitPartitioner, n_samples, base_rowid, Xy,
                              &hess, min_value, mid_value, mid_partitioner);
 }
-
-namespace {
-void TestLeafPartition(size_t n_samples) {
-  size_t const n_features = 2, base_rowid = 0;
-  Context ctx;
-  common::RowSetCollection row_set;
-  CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
-
-  auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
-  std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
-  RegTree tree;
-  std::vector<float> hess(n_samples, 0);
-  // emulate sampling
-  auto not_sampled = [](size_t i) {
-    size_t const kSampleFactor{3};
-    return i % kSampleFactor != 0;
-  };
-  for (size_t i = 0; i < hess.size(); ++i) {
-    if (not_sampled(i)) {
-      hess[i] = 1.0f;
-    }
-  }
-
-  std::vector<size_t> h_nptr;
-  float split_value{0};
-  for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({Context::kCpuId, 64})) {
-    bst_feature_t const split_ind = 0;
-    auto ptr = page.cut.Ptrs()[split_ind + 1];
-    split_value = page.cut.Values().at(ptr / 2);
-    GetSplit(&tree, split_value, &candidates);
-    partitioner.UpdatePosition(&ctx, page, candidates, &tree);
-    std::vector<bst_node_t> position;
-    partitioner.LeafPartition(&ctx, tree, hess, &position);
-    std::sort(position.begin(), position.end());
-    size_t beg = std::distance(
-        position.begin(),
-        std::find_if(position.begin(), position.end(), [&](bst_node_t nidx) { return nidx >= 0; }));
-    std::vector<size_t> nptr;
-    common::RunLengthEncode(position.cbegin() + beg, position.cend(), &nptr);
-    std::transform(nptr.begin(), nptr.end(), nptr.begin(), [&](size_t x) { return x + beg; });
-    auto n_uniques = std::unique(position.begin() + beg, position.end()) - (position.begin() + beg);
-    ASSERT_EQ(nptr.size(), n_uniques + 1);
-    ASSERT_EQ(nptr[0], beg);
-    ASSERT_EQ(nptr.back(), n_samples);
-
-    h_nptr = nptr;
-  }
-
-  if (h_nptr.front() == n_samples) {
-    return;
-  }
-
-  ASSERT_GE(h_nptr.size(), 2);
-
-  for (auto const& page : Xy->GetBatches<SparsePage>()) {
-    auto batch = page.GetView();
-    size_t left{0};
-    for (size_t i = 0; i < batch.Size(); ++i) {
-      if (not_sampled(i) && batch[i].front().fvalue < split_value) {
-        left++;
-      }
-    }
-    ASSERT_EQ(left, h_nptr[1] - h_nptr[0]);  // equal to number of sampled assigned to left
-  }
-}
-}  // anonymous namespace
-
-TEST(Approx, LeafPartition) {
-  for (auto n_samples : {0ul, 1ul, 128ul, 256ul}) {
-    TestLeafPartition(n_samples);
-  }
-}
 }  // namespace tree
 }  // namespace xgboost
--- a/tests/cpp/tree/test_common_partitioner.cc
+++ b/tests/cpp/tree/test_common_partitioner.cc
@@ -0,0 +1,93 @@
+/**
+ * Copyright 2022-2023 by XGBoost contributors.
+ */
+#include <gtest/gtest.h>
+#include <xgboost/base.h>                         // for bst_node_t
+#include <xgboost/context.h>                      // for Context
+
+#include <algorithm>                              // for transform
+#include <iterator>                               // for distance
+#include <vector>                                 // for vector
+
+#include "../../../src/common/numeric.h"          // for RunLengthEncode
+#include "../../../src/common/row_set.h"          // for RowSetCollection
+#include "../../../src/data/gradient_index.h"     // for GHistIndexMatrix
+#include "../../../src/tree/common_row_partitioner.h"
+#include "../../../src/tree/hist/expand_entry.h"  // for CPUExpandEntry
+#include "../helpers.h"                           // for RandomDataGenerator
+#include "test_partitioner.h"                     // for GetSplit
+
+namespace xgboost::tree {
+namespace {
+void TestLeafPartition(size_t n_samples) {  // checks CommonRowPartitioner::LeafPartition output
+  size_t const n_features = 2, base_rowid = 0;
+  Context ctx;
+  common::RowSetCollection row_set;  // NOTE(review): not referenced again in this function
+  CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
+
+  auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
+  std::vector<CPUExpandEntry> candidates{{0, 0}};
+  candidates.front().split.loss_chg = 0.4;
+  RegTree tree;
+  std::vector<float> hess(n_samples, 0);
+  // emulate sampling: only rows with i % 3 != 0 get a non-zero hessian
+  auto not_sampled = [](size_t i) {
+    size_t const kSampleFactor{3};
+    return i % kSampleFactor != 0;
+  };
+  for (size_t i = 0; i < hess.size(); ++i) {
+    if (not_sampled(i)) {
+      hess[i] = 1.0f;
+    }
+  }
+
+  std::vector<size_t> h_nptr;  // run boundaries from the last processed page
+  float split_value{0};
+  for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({Context::kCpuId, 64})) {
+    bst_feature_t const split_ind = 0;
+    auto ptr = page.cut.Ptrs()[split_ind + 1];
+    split_value = page.cut.Values().at(ptr / 2);
+    GetSplit(&tree, split_value, &candidates);
+    partitioner.UpdatePosition(&ctx, page, candidates, &tree);
+    std::vector<bst_node_t> position;
+    partitioner.LeafPartition(&ctx, tree, hess, &position);
+    std::sort(position.begin(), position.end());  // negative entries, if any, sort to the front
+    size_t beg = std::distance(  // index of the first non-negative position
+        position.begin(),
+        std::find_if(position.begin(), position.end(), [&](bst_node_t nidx) { return nidx >= 0; }));
+    std::vector<size_t> nptr;
+    common::RunLengthEncode(position.cbegin() + beg, position.cend(), &nptr);
+    std::transform(nptr.begin(), nptr.end(), nptr.begin(), [&](size_t x) { return x + beg; });
+    auto n_uniques = std::unique(position.begin() + beg, position.end()) - (position.begin() + beg);
+    ASSERT_EQ(nptr.size(), n_uniques + 1);
+    ASSERT_EQ(nptr[0], beg);
+    ASSERT_EQ(nptr.back(), n_samples);
+
+    h_nptr = nptr;
+  }
+
+  if (h_nptr.front() == n_samples) {  // no non-negative position at all, nothing to compare
+    return;
+  }
+
+  ASSERT_GE(h_nptr.size(), 2);
+
+  for (auto const& page : Xy->GetBatches<SparsePage>()) {
+    auto batch = page.GetView();
+    size_t left{0};
+    for (size_t i = 0; i < batch.Size(); ++i) {
+      if (not_sampled(i) && batch[i].front().fvalue < split_value) {
+        left++;
+      }
+    }
+    ASSERT_EQ(left, h_nptr[1] - h_nptr[0]);  // number of non-zero-hessian rows assigned to left
+  }
+}
+}  // anonymous namespace
+
+TEST(CommonRowPartitioner, LeafPartition) {
+  for (auto n_samples : {0ul, 1ul, 128ul, 256ul}) {  // includes the empty and single-row inputs
+    TestLeafPartition(n_samples);
+  }
+}
+}  // namespace xgboost::tree
--- a/tests/cpp/tree/test_evaluate_splits.h
+++ b/tests/cpp/tree/test_evaluate_splits.h
@@ -2,15 +2,26 @@
 * Copyright 2022-2023 by XGBoost Contributors
 */
 #include <gtest/gtest.h>
-#include <xgboost/data.h>
+#include <xgboost/base.h>                       // for GradientPairInternal, GradientPairPrecise
+#include <xgboost/data.h>                       // for MetaInfo
+#include <xgboost/host_device_vector.h>         // for HostDeviceVector
+#include <xgboost/span.h>                       // for operator!=, Span, SpanIterator

-#include <algorithm>  // next_permutation
-#include <numeric>    // iota
+#include <algorithm>                            // for max, max_element, next_permutation, copy
+#include <cmath>                                // for isnan
+#include <cstddef>                              // for size_t
+#include <cstdint>                              // for int32_t, uint64_t, uint32_t
+#include <limits>                               // for numeric_limits
+#include <numeric>                              // for iota
+#include <tuple>                                // for make_tuple, tie, tuple
+#include <utility>                              // for pair
+#include <vector>                               // for vector

-#include "../../../src/common/hist_util.h"  // HistogramCuts,HistCollection
-#include "../../../src/tree/param.h"        // TrainParam
-#include "../../../src/tree/split_evaluator.h"
-#include "../helpers.h"
+#include "../../../src/common/hist_util.h"      // for HistogramCuts, HistCollection, GHistRow
+#include "../../../src/tree/param.h"            // for TrainParam, GradStats
+#include "../../../src/tree/split_evaluator.h"  // for TreeEvaluator
+#include "../helpers.h"                         // for SimpleLCG, SimpleRealUniformDistribution
+#include "gtest/gtest_pred_impl.h"              // for AssertionResult, ASSERT_EQ, ASSERT_TRUE

 namespace xgboost::tree {
 /**
--- a/tests/cpp/tree/test_fit_stump.cc
+++ b/tests/cpp/tree/test_fit_stump.cc
@@ -21,7 +21,8 @@ void TestFitStump(Context const *ctx) {
    }
  }
  linalg::Vector<float> out;
-  FitStump(ctx, gpair, kTargets, &out);
+  MetaInfo info;
+  FitStump(ctx, info, gpair, kTargets, &out);
  auto h_out = out.HostView();
  for (auto it = linalg::cbegin(h_out); it != linalg::cend(h_out); ++it) {
    // sum_hess == kRows
--- a/tests/cpp/tree/test_histmaker.cc
+++ b/tests/cpp/tree/test_histmaker.cc
@@ -40,8 +40,7 @@ TEST(GrowHistMaker, InteractionConstraint)
  ObjInfo task{ObjInfo::kRegression};
  {
    // With constraints
-    RegTree tree;
-    tree.param.num_feature = kCols;
+    RegTree tree{1, kCols};

    std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)};
    TrainParam param;
@@ -58,8 +57,7 @@ TEST(GrowHistMaker, InteractionConstraint)
  }
  {
    // Without constraints
-    RegTree tree;
-    tree.param.num_feature = kCols;
+    RegTree tree{1u, kCols};

    std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)};
    std::vector<HostDeviceVector<bst_node_t>> position(1);
@@ -76,7 +74,7 @@ TEST(GrowHistMaker, InteractionConstraint)
 }

 namespace {
-void TestColumnSplit(int32_t rows, int32_t cols, RegTree const& expected_tree) {
+void TestColumnSplit(int32_t rows, bst_feature_t cols, RegTree const& expected_tree) {
  auto p_dmat = GenerateDMatrix(rows, cols);
  auto p_gradients = GenerateGradients(rows);
  Context ctx;
@@ -87,8 +85,7 @@ void TestColumnSplit(int32_t rows, int32_t cols, RegTree const& expected_tree) {
  std::unique_ptr<DMatrix> sliced{
      p_dmat->SliceCol(collective::GetWorldSize(), collective::GetRank())};

-  RegTree tree;
-  tree.param.num_feature = cols;
+  RegTree tree{1u, cols};
  TrainParam param;
  param.Init(Args{});
  updater->Update(&param, p_gradients.get(), sliced.get(), position, {&tree});
@@ -107,8 +104,7 @@ TEST(GrowHistMaker, ColumnSplit) {
  auto constexpr kRows = 32;
  auto constexpr kCols = 16;

-  RegTree expected_tree;
-  expected_tree.param.num_feature = kCols;
+  RegTree expected_tree{1u, kCols};
  ObjInfo task{ObjInfo::kRegression};
  {
    auto p_dmat = GenerateDMatrix(kRows, kCols);
--- a/tests/cpp/tree/test_multi_target_tree_model.cc
+++ b/tests/cpp/tree/test_multi_target_tree_model.cc
@@ -17,8 +17,8 @@ TEST(MultiTargetTree, JsonIO) {
  linalg::Vector<float> right_weight{{3.0f, 4.0f, 5.0f}, {3ul}, Context::kCpuId};
  tree.ExpandNode(RegTree::kRoot, /*split_idx=*/1, 0.5f, true, base_weight.HostView(),
                  left_weight.HostView(), right_weight.HostView());
-  ASSERT_EQ(tree.param.num_nodes, 3);
-  ASSERT_EQ(tree.param.size_leaf_vector, 3);
+  ASSERT_EQ(tree.NumNodes(), 3);
+  ASSERT_EQ(tree.NumTargets(), 3);
  ASSERT_EQ(tree.GetMultiTargetTree()->Size(), 3);
  ASSERT_EQ(tree.Size(), 3);

@@ -26,20 +26,19 @@ TEST(MultiTargetTree, JsonIO) {
  tree.SaveModel(&jtree);

  auto check_jtree = [](Json jtree, RegTree const& tree) {
-    ASSERT_EQ(get<String const>(jtree["tree_param"]["num_nodes"]),
-              std::to_string(tree.param.num_nodes));
+    ASSERT_EQ(get<String const>(jtree["tree_param"]["num_nodes"]), std::to_string(tree.NumNodes()));
    ASSERT_EQ(get<F32Array const>(jtree["base_weights"]).size(),
-              tree.param.num_nodes * tree.param.size_leaf_vector);
-    ASSERT_EQ(get<I32Array const>(jtree["parents"]).size(), tree.param.num_nodes);
-    ASSERT_EQ(get<I32Array const>(jtree["left_children"]).size(), tree.param.num_nodes);
-    ASSERT_EQ(get<I32Array const>(jtree["right_children"]).size(), tree.param.num_nodes);
+              tree.NumNodes() * tree.NumTargets());
+    ASSERT_EQ(get<I32Array const>(jtree["parents"]).size(), tree.NumNodes());
+    ASSERT_EQ(get<I32Array const>(jtree["left_children"]).size(), tree.NumNodes());
+    ASSERT_EQ(get<I32Array const>(jtree["right_children"]).size(), tree.NumNodes());
  };
  check_jtree(jtree, tree);

  RegTree loaded;
  loaded.LoadModel(jtree);
  ASSERT_TRUE(loaded.IsMultiTarget());
-  ASSERT_EQ(loaded.param.num_nodes, 3);
+  ASSERT_EQ(loaded.NumNodes(), 3);

  Json jtree1{Object{}};
  loaded.SaveModel(&jtree1);
--- a/tests/cpp/tree/test_partitioner.h
+++ b/tests/cpp/tree/test_partitioner.h
@@ -1,17 +1,20 @@
-/*!
- * Copyright 2021-2022, XGBoost contributors.
+/**
+ * Copyright 2021-2023 by XGBoost contributors.
 */
 #ifndef XGBOOST_TESTS_CPP_TREE_TEST_PARTITIONER_H_
 #define XGBOOST_TESTS_CPP_TREE_TEST_PARTITIONER_H_
-#include <xgboost/tree_model.h>
+#include <xgboost/context.h>                      // for Context
+#include <xgboost/linalg.h>                       // for Constant, Vector
+#include <xgboost/logging.h>                      // for CHECK
+#include <xgboost/tree_model.h>                   // for RegTree

-#include <vector>
+#include <vector>                                 // for vector

-#include "../../../src/tree/hist/expand_entry.h"
+#include "../../../src/tree/hist/expand_entry.h"  // for CPUExpandEntry, MultiExpandEntry

-namespace xgboost {
-namespace tree {
+namespace xgboost::tree {
 inline void GetSplit(RegTree *tree, float split_value, std::vector<CPUExpandEntry> *candidates) {
+  CHECK(!tree->IsMultiTarget());
  tree->ExpandNode(
      /*nid=*/RegTree::kRoot, /*split_index=*/0, /*split_value=*/split_value,
      /*default_left=*/true, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
@@ -21,6 +24,22 @@ inline void GetSplit(RegTree *tree, float split_value, std::vector<CPUExpandEntr
  candidates->front().split.sindex = 0;
  candidates->front().split.sindex |= (1U << 31);
 }
-}  // namespace tree
-}  // namespace xgboost
+
+inline void GetMultiSplitForTest(RegTree *tree, float split_value,
+                                 std::vector<MultiExpandEntry> *candidates) {
+  CHECK(tree->IsMultiTarget());  // multi-target counterpart of GetSplit
+  auto n_targets = tree->NumTargets();
+  Context ctx;
+  linalg::Vector<float> base_weight{linalg::Constant(&ctx, 0.0f, n_targets)};  // all-zero weights
+  linalg::Vector<float> left_weight{linalg::Constant(&ctx, 0.0f, n_targets)};
+  linalg::Vector<float> right_weight{linalg::Constant(&ctx, 0.0f, n_targets)};
+
+  tree->ExpandNode(/*nidx=*/RegTree::kRoot, /*split_index=*/0, /*split_value=*/split_value,
+                   /*default_left=*/true, base_weight.HostView(), left_weight.HostView(),
+                   right_weight.HostView());
+  candidates->front().split.split_value = split_value;  // mirror the split in the first candidate
+  candidates->front().split.sindex = 0;
+  candidates->front().split.sindex |= (1U << 31);  // top bit: presumably default-left -- confirm
+}
+}  // namespace xgboost::tree
 #endif  // XGBOOST_TESTS_CPP_TREE_TEST_PARTITIONER_H_
--- a/tests/cpp/tree/test_prune.cc
+++ b/tests/cpp/tree/test_prune.cc
@@ -32,8 +32,7 @@ TEST(Updater, Prune) {
  auto ctx = CreateEmptyGenericParam(GPUIDX);

  // prepare tree
-  RegTree tree = RegTree();
-  tree.param.UpdateAllowUnknown(cfg);
+  RegTree tree = RegTree{1u, kCols};
  std::vector<RegTree*> trees {&tree};
  // prepare pruner
  TrainParam param;
--- a/tests/cpp/tree/test_quantile_hist.cc
+++ b/tests/cpp/tree/test_quantile_hist.cc
@@ -1,25 +1,29 @@
-/*!
- * Copyright 2018-2022 by XGBoost Contributors
+/**
+ * Copyright 2018-2023 by XGBoost Contributors
 */
 #include <gtest/gtest.h>
 #include <xgboost/host_device_vector.h>
 #include <xgboost/tree_updater.h>

 #include <algorithm>
+#include <cstddef>  // for size_t
 #include <string>
 #include <vector>

+#include "../../../src/tree/common_row_partitioner.h"
+#include "../../../src/tree/hist/expand_entry.h"  // for MultiExpandEntry, CPUExpandEntry
 #include "../../../src/tree/param.h"
 #include "../../../src/tree/split_evaluator.h"
-#include "../../../src/tree/common_row_partitioner.h"
 #include "../helpers.h"
 #include "test_partitioner.h"
 #include "xgboost/data.h"

-namespace xgboost {
-namespace tree {
-TEST(QuantileHist, Partitioner) {
-  size_t n_samples = 1024, n_features = 1, base_rowid = 0;
+namespace xgboost::tree {
+template <typename ExpandEntry>
+void TestPartitioner(bst_target_t n_targets) {
+  std::size_t n_samples = 1024, base_rowid = 0;
+  bst_feature_t n_features = 1;
+
  Context ctx;
  ctx.InitAllowUnknown(Args{});

@@ -29,7 +33,8 @@ TEST(QuantileHist, Partitioner) {
  ASSERT_EQ(partitioner.Partitions()[0].Size(), n_samples);

  auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
-  std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
+  std::vector<ExpandEntry> candidates{{0, 0}};
+  candidates.front().split.loss_chg = 0.4;

  auto cuts = common::SketchOnDMatrix(Xy.get(), 64, ctx.Threads());

@@ -40,9 +45,13 @@ TEST(QuantileHist, Partitioner) {
    column_indices.InitFromSparse(page, gmat, 0.5, ctx.Threads());
    {
      auto min_value = gmat.cut.MinValues()[split_ind];
-      RegTree tree;
+      RegTree tree{n_targets, n_features};
      CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
-      GetSplit(&tree, min_value, &candidates);
+      if constexpr (std::is_same<ExpandEntry, CPUExpandEntry>::value) {
+        GetSplit(&tree, min_value, &candidates);
+      } else {
+        GetMultiSplitForTest(&tree, min_value, &candidates);
+      }
      partitioner.UpdatePosition<false, true>(&ctx, gmat, column_indices, candidates, &tree);
      ASSERT_EQ(partitioner.Size(), 3);
      ASSERT_EQ(partitioner[1].Size(), 0);
@@ -52,9 +61,13 @@ TEST(QuantileHist, Partitioner) {
      CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
      auto ptr = gmat.cut.Ptrs()[split_ind + 1];
      float split_value = gmat.cut.Values().at(ptr / 2);
-      RegTree tree;
-      GetSplit(&tree, split_value, &candidates);
-      auto left_nidx = tree[RegTree::kRoot].LeftChild();
+      RegTree tree{n_targets, n_features};
+      if constexpr (std::is_same<ExpandEntry, CPUExpandEntry>::value) {
+        GetSplit(&tree, split_value, &candidates);
+      } else {
+        GetMultiSplitForTest(&tree, split_value, &candidates);
+      }
+      auto left_nidx = tree.LeftChild(RegTree::kRoot);
      partitioner.UpdatePosition<false, true>(&ctx, gmat, column_indices, candidates, &tree);

      auto elem = partitioner[left_nidx];
@@ -64,14 +77,17 @@ TEST(QuantileHist, Partitioner) {
        auto value = gmat.cut.Values().at(gmat.index[*it]);
        ASSERT_LE(value, split_value);
      }
-      auto right_nidx = tree[RegTree::kRoot].RightChild();
+      auto right_nidx = tree.RightChild(RegTree::kRoot);
      elem = partitioner[right_nidx];
      for (auto it = elem.begin; it != elem.end; ++it) {
        auto value = gmat.cut.Values().at(gmat.index[*it]);
-        ASSERT_GT(value, split_value) << *it;
+        ASSERT_GT(value, split_value);
      }
    }
  }
 }
-}  // namespace tree
-}  // namespace xgboost
+
+TEST(QuantileHist, Partitioner) { TestPartitioner<CPUExpandEntry>(1); }  // n_targets = 1
+
+TEST(QuantileHist, MultiPartitioner) { TestPartitioner<MultiExpandEntry>(3); }  // n_targets = 3
+}  // namespace xgboost::tree
--- a/tests/cpp/tree/test_refresh.cc
+++ b/tests/cpp/tree/test_refresh.cc
@@ -28,9 +28,8 @@ TEST(Updater, Refresh) {
      {"num_feature", std::to_string(kCols)},
      {"reg_lambda", "1"}};

-  RegTree tree = RegTree();
+  RegTree tree = RegTree{1u, kCols};
  auto ctx = CreateEmptyGenericParam(GPUIDX);
-  tree.param.UpdateAllowUnknown(cfg);
  std::vector<RegTree*> trees{&tree};

  ObjInfo task{ObjInfo::kRegression};
--- a/tests/cpp/tree/test_tree_model.cc
+++ b/tests/cpp/tree/test_tree_model.cc
@@ -11,9 +11,8 @@
 namespace xgboost {
 TEST(Tree, ModelShape) {
  bst_feature_t n_features = std::numeric_limits<uint32_t>::max();
-  RegTree tree;
-  tree.param.UpdateAllowUnknown(Args{{"num_feature", std::to_string(n_features)}});
-  ASSERT_EQ(tree.param.num_feature, n_features);
+  RegTree tree{1u, n_features};
+  ASSERT_EQ(tree.NumFeatures(), n_features);

  dmlc::TemporaryDirectory tempdir;
  const std::string tmp_file = tempdir.path + "/tree.model";
@@ -27,7 +26,7 @@ TEST(Tree, ModelShape) {
    RegTree new_tree;
    std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(tmp_file.c_str(), "r"));
    new_tree.Load(fi.get());
-    ASSERT_EQ(new_tree.param.num_feature, n_features);
+    ASSERT_EQ(new_tree.NumFeatures(), n_features);
  }
  {
    // json
@@ -39,7 +38,7 @@ TEST(Tree, ModelShape) {

    auto j_loaded = Json::Load(StringView{dumped.data(), dumped.size()});
    new_tree.LoadModel(j_loaded);
-    ASSERT_EQ(new_tree.param.num_feature, n_features);
+    ASSERT_EQ(new_tree.NumFeatures(), n_features);
  }
  {
    // ubjson
@@ -51,7 +50,7 @@ TEST(Tree, ModelShape) {

    auto j_loaded = Json::Load(StringView{dumped.data(), dumped.size()}, std::ios::binary);
    new_tree.LoadModel(j_loaded);
-    ASSERT_EQ(new_tree.param.num_feature, n_features);
+    ASSERT_EQ(new_tree.NumFeatures(), n_features);
  }
 }

@@ -488,8 +487,7 @@ TEST(Tree, JsonIO) {

  RegTree loaded_tree;
  loaded_tree.LoadModel(j_tree);
-  ASSERT_EQ(loaded_tree.param.num_nodes, 3);
-
+  ASSERT_EQ(loaded_tree.NumNodes(), 3);
  ASSERT_TRUE(loaded_tree == tree);

  auto left = tree[0].LeftChild();
--- a/tests/cpp/tree/test_tree_stat.cc
+++ b/tests/cpp/tree/test_tree_stat.cc
@@ -37,8 +37,7 @@ class UpdaterTreeStatTest : public ::testing::Test {
                                           : CreateEmptyGenericParam(Context::kCpuId));
    auto up = std::unique_ptr<TreeUpdater>{TreeUpdater::Create(updater, &ctx, &task)};
    up->Configure(Args{});
-    RegTree tree;
-    tree.param.num_feature = kCols;
+    RegTree tree{1u, kCols};
    std::vector<HostDeviceVector<bst_node_t>> position(1);
    up->Update(&param, &gpairs_, p_dmat_.get(), position, {&tree});

@@ -95,16 +94,14 @@ class UpdaterEtaTest : public ::testing::Test {
    param1.Init(Args{{"eta", "1.0"}});

    for (size_t iter = 0; iter < 4; ++iter) {
-      RegTree tree_0;
+      RegTree tree_0{1u, kCols};
      {
-        tree_0.param.num_feature = kCols;
        std::vector<HostDeviceVector<bst_node_t>> position(1);
        up_0->Update(&param0, &gpairs_, p_dmat_.get(), position, {&tree_0});
      }

-      RegTree tree_1;
+      RegTree tree_1{1u, kCols};
      {
-        tree_1.param.num_feature = kCols;
        std::vector<HostDeviceVector<bst_node_t>> position(1);
        up_1->Update(&param1, &gpairs_, p_dmat_.get(), position, {&tree_1});
      }
--- a/tests/python-gpu/test_device_quantile_dmatrix.py
+++ b/tests/python-gpu/test_device_quantile_dmatrix.py
@@ -6,6 +6,7 @@ from hypothesis import given, settings, strategies

 import xgboost as xgb
 from xgboost import testing as tm
+from xgboost.testing.data import check_inf

 sys.path.append("tests/python")
 import test_quantile_dmatrix as tqd
@@ -153,3 +154,9 @@ class TestQuantileDMatrix:
        from_qdm = xgb.QuantileDMatrix(X, weight=w, ref=Xy_qdm)

        assert tm.predictor_equal(from_qdm, from_dm)
+
+    @pytest.mark.skipif(**tm.no_cupy())
+    def test_check_inf(self) -> None:
+        import cupy as cp
+        rng = cp.random.default_rng(1994)
+        check_inf(rng)
--- a/tests/python-gpu/test_gpu_ranking.py
+++ b/tests/python-gpu/test_gpu_ranking.py
@@ -1,194 +1,130 @@
-import itertools
 import os
-import shutil
-import urllib.request
-import zipfile
+from typing import Dict

 import numpy as np
+import pytest

 import xgboost
 from xgboost import testing as tm

-pytestmark = tm.timeout(10)
+pytestmark = tm.timeout(30)


-class TestRanking:
-    @classmethod
-    def setup_class(cls):
-        """
-        Download and setup the test fixtures
-        """
-        from sklearn.datasets import load_svmlight_files
+def comp_training_with_rank_objective(
+    dtrain: xgboost.DMatrix,
+    dtest: xgboost.DMatrix,
+    rank_objective: str,
+    metric_name: str,
+    tolerance: float = 1e-02,
+) -> None:
+    """Internal method that trains on the dataset using the rank objective on GPU and
+    CPU, evaluates the metric and checks that the difference between the two metric
+    values is within the tolerance level.

-        # download the test data
-        cls.dpath = os.path.join(tm.demo_dir(__file__), "rank/")
-        src = 'https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zip'
-        target = os.path.join(cls.dpath, "MQ2008.zip")
+    """
+    # specify validation sets to watch performance
+    watchlist = [(dtest, "eval"), (dtrain, "train")]

-        if os.path.exists(cls.dpath) and os.path.exists(target):
-            print("Skipping dataset download...")
-        else:
-            urllib.request.urlretrieve(url=src, filename=target)
-            with zipfile.ZipFile(target, 'r') as f:
-                f.extractall(path=cls.dpath)
+    params = {
+        "booster": "gbtree",
+        "tree_method": "gpu_hist",
+        "gpu_id": 0,
+        "predictor": "gpu_predictor",
+    }

-        (x_train, y_train, qid_train, x_test, y_test, qid_test,
-         x_valid, y_valid, qid_valid) = load_svmlight_files(
-            (cls.dpath + "MQ2008/Fold1/train.txt",
-             cls.dpath + "MQ2008/Fold1/test.txt",
-             cls.dpath + "MQ2008/Fold1/vali.txt"),
-            query_id=True, zero_based=False)
-        # instantiate the matrices
-        cls.dtrain = xgboost.DMatrix(x_train, y_train)
-        cls.dvalid = xgboost.DMatrix(x_valid, y_valid)
-        cls.dtest = xgboost.DMatrix(x_test, y_test)
-        # set the group counts from the query IDs
-        cls.dtrain.set_group([len(list(items))
-                              for _key, items in itertools.groupby(qid_train)])
-        cls.dtest.set_group([len(list(items))
-                             for _key, items in itertools.groupby(qid_test)])
-        cls.dvalid.set_group([len(list(items))
-                              for _key, items in itertools.groupby(qid_valid)])
-        # save the query IDs for testing
-        cls.qid_train = qid_train
-        cls.qid_test = qid_test
-        cls.qid_valid = qid_valid
+    num_trees = 100
+    check_metric_improvement_rounds = 10

-        def setup_weighted(x, y, groups):
-            # Setup weighted data
-            data = xgboost.DMatrix(x, y)
-            groups_segment = [len(list(items))
-                              for _key, items in itertools.groupby(groups)]
-            data.set_group(groups_segment)
-            n_groups = len(groups_segment)
-            weights = np.ones((n_groups,))
-            data.set_weight(weights)
-            return data
+    evals_result: Dict[str, Dict] = {}
+    params["objective"] = rank_objective
+    params["eval_metric"] = metric_name
+    bst = xgboost.train(
+        params,
+        dtrain,
+        num_boost_round=num_trees,
+        early_stopping_rounds=check_metric_improvement_rounds,
+        evals=watchlist,
+        evals_result=evals_result,
+    )
+    gpu_scores = evals_result["train"][metric_name][-1]

-        cls.dtrain_w = setup_weighted(x_train, y_train, qid_train)
-        cls.dtest_w = setup_weighted(x_test, y_test, qid_test)
-        cls.dvalid_w = setup_weighted(x_valid, y_valid, qid_valid)
+    evals_result = {}

-        # model training parameters
-        cls.params = {'booster': 'gbtree',
-                      'tree_method': 'gpu_hist',
-                      'gpu_id': 0,
-                      'predictor': 'gpu_predictor'}
-        cls.cpu_params = {'booster': 'gbtree',
-                          'tree_method': 'hist',
-                          'gpu_id': -1,
-                          'predictor': 'cpu_predictor'}
+    cpu_params = {
+        "booster": "gbtree",
+        "tree_method": "hist",
+        "gpu_id": -1,
+        "predictor": "cpu_predictor",
+    }
+    cpu_params["objective"] = rank_objective
+    cpu_params["eval_metric"] = metric_name
+    bstc = xgboost.train(
+        cpu_params,
+        dtrain,
+        num_boost_round=num_trees,
+        early_stopping_rounds=check_metric_improvement_rounds,
+        evals=watchlist,
+        evals_result=evals_result,
+    )
+    cpu_scores = evals_result["train"][metric_name][-1]

-    @classmethod
-    def teardown_class(cls):
-        """
-        Cleanup test artifacts from download and unpacking
-        :return:
-        """
-        os.remove(os.path.join(cls.dpath, "MQ2008.zip"))
-        shutil.rmtree(os.path.join(cls.dpath, "MQ2008"))
+    info = (rank_objective, metric_name)
+    assert np.allclose(gpu_scores, cpu_scores, tolerance, tolerance), info
+    assert np.allclose(bst.best_score, bstc.best_score, tolerance, tolerance), info

-    @classmethod
-    def __test_training_with_rank_objective(cls, rank_objective, metric_name, tolerance=1e-02):
-        """
-        Internal method that trains the dataset using the rank objective on GPU and CPU, evaluates
-        the metric and determines if the delta between the metric is within the tolerance level
-        :return:
-        """
-        # specify validations set to watch performance
-        watchlist = [(cls.dtest, 'eval'), (cls.dtrain, 'train')]
+    evals_result_weighted: Dict[str, Dict] = {}
+    dtest.set_weight(np.ones((dtest.get_group().size,)))
+    dtrain.set_weight(np.ones((dtrain.get_group().size,)))
+    watchlist = [(dtest, "eval"), (dtrain, "train")]
+    bst_w = xgboost.train(
+        params,
+        dtrain,
+        num_boost_round=num_trees,
+        early_stopping_rounds=check_metric_improvement_rounds,
+        evals=watchlist,
+        evals_result=evals_result_weighted,
+    )
+    weighted_metric = evals_result_weighted["train"][metric_name][-1]

-        num_trees = 100
-        check_metric_improvement_rounds = 10
+    tolerance = 1e-5
+    assert np.allclose(bst_w.best_score, bst.best_score, tolerance, tolerance)
+    assert np.allclose(weighted_metric, gpu_scores, tolerance, tolerance)

-        evals_result = {}
-        cls.params['objective'] = rank_objective
-        cls.params['eval_metric'] = metric_name
-        bst = xgboost.train(
-            cls.params, cls.dtrain, num_boost_round=num_trees,
-            early_stopping_rounds=check_metric_improvement_rounds,
-            evals=watchlist, evals_result=evals_result)
-        gpu_map_metric = evals_result['train'][metric_name][-1]

-        evals_result = {}
-        cls.cpu_params['objective'] = rank_objective
-        cls.cpu_params['eval_metric'] = metric_name
-        bstc = xgboost.train(
-            cls.cpu_params, cls.dtrain, num_boost_round=num_trees,
-            early_stopping_rounds=check_metric_improvement_rounds,
-            evals=watchlist, evals_result=evals_result)
-        cpu_map_metric = evals_result['train'][metric_name][-1]
+@pytest.mark.parametrize(
+    "objective,metric",
+    [
+        ("rank:pairwise", "auc"),
+        ("rank:pairwise", "ndcg"),
+        ("rank:pairwise", "map"),
+        ("rank:ndcg", "auc"),
+        ("rank:ndcg", "ndcg"),
+        ("rank:ndcg", "map"),
+        ("rank:map", "auc"),
+        ("rank:map", "ndcg"),
+        ("rank:map", "map"),
+    ],
+)
+def test_with_mq2008(objective, metric) -> None:
+    (
+        x_train,
+        y_train,
+        qid_train,
+        x_test,
+        y_test,
+        qid_test,
+        x_valid,
+        y_valid,
+        qid_valid,
+    ) = tm.data.get_mq2008(os.path.join(os.path.join(tm.demo_dir(__file__), "rank")))

-        assert np.allclose(gpu_map_metric, cpu_map_metric, tolerance,
-                           tolerance)
-        assert np.allclose(bst.best_score, bstc.best_score, tolerance,
-                           tolerance)
+    if metric.find("map") != -1 or objective.find("map") != -1:
+        y_train[y_train <= 1] = 0.0
+        y_train[y_train > 1] = 1.0
+        y_test[y_test <= 1] = 0.0
+        y_test[y_test > 1] = 1.0

-        evals_result_weighted = {}
-        watchlist = [(cls.dtest_w, 'eval'), (cls.dtrain_w, 'train')]
-        bst_w = xgboost.train(
-            cls.params, cls.dtrain_w, num_boost_round=num_trees,
-            early_stopping_rounds=check_metric_improvement_rounds,
-            evals=watchlist, evals_result=evals_result_weighted)
-        weighted_metric = evals_result_weighted['train'][metric_name][-1]
-        # GPU Ranking is not deterministic due to `AtomicAddGpair`,
-        # remove tolerance once the issue is resolved.
-        # https://github.com/dmlc/xgboost/issues/5561
-        assert np.allclose(bst_w.best_score, bst.best_score,
-                           tolerance, tolerance)
-        assert np.allclose(weighted_metric, gpu_map_metric,
-                           tolerance, tolerance)
+    dtrain = xgboost.DMatrix(x_train, y_train, qid=qid_train)
+    dtest = xgboost.DMatrix(x_test, y_test, qid=qid_test)

-    def test_training_rank_pairwise_map_metric(self):
-        """
-        Train an XGBoost ranking model with pairwise objective function and compare map metric
-        """
-        self.__test_training_with_rank_objective('rank:pairwise', 'map')
-
-    def test_training_rank_pairwise_auc_metric(self):
-        """
-        Train an XGBoost ranking model with pairwise objective function and compare auc metric
-        """
-        self.__test_training_with_rank_objective('rank:pairwise', 'auc')
-
-    def test_training_rank_pairwise_ndcg_metric(self):
-        """
-        Train an XGBoost ranking model with pairwise objective function and compare ndcg metric
-        """
-        self.__test_training_with_rank_objective('rank:pairwise', 'ndcg')
-
-    def test_training_rank_ndcg_map(self):
-        """
-        Train an XGBoost ranking model with ndcg objective function and compare map metric
-        """
-        self.__test_training_with_rank_objective('rank:ndcg', 'map')
-
-    def test_training_rank_ndcg_auc(self):
-        """
-        Train an XGBoost ranking model with ndcg objective function and compare auc metric
-        """
-        self.__test_training_with_rank_objective('rank:ndcg', 'auc')
-
-    def test_training_rank_ndcg_ndcg(self):
-        """
-        Train an XGBoost ranking model with ndcg objective function and compare ndcg metric
-        """
-        self.__test_training_with_rank_objective('rank:ndcg', 'ndcg')
-
-    def test_training_rank_map_map(self):
-        """
-        Train an XGBoost ranking model with map objective function and compare map metric
-        """
-        self.__test_training_with_rank_objective('rank:map', 'map')
-
-    def test_training_rank_map_auc(self):
-        """
-        Train an XGBoost ranking model with map objective function and compare auc metric
-        """
-        self.__test_training_with_rank_objective('rank:map', 'auc')
-
-    def test_training_rank_map_ndcg(self):
-        """
-        Train an XGBoost ranking model with map objective function and compare ndcg metric
-        """
-        self.__test_training_with_rank_objective('rank:map', 'ndcg')
+    comp_training_with_rank_objective(dtrain, dtest, objective, metric)
--- a/tests/python-gpu/test_gpu_updaters.py
+++ b/tests/python-gpu/test_gpu_updaters.py
@@ -32,6 +32,19 @@ def train_result(param, dmat: xgb.DMatrix, num_rounds: int) -> dict:
    return result


+class TestGPUUpdatersMulti:
+    @given(
+        hist_parameter_strategy, strategies.integers(1, 20), tm.multi_dataset_strategy
+    )
+    @settings(deadline=None, max_examples=50, print_blob=True)
+    def test_hist(self, param, num_rounds, dataset):
+        param["tree_method"] = "gpu_hist"
+        param = dataset.set_params(param)
+        result = train_result(param, dataset.get_dmat(), num_rounds)
+        note(result)
+        assert tm.non_increasing(result["train"][dataset.metric])
+
+
 class TestGPUUpdaters:
    cputest = test_up.TestTreeMethod()

@@ -101,7 +114,7 @@ class TestGPUUpdaters:
    ) -> None:
        cat_parameters.update(hist_parameters)
        dataset = tm.TestDataset(
-            "ames_housing", tm.get_ames_housing, "reg:squarederror", "rmse"
+            "ames_housing", tm.data.get_ames_housing, "reg:squarederror", "rmse"
        )
        cat_parameters["tree_method"] = "gpu_hist"
        results = train_result(cat_parameters, dataset.get_dmat(), 16)
--- a/tests/python/test_basic_models.py
+++ b/tests/python/test_basic_models.py
@@ -15,13 +15,17 @@ rng = np.random.RandomState(1994)


 def json_model(model_path: str, parameters: dict) -> dict:
-    X = np.random.random((10, 3))
-    y = np.random.randint(2, size=(10,))
+    datasets = pytest.importorskip("sklearn.datasets")
+
+    X, y = datasets.make_classification(64, n_features=8, n_classes=3, n_informative=6)
+    if parameters.get("objective", None) == "multi:softmax":
+        parameters["num_class"] = 3

    dm1 = xgb.DMatrix(X, y)

    bst = xgb.train(parameters, dm1)
    bst.save_model(model_path)
+
    if model_path.endswith("ubj"):
        import ubjson
        with open(model_path, "rb") as ubjfd:
@@ -234,6 +238,27 @@ class TestModels:
        xgb.cv(param, dtrain, num_round, nfold=5,
               metrics={'error'}, seed=0, show_stdv=False)

+    def test_prediction_cache(self) -> None:
+        X, y = tm.make_sparse_regression(512, 4, 0.5, as_dense=False)
+        Xy = xgb.DMatrix(X, y)
+        param = {"max_depth": 8}
+        booster = xgb.train(param, Xy, num_boost_round=1)
+        with tempfile.TemporaryDirectory() as tmpdir:
+            path = os.path.join(tmpdir, "model.json")
+            booster.save_model(path)
+
+            predt_0 = booster.predict(Xy)
+
+            param["max_depth"] = 2
+
+            booster = xgb.train(param, Xy, num_boost_round=1)
+            predt_1 = booster.predict(Xy)
+            assert not np.isclose(predt_0, predt_1).all()
+
+            booster.load_model(path)
+            predt_2 = booster.predict(Xy)
+            np.testing.assert_allclose(predt_0, predt_2)
+
    def test_feature_names_validation(self):
        X = np.random.random((10, 3))
        y = np.random.randint(2, size=(10,))
@@ -305,24 +330,43 @@ class TestModels:
        from_ubjraw = xgb.Booster()
        from_ubjraw.load_model(ubj_raw)

-        old_from_json = from_jraw.save_raw(raw_format="deprecated")
-        old_from_ubj = from_ubjraw.save_raw(raw_format="deprecated")
+        if parameters.get("multi_strategy", None) != "multi_output_tree":
+            # the old binary model format is not supported by multi-output trees.
+            old_from_json = from_jraw.save_raw(raw_format="deprecated")
+            old_from_ubj = from_ubjraw.save_raw(raw_format="deprecated")

-        assert old_from_json == old_from_ubj
+            assert old_from_json == old_from_ubj

        raw_json = bst.save_raw(raw_format="json")
        pretty = json.dumps(json.loads(raw_json), indent=2) + "\n\n"
        bst.load_model(bytearray(pretty, encoding="ascii"))

-        old_from_json = from_jraw.save_raw(raw_format="deprecated")
-        old_from_ubj = from_ubjraw.save_raw(raw_format="deprecated")
+        if parameters.get("multi_strategy", None) != "multi_output_tree":
+            # the old binary model format is not supported by multi-output trees.
+            old_from_json = from_jraw.save_raw(raw_format="deprecated")
+            old_from_ubj = from_ubjraw.save_raw(raw_format="deprecated")

-        assert old_from_json == old_from_ubj
+            assert old_from_json == old_from_ubj
+
+        rng = np.random.default_rng()
+        X = rng.random(size=from_jraw.num_features() * 10).reshape(
+            (10, from_jraw.num_features())
+        )
+        predt_from_jraw = from_jraw.predict(xgb.DMatrix(X))
+        predt_from_bst = bst.predict(xgb.DMatrix(X))
+        np.testing.assert_allclose(predt_from_jraw, predt_from_bst)

    @pytest.mark.parametrize("ext", ["json", "ubj"])
    def test_model_json_io(self, ext: str) -> None:
        parameters = {"booster": "gbtree", "tree_method": "hist"}
        self.run_model_json_io(parameters, ext)
+        parameters = {
+            "booster": "gbtree",
+            "tree_method": "hist",
+            "multi_strategy": "multi_output_tree",
+            "objective": "multi:softmax",
+        }
+        self.run_model_json_io(parameters, ext)
        parameters = {"booster": "gblinear"}
        self.run_model_json_io(parameters, ext)
        parameters = {"booster": "dart", "tree_method": "hist"}
--- a/tests/python/test_callback.py
+++ b/tests/python/test_callback.py
@@ -465,7 +465,7 @@ class TestCallbacks:
                assert os.path.exists(os.path.join(tmpdir, "model_" + str(i) + ".pkl"))

    def test_callback_list(self):
-        X, y = tm.get_california_housing()
+        X, y = tm.data.get_california_housing()
        m = xgb.DMatrix(X, y)
        callbacks = [xgb.callback.EarlyStopping(rounds=10)]
        for i in range(4):
--- a/tests/python/test_quantile_dmatrix.py
+++ b/tests/python/test_quantile_dmatrix.py
@@ -15,7 +15,7 @@ from xgboost.testing import (
    make_sparse_regression,
    predictor_equal,
 )
-from xgboost.testing.data import np_dtypes
+from xgboost.testing.data import check_inf, np_dtypes


 class TestQuantileDMatrix:
@@ -244,6 +244,10 @@ class TestQuantileDMatrix:
        from_dm = xgb.QuantileDMatrix(X, weight=w, ref=Xy)
        assert predictor_equal(from_qdm, from_dm)

+    def test_check_inf(self) -> None:
+        rng = np.random.default_rng(1994)
+        check_inf(rng)
+
    # we don't test empty Quantile DMatrix in single node construction.
    @given(
        strategies.integers(1, 1000),
--- a/tests/python/test_ranking.py
+++ b/tests/python/test_ranking.py
@@ -82,7 +82,7 @@ class TestRanking:
        """
        cls.dpath = 'demo/rank/'
        (x_train, y_train, qid_train, x_test, y_test, qid_test,
-         x_valid, y_valid, qid_valid) = tm.get_mq2008(cls.dpath)
+         x_valid, y_valid, qid_valid) = tm.data.get_mq2008(cls.dpath)

        # instantiate the matrices
        cls.dtrain = xgboost.DMatrix(x_train, y_train)
--- a/tests/python/test_updaters.py
+++ b/tests/python/test_updaters.py
@@ -11,6 +11,7 @@ from xgboost import testing as tm
 from xgboost.testing.params import (
    cat_parameter_strategy,
    exact_parameter_strategy,
+    hist_multi_parameter_strategy,
    hist_parameter_strategy,
 )
 from xgboost.testing.updater import check_init_estimation, check_quantile_loss
@@ -18,11 +19,70 @@ from xgboost.testing.updater import check_init_estimation, check_quantile_loss

 def train_result(param, dmat, num_rounds):
    result = {}
-    xgb.train(param, dmat, num_rounds, [(dmat, 'train')], verbose_eval=False,
-              evals_result=result)
+    booster = xgb.train(
+        param,
+        dmat,
+        num_rounds,
+        [(dmat, "train")],
+        verbose_eval=False,
+        evals_result=result,
+    )
+    assert booster.num_features() == dmat.num_col()
+    assert booster.num_boosted_rounds() == num_rounds
+    assert booster.feature_names == dmat.feature_names
+    assert booster.feature_types == dmat.feature_types
+
    return result


+class TestTreeMethodMulti:
+    @given(
+        exact_parameter_strategy, strategies.integers(1, 20), tm.multi_dataset_strategy
+    )
+    @settings(deadline=None, print_blob=True)
+    def test_exact(self, param: dict, num_rounds: int, dataset: tm.TestDataset) -> None:
+        if dataset.name.endswith("-l1"):
+            return
+        param["tree_method"] = "exact"
+        param = dataset.set_params(param)
+        result = train_result(param, dataset.get_dmat(), num_rounds)
+        assert tm.non_increasing(result["train"][dataset.metric])
+
+    @given(
+        exact_parameter_strategy,
+        hist_parameter_strategy,
+        strategies.integers(1, 20),
+        tm.multi_dataset_strategy,
+    )
+    @settings(deadline=None, print_blob=True)
+    def test_approx(self, param, hist_param, num_rounds, dataset):
+        param["tree_method"] = "approx"
+        param = dataset.set_params(param)
+        param.update(hist_param)
+        result = train_result(param, dataset.get_dmat(), num_rounds)
+        note(result)
+        assert tm.non_increasing(result["train"][dataset.metric])
+
+    @given(
+        exact_parameter_strategy,
+        hist_multi_parameter_strategy,
+        strategies.integers(1, 20),
+        tm.multi_dataset_strategy,
+    )
+    @settings(deadline=None, print_blob=True)
+    def test_hist(
+        self, param: dict, hist_param: dict, num_rounds: int, dataset: tm.TestDataset
+    ) -> None:
+        if dataset.name.endswith("-l1"):
+            return
+        param["tree_method"] = "hist"
+        param = dataset.set_params(param)
+        param.update(hist_param)
+        result = train_result(param, dataset.get_dmat(), num_rounds)
+        note(result)
+        assert tm.non_increasing(result["train"][dataset.metric])
+
+
 class TestTreeMethod:
    USE_ONEHOT = np.iinfo(np.int32).max
    USE_PART = 1
@@ -77,10 +137,14 @@ class TestTreeMethod:
        # Second prune should not change the tree
        assert after_prune == second_prune

-    @given(exact_parameter_strategy, hist_parameter_strategy, strategies.integers(1, 20),
-           tm.dataset_strategy)
+    @given(
+        exact_parameter_strategy,
+        hist_parameter_strategy,
+        strategies.integers(1, 20),
+        tm.dataset_strategy
+    )
    @settings(deadline=None, print_blob=True)
-    def test_hist(self, param, hist_param, num_rounds, dataset):
+    def test_hist(self, param: dict, hist_param: dict, num_rounds: int, dataset: tm.TestDataset) -> None:
        param['tree_method'] = 'hist'
        param = dataset.set_params(param)
        param.update(hist_param)
@@ -88,23 +152,6 @@ class TestTreeMethod:
        note(result)
        assert tm.non_increasing(result['train'][dataset.metric])

-    @given(tm.sparse_datasets_strategy)
-    @settings(deadline=None, print_blob=True)
-    def test_sparse(self, dataset):
-        param = {"tree_method": "hist", "max_bin": 64}
-        hist_result = train_result(param, dataset.get_dmat(), 16)
-        note(hist_result)
-        assert tm.non_increasing(hist_result['train'][dataset.metric])
-
-        param = {"tree_method": "approx", "max_bin": 64}
-        approx_result = train_result(param, dataset.get_dmat(), 16)
-        note(approx_result)
-        assert tm.non_increasing(approx_result['train'][dataset.metric])
-
-        np.testing.assert_allclose(
-            hist_result["train"]["rmse"], approx_result["train"]["rmse"]
-        )
-
    def test_hist_categorical(self):
        # hist must be same as exact on all-categorial data
        dpath = 'demo/data/'
@@ -143,6 +190,23 @@ class TestTreeMethod:
        w = [0, 0, 1, 0]
        model.fit(X, y, sample_weight=w)

+    @given(tm.sparse_datasets_strategy)
+    @settings(deadline=None, print_blob=True)
+    def test_sparse(self, dataset):
+        param = {"tree_method": "hist", "max_bin": 64}
+        hist_result = train_result(param, dataset.get_dmat(), 16)
+        note(hist_result)
+        assert tm.non_increasing(hist_result['train'][dataset.metric])
+
+        param = {"tree_method": "approx", "max_bin": 64}
+        approx_result = train_result(param, dataset.get_dmat(), 16)
+        note(approx_result)
+        assert tm.non_increasing(approx_result['train'][dataset.metric])
+
+        np.testing.assert_allclose(
+            hist_result["train"]["rmse"], approx_result["train"]["rmse"]
+        )
+
    def run_invalid_category(self, tree_method: str) -> None:
        rng = np.random.default_rng()
        # too large
@@ -365,7 +429,7 @@ class TestTreeMethod:
    ) -> None:
        cat_parameters.update(hist_parameters)
        dataset = tm.TestDataset(
-            "ames_housing", tm.get_ames_housing, "reg:squarederror", "rmse"
+            "ames_housing", tm.data.get_ames_housing, "reg:squarederror", "rmse"
        )
        cat_parameters["tree_method"] = tree_method
        results = train_result(cat_parameters, dataset.get_dmat(), 16)
--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@@ -128,12 +128,23 @@ def test_ranking():

    x_test = np.random.rand(100, 10)

-    params = {'tree_method': 'exact', 'objective': 'rank:pairwise',
-              'learning_rate': 0.1, 'gamma': 1.0, 'min_child_weight': 0.1,
-              'max_depth': 6, 'n_estimators': 4}
+    params = {
+        "tree_method": "exact",
+        "learning_rate": 0.1,
+        "gamma": 1.0,
+        "min_child_weight": 0.1,
+        "max_depth": 6,
+        "eval_metric": "ndcg",
+        "n_estimators": 4,
+    }
    model = xgb.sklearn.XGBRanker(**params)
-    model.fit(x_train, y_train, group=train_group,
-              eval_set=[(x_valid, y_valid)], eval_group=[valid_group])
+    model.fit(
+        x_train,
+        y_train,
+        group=train_group,
+        eval_set=[(x_valid, y_valid)],
+        eval_group=[valid_group],
+    )
    assert model.evals_result()

    pred = model.predict(x_test)
@@ -145,11 +156,18 @@ def test_ranking():
    assert train_data.get_label().shape[0] == x_train.shape[0]
    valid_data.set_group(valid_group)

-    params_orig = {'tree_method': 'exact', 'objective': 'rank:pairwise',
-                   'eta': 0.1, 'gamma': 1.0,
-                   'min_child_weight': 0.1, 'max_depth': 6}
-    xgb_model_orig = xgb.train(params_orig, train_data, num_boost_round=4,
-                               evals=[(valid_data, 'validation')])
+    params_orig = {
+        "tree_method": "exact",
+        "objective": "rank:pairwise",
+        "eta": 0.1,
+        "gamma": 1.0,
+        "min_child_weight": 0.1,
+        "max_depth": 6,
+        "eval_metric": "ndcg",
+    }
+    xgb_model_orig = xgb.train(
+        params_orig, train_data, num_boost_round=4, evals=[(valid_data, "validation")]
+    )
    pred_orig = xgb_model_orig.predict(test_data)

    np.testing.assert_almost_equal(pred, pred_orig)
@@ -165,7 +183,11 @@ def test_ranking_metric() -> None:
    # sklearn compares the number of mis-classified docs, while the one in xgboost
    # compares the number of mis-classified pairs.
    ltr = xgb.XGBRanker(
-        eval_metric=roc_auc_score, n_estimators=10, tree_method="hist", max_depth=2
+        eval_metric=roc_auc_score,
+        n_estimators=10,
+        tree_method="hist",
+        max_depth=2,
+        objective="rank:pairwise",
    )
    ltr.fit(
        X,
--- a/tests/test_distributed/test_with_dask/test_with_dask.py
+++ b/tests/test_distributed/test_with_dask/test_with_dask.py
@@ -1168,7 +1168,7 @@ def test_dask_aft_survival() -> None:

 def test_dask_ranking(client: "Client") -> None:
    dpath = "demo/rank/"
-    mq2008 = tm.get_mq2008(dpath)
+    mq2008 = tm.data.get_mq2008(dpath)
    data = []
    for d in mq2008:
        if isinstance(d, scipy.sparse.csr_matrix):