Merge branch 'master' into dev-hui

2023-03-08 00:39:33 +01:00
parent f286ae5bfa f236640427
commit ed45aa2816
221 changed files with 3122 additions and 1486 deletions
--- a/tests/cpp/c_api/test_c_api.cc
+++ b/tests/cpp/c_api/test_c_api.cc
@@ -267,7 +267,7 @@ TEST(CAPI, DMatrixSetFeatureName) {
  }

  char const* feat_types [] {"i", "q"};
-  static_assert(sizeof(feat_types)/ sizeof(feat_types[0]) == kCols, "");
+  static_assert(sizeof(feat_types) / sizeof(feat_types[0]) == kCols);
  XGDMatrixSetStrFeatureInfo(handle, "feature_type", feat_types, kCols);
  char const **c_out_types;
  XGDMatrixGetStrFeatureInfo(handle, u8"feature_type", &out_len,
--- a/tests/cpp/common/test_algorithm.cc
+++ b/tests/cpp/common/test_algorithm.cc
@@ -0,0 +1,35 @@
+/**
+ * Copyright 2020-2023 by XGBoost Contributors
+ */
+#include <gtest/gtest.h>
+#include <xgboost/context.h>  // Context
+#include <xgboost/span.h>
+
+#include <algorithm>  // is_sorted
+
+#include "../../../src/common/algorithm.h"
+
+namespace xgboost {
+namespace common {
+TEST(Algorithm, ArgSort) {
+  Context ctx;
+  std::vector<float> const values{3.0, 2.0, 1.0};  // strictly decreasing
+  // The expected indices are those of `values` visited in increasing order of value.
+  std::vector<bst_feature_t> const expected{2, 1, 0};
+  ASSERT_EQ(ArgSort<bst_feature_t>(&ctx, values.cbegin(), values.cend()), expected);
+}
+
+TEST(Algorithm, Sort) {
+  Context ctx;
+  ctx.Init(Args{{"nthread", "8"}});
+  std::vector<float> const unsorted{3.0, 1.0, 2.0};
+  auto by_sort = unsorted;  // fresh copy, the range is reordered in place
+  Sort(&ctx, by_sort.begin(), by_sort.end(), std::less<>{});
+  ASSERT_TRUE(std::is_sorted(by_sort.cbegin(), by_sort.cend()));
+
+  auto by_stable_sort = unsorted;  // same unsorted input for the stable variant
+  StableSort(&ctx, by_stable_sort.begin(), by_stable_sort.end(), std::less<>{});
+  ASSERT_TRUE(std::is_sorted(by_stable_sort.cbegin(), by_stable_sort.cend()));
+}
+}  // namespace common
+}  // namespace xgboost
--- a/tests/cpp/common/test_algorithm.cu
+++ b/tests/cpp/common/test_algorithm.cu
@@ -52,9 +52,9 @@ void TestSegmentedArgSort() {
  }
 }

-TEST(Algorithms, SegmentedArgSort) { TestSegmentedArgSort(); }
+TEST(Algorithm, SegmentedArgSort) { TestSegmentedArgSort(); }

-TEST(Algorithms, ArgSort) {
+TEST(Algorithm, GpuArgSort) {
  Context ctx;
  ctx.gpu_id = 0;

@@ -80,7 +80,7 @@ TEST(Algorithms, ArgSort) {
      thrust::is_sorted(sorted_idx.begin() + 10, sorted_idx.end(), thrust::greater<size_t>{}));
 }

-TEST(Algorithms, SegmentedSequence) {
+TEST(Algorithm, SegmentedSequence) {
  dh::device_vector<std::size_t> idx(16);
  dh::device_vector<std::size_t> ptr(3);
  Context ctx = CreateEmptyGenericParam(0);
--- a/tests/cpp/common/test_charconv.cc
+++ b/tests/cpp/common/test_charconv.cc
@@ -128,7 +128,7 @@ TEST(Ryu, Regression) {
  TestRyu("2E2", 200.0f);
  TestRyu("3.3554432E7", 3.3554432E7f);

-  static_assert(1.1920929E-7f == std::numeric_limits<float>::epsilon(), "");
+  static_assert(1.1920929E-7f == std::numeric_limits<float>::epsilon());
  TestRyu("1.1920929E-7", std::numeric_limits<float>::epsilon());
 }

--- a/tests/cpp/common/test_common.cc
+++ b/tests/cpp/common/test_common.cc
@@ -1,14 +0,0 @@
-#include <gtest/gtest.h>
-#include <xgboost/span.h>
-#include "../../../src/common/common.h"
-
-namespace xgboost {
-namespace common {
-TEST(ArgSort, Basic) {
-  std::vector<float> inputs {3.0, 2.0, 1.0};
-  auto ret = ArgSort<bst_feature_t>(Span<float>{inputs});
-  std::vector<bst_feature_t> sol{2, 1, 0};
-  ASSERT_EQ(ret, sol);
-}
-}  // namespace common
-}  // namespace xgboost
--- a/tests/cpp/common/test_group_data.cc
+++ b/tests/cpp/common/test_group_data.cc
@@ -43,8 +43,8 @@ TEST(GroupData, ParallelGroupBuilder) {
  builder2.Push(2, Entry(0, 4), 0);
  builder2.Push(2, Entry(1, 5), 0);

-  expected_data.emplace_back(Entry(0, 4));
-  expected_data.emplace_back(Entry(1, 5));
+  expected_data.emplace_back(0, 4);
+  expected_data.emplace_back(1, 5);
  expected_offsets.emplace_back(6);

  EXPECT_EQ(data, expected_data);
--- a/tests/cpp/common/test_hist_util.cu
+++ b/tests/cpp/common/test_hist_util.cu
@@ -143,7 +143,7 @@ void TestMixedSketch() {
  size_t n_samples = 1000, n_features = 2, n_categories = 3;
  std::vector<float> data(n_samples * n_features);
  SimpleLCG gen;
-  SimpleRealUniformDistribution<float> cat_d{0.0f, float(n_categories)};
+  SimpleRealUniformDistribution<float> cat_d{0.0f, static_cast<float>(n_categories)};
  SimpleRealUniformDistribution<float> num_d{0.0f, 3.0f};
  for (size_t i = 0; i < n_samples * n_features; ++i) {
    if (i % 2 == 0) {
--- a/tests/cpp/common/test_intrusive_ptr.cc
+++ b/tests/cpp/common/test_intrusive_ptr.cc
@@ -13,9 +13,9 @@ class NotCopyConstructible {
  NotCopyConstructible(NotCopyConstructible&& that) = default;
 };
 static_assert(
-    !std::is_trivially_copy_constructible<NotCopyConstructible>::value, "");
+    !std::is_trivially_copy_constructible<NotCopyConstructible>::value);
 static_assert(
-    !std::is_trivially_copy_assignable<NotCopyConstructible>::value, "");
+    !std::is_trivially_copy_assignable<NotCopyConstructible>::value);

 class ForIntrusivePtrTest {
 public:
--- a/tests/cpp/common/test_linalg.cc
+++ b/tests/cpp/common/test_linalg.cc
@@ -1,22 +1,23 @@
-/*!
- * Copyright 2021 by XGBoost Contributors
+/**
+ * Copyright 2021-2023 by XGBoost Contributors
 */
 #include <gtest/gtest.h>
 #include <xgboost/context.h>
 #include <xgboost/host_device_vector.h>
 #include <xgboost/linalg.h>

-#include <numeric>
+#include <cstddef>  // size_t
+#include <numeric>  // iota
+#include <vector>

 #include "../../../src/common/linalg_op.h"

-namespace xgboost {
-namespace linalg {
+namespace xgboost::linalg {
 namespace {
 auto kCpuId = Context::kCpuId;
 }

-auto MakeMatrixFromTest(HostDeviceVector<float> *storage, size_t n_rows, size_t n_cols) {
+auto MakeMatrixFromTest(HostDeviceVector<float> *storage, std::size_t n_rows, std::size_t n_cols) {
  storage->Resize(n_rows * n_cols);
  auto &h_storage = storage->HostVector();

@@ -48,10 +49,11 @@ TEST(Linalg, VectorView) {
 }

 TEST(Linalg, TensorView) {
+  Context ctx;
  std::vector<double> data(2 * 3 * 4, 0);
  std::iota(data.begin(), data.end(), 0);

-  auto t = MakeTensorView(data, {2, 3, 4}, -1);
+  auto t = MakeTensorView(&ctx, data, 2, 3, 4);
  ASSERT_EQ(t.Shape()[0], 2);
  ASSERT_EQ(t.Shape()[1], 3);
  ASSERT_EQ(t.Shape()[2], 4);
@@ -106,12 +108,12 @@ TEST(Linalg, TensorView) {
  {
    // Don't assign the initial dimension, tensor should be able to deduce the correct dim
    // for Slice.
-    auto t = MakeTensorView(data, {2, 3, 4}, 0);
+    auto t = MakeTensorView(&ctx, data, 2, 3, 4);
    auto s = t.Slice(1, 2, All());
-    static_assert(decltype(s)::kDimension == 1, "");
+    static_assert(decltype(s)::kDimension == 1);
  }
  {
-    auto t = MakeTensorView(data, {2, 3, 4}, 0);
+    auto t = MakeTensorView(&ctx, data, 2, 3, 4);
    auto s = t.Slice(1, linalg::All(), 1);
    ASSERT_EQ(s(0), 13);
    ASSERT_EQ(s(1), 17);
@@ -119,9 +121,9 @@ TEST(Linalg, TensorView) {
  }
  {
    // range slice
-    auto t = MakeTensorView(data, {2, 3, 4}, 0);
+    auto t = MakeTensorView(&ctx, data, 2, 3, 4);
    auto s = t.Slice(linalg::All(), linalg::Range(1, 3), 2);
-    static_assert(decltype(s)::kDimension == 2, "");
+    static_assert(decltype(s)::kDimension == 2);
    std::vector<double> sol{6, 10, 18, 22};
    auto k = 0;
    for (size_t i = 0; i < s.Shape(0); ++i) {
@@ -134,9 +136,9 @@ TEST(Linalg, TensorView) {
  }
  {
    // range slice
-    auto t = MakeTensorView(data, {2, 3, 4}, 0);
+    auto t = MakeTensorView(&ctx, data, 2, 3, 4);
    auto s = t.Slice(1, linalg::Range(1, 3), linalg::Range(1, 3));
-    static_assert(decltype(s)::kDimension == 2, "");
+    static_assert(decltype(s)::kDimension == 2);
    std::vector<double> sol{17, 18, 21, 22};
    auto k = 0;
    for (size_t i = 0; i < s.Shape(0); ++i) {
@@ -149,9 +151,9 @@ TEST(Linalg, TensorView) {
  }
  {
    // same as no slice.
-    auto t = MakeTensorView(data, {2, 3, 4}, 0);
+    auto t = MakeTensorView(&ctx, data, 2, 3, 4);
    auto s = t.Slice(linalg::All(), linalg::Range(0, 3), linalg::Range(0, 4));
-    static_assert(decltype(s)::kDimension == 3, "");
+    static_assert(decltype(s)::kDimension == 3);
    auto all = t.Slice(linalg::All(), linalg::All(), linalg::All());
    for (size_t i = 0; i < s.Shape(0); ++i) {
      for (size_t j = 0; j < s.Shape(1); ++j) {
@@ -166,7 +168,7 @@ TEST(Linalg, TensorView) {

  {
    // copy and move constructor.
-    auto t = MakeTensorView(data, {2, 3, 4}, kCpuId);
+    auto t = MakeTensorView(&ctx, data, 2, 3, 4);
    auto from_copy = t;
    auto from_move = std::move(t);
    for (size_t i = 0; i < t.Shape().size(); ++i) {
@@ -177,7 +179,7 @@ TEST(Linalg, TensorView) {

  {
    // multiple slices
-    auto t = MakeTensorView(data, {2, 3, 4}, kCpuId);
+    auto t = MakeTensorView(&ctx, data, 2, 3, 4);
    auto s_0 = t.Slice(linalg::All(), linalg::Range(0, 2), linalg::Range(1, 4));
    ASSERT_FALSE(s_0.CContiguous());
    auto s_1 = s_0.Slice(1, 1, linalg::Range(0, 2));
@@ -208,7 +210,7 @@ TEST(Linalg, TensorView) {

 TEST(Linalg, Tensor) {
  {
-    Tensor<float, 3> t{{2, 3, 4}, kCpuId};
+    Tensor<float, 3> t{{2, 3, 4}, kCpuId, Order::kC};
    auto view = t.View(kCpuId);

    auto const &as_const = t;
@@ -227,7 +229,7 @@ TEST(Linalg, Tensor) {
  }
  {
    // Reshape
-    Tensor<float, 3> t{{2, 3, 4}, kCpuId};
+    Tensor<float, 3> t{{2, 3, 4}, kCpuId, Order::kC};
    t.Reshape(4, 3, 2);
    ASSERT_EQ(t.Size(), 24);
    ASSERT_EQ(t.Shape(2), 2);
@@ -245,7 +247,7 @@ TEST(Linalg, Tensor) {

 TEST(Linalg, Empty) {
  {
-    auto t = TensorView<double, 2>{{}, {0, 3}, kCpuId};
+    auto t = TensorView<double, 2>{{}, {0, 3}, kCpuId, Order::kC};
    for (int32_t i : {0, 1, 2}) {
      auto s = t.Slice(All(), i);
      ASSERT_EQ(s.Size(), 0);
@@ -254,7 +256,7 @@ TEST(Linalg, Empty) {
    }
  }
  {
-    auto t = Tensor<double, 2>{{0, 3}, kCpuId};
+    auto t = Tensor<double, 2>{{0, 3}, kCpuId, Order::kC};
    ASSERT_EQ(t.Size(), 0);
    auto view = t.View(kCpuId);

@@ -269,7 +271,7 @@ TEST(Linalg, Empty) {

 TEST(Linalg, ArrayInterface) {
  auto cpu = kCpuId;
-  auto t = Tensor<double, 2>{{3, 3}, cpu};
+  auto t = Tensor<double, 2>{{3, 3}, cpu, Order::kC};
  auto v = t.View(cpu);
  std::iota(v.Values().begin(), v.Values().end(), 0);
  auto arr = Json::Load(StringView{ArrayInterfaceStr(v)});
@@ -313,21 +315,48 @@ TEST(Linalg, Popc) {
 }

 TEST(Linalg, Stack) {
-  Tensor<float, 3> l{{2, 3, 4}, kCpuId};
+  Tensor<float, 3> l{{2, 3, 4}, kCpuId, Order::kC};
  ElementWiseTransformHost(l.View(kCpuId), omp_get_max_threads(),
                           [=](size_t i, float) { return i; });
-  Tensor<float, 3> r_0{{2, 3, 4}, kCpuId};
+  Tensor<float, 3> r_0{{2, 3, 4}, kCpuId, Order::kC};
  ElementWiseTransformHost(r_0.View(kCpuId), omp_get_max_threads(),
                           [=](size_t i, float) { return i; });

  Stack(&l, r_0);

-  Tensor<float, 3> r_1{{0, 3, 4}, kCpuId};
+  Tensor<float, 3> r_1{{0, 3, 4}, kCpuId, Order::kC};
  Stack(&l, r_1);
  ASSERT_EQ(l.Shape(0), 4);

  Stack(&r_1, l);
  ASSERT_EQ(r_1.Shape(0), l.Shape(0));
 }
-}  // namespace linalg
-}  // namespace xgboost
+
+TEST(Linalg, FOrder) {
+  std::size_t constexpr kRows = 16, kCols = 3;
+  std::vector<float> data(kRows * kCols);
+  MatrixView<float> mat{data, {kRows, kCols}, Context::kCpuId, Order::kF};
+  // Fill through the F-order view so that element (i, j) holds i * kCols + j.
+  for (std::size_t i = 0; i < kRows; ++i) {
+    for (std::size_t j = 0; j < kCols; ++j) {
+      mat(i, j) = static_cast<float>(i * kCols + j);
+    }
+  }
+
+  auto column = mat.Slice(linalg::All(), 1);
+  ASSERT_TRUE(column.FContiguous());
+  ASSERT_EQ(column.Stride(0), 1);
+  ASSERT_TRUE(column.CContiguous());
+  // Column 1 starts at 1 and grows by kCols per row: first read it via the view iterators,
+  float expected{1};
+  for (auto it = linalg::cbegin(column); it != linalg::cend(column); ++it, expected += kCols) {
+    ASSERT_EQ(*it, expected);
+  }
+  // then via the raw pointer, walking kRows consecutive elements of the underlying storage.
+  expected = 1;
+  auto const* first = column.Values().data();
+  for (std::size_t r = 0; r < kRows; ++r, expected += kCols) {
+    ASSERT_EQ(first[r], expected);
+  }
+}
+}  // namespace xgboost::linalg
--- a/tests/cpp/common/test_linalg.cu
+++ b/tests/cpp/common/test_linalg.cu
@@ -1,5 +1,5 @@
-/*!
- * Copyright 2021-2022 by XGBoost Contributors
+/**
+ * Copyright 2021-2023 by XGBoost Contributors
 */
 #include <gtest/gtest.h>

@@ -7,8 +7,7 @@
 #include "xgboost/context.h"
 #include "xgboost/linalg.h"

-namespace xgboost {
-namespace linalg {
+namespace xgboost::linalg {
 namespace {
 void TestElementWiseKernel() {
  Tensor<float, 3> l{{2, 3, 4}, 0};
@@ -55,12 +54,14 @@ void TestElementWiseKernel() {
 }

 void TestSlice() {
+  Context ctx;
+  ctx.gpu_id = 1;
  thrust::device_vector<double> data(2 * 3 * 4);
-  auto t = MakeTensorView(dh::ToSpan(data), {2, 3, 4}, 0);
+  auto t = MakeTensorView(&ctx, dh::ToSpan(data), 2, 3, 4);
  dh::LaunchN(1, [=] __device__(size_t) {
    auto s = t.Slice(linalg::All(), linalg::Range(0, 3), linalg::Range(0, 4));
    auto all = t.Slice(linalg::All(), linalg::All(), linalg::All());
-    static_assert(decltype(s)::kDimension == 3, "");
+    static_assert(decltype(s)::kDimension == 3);
    for (size_t i = 0; i < s.Shape(0); ++i) {
      for (size_t j = 0; j < s.Shape(1); ++j) {
        for (size_t k = 0; k < s.Shape(2); ++k) {
@@ -75,5 +76,4 @@ void TestSlice() {
 TEST(Linalg, GPUElementWise) { TestElementWiseKernel(); }

 TEST(Linalg, GPUTensorView) { TestSlice(); }
-}  // namespace linalg
-}  // namespace xgboost
+}  // namespace xgboost::linalg
--- a/tests/cpp/common/test_random.cc
+++ b/tests/cpp/common/test_random.cc
@@ -2,16 +2,18 @@
 #include "../../../src/common/random.h"
 #include "../helpers.h"
 #include "gtest/gtest.h"
+#include "xgboost/context.h"  // Context

 namespace xgboost {
 namespace common {
 TEST(ColumnSampler, Test) {
+  Context ctx;
  int n = 128;
  ColumnSampler cs;
  std::vector<float> feature_weights;

  // No node sampling
-  cs.Init(n, feature_weights, 1.0f, 0.5f, 0.5f);
+  cs.Init(&ctx, n, feature_weights, 1.0f, 0.5f, 0.5f);
  auto set0 = cs.GetFeatureSet(0);
  ASSERT_EQ(set0->Size(), 32);

@@ -24,7 +26,7 @@ TEST(ColumnSampler, Test) {
  ASSERT_EQ(set2->Size(), 32);

  // Node sampling
-  cs.Init(n, feature_weights, 0.5f, 1.0f, 0.5f);
+  cs.Init(&ctx, n, feature_weights, 0.5f, 1.0f, 0.5f);
  auto set3 = cs.GetFeatureSet(0);
  ASSERT_EQ(set3->Size(), 32);

@@ -34,24 +36,25 @@ TEST(ColumnSampler, Test) {
  ASSERT_EQ(set4->Size(), 32);

  // No level or node sampling, should be the same at different depth
-  cs.Init(n, feature_weights, 1.0f, 1.0f, 0.5f);
+  cs.Init(&ctx, n, feature_weights, 1.0f, 1.0f, 0.5f);
  ASSERT_EQ(cs.GetFeatureSet(0)->HostVector(),
            cs.GetFeatureSet(1)->HostVector());

-  cs.Init(n, feature_weights, 1.0f, 1.0f, 1.0f);
+  cs.Init(&ctx, n, feature_weights, 1.0f, 1.0f, 1.0f);
  auto set5 = cs.GetFeatureSet(0);
  ASSERT_EQ(set5->Size(), n);
-  cs.Init(n, feature_weights, 1.0f, 1.0f, 1.0f);
+  cs.Init(&ctx, n, feature_weights, 1.0f, 1.0f, 1.0f);
  auto set6 = cs.GetFeatureSet(0);
  ASSERT_EQ(set5->HostVector(), set6->HostVector());

  // Should always be a minimum of one feature
-  cs.Init(n, feature_weights, 1e-16f, 1e-16f, 1e-16f);
+  cs.Init(&ctx, n, feature_weights, 1e-16f, 1e-16f, 1e-16f);
  ASSERT_EQ(cs.GetFeatureSet(0)->Size(), 1);
 }

 // Test if different threads using the same seed produce the same result
 TEST(ColumnSampler, ThreadSynchronisation) {
+  Context ctx;
  const int64_t num_threads = 100;
  int n = 128;
  size_t iterations = 10;
@@ -63,7 +66,7 @@ TEST(ColumnSampler, ThreadSynchronisation) {
  {
    for (auto j = 0ull; j < iterations; j++) {
      ColumnSampler cs(j);
-      cs.Init(n, feature_weights, 0.5f, 0.5f, 0.5f);
+      cs.Init(&ctx, n, feature_weights, 0.5f, 0.5f, 0.5f);
      for (auto level = 0ull; level < levels; level++) {
        auto result = cs.GetFeatureSet(level)->ConstHostVector();
 #pragma omp single
@@ -80,11 +83,12 @@ TEST(ColumnSampler, ThreadSynchronisation) {

 TEST(ColumnSampler, WeightedSampling) {
  auto test_basic = [](int first) {
+    Context ctx;
    std::vector<float> feature_weights(2);
    feature_weights[0] = std::abs(first - 1.0f);
    feature_weights[1] = first - 0.0f;
    ColumnSampler cs{0};
-    cs.Init(2, feature_weights, 1.0, 1.0, 0.5);
+    cs.Init(&ctx, 2, feature_weights, 1.0, 1.0, 0.5);
    auto feature_sets = cs.GetFeatureSet(0);
    auto const &h_feat_set = feature_sets->HostVector();
    ASSERT_EQ(h_feat_set.size(), 1);
@@ -100,7 +104,8 @@ TEST(ColumnSampler, WeightedSampling) {
  SimpleRealUniformDistribution<float> dist(.0f, 12.0f);
  std::generate(feature_weights.begin(), feature_weights.end(), [&]() { return dist(&rng); });
  ColumnSampler cs{0};
-  cs.Init(kCols, feature_weights, 0.5f, 1.0f, 1.0f);
+  Context ctx;
+  cs.Init(&ctx, kCols, feature_weights, 0.5f, 1.0f, 1.0f);
  std::vector<bst_feature_t> features(kCols);
  std::iota(features.begin(), features.end(), 0);
  std::vector<float> freq(kCols, 0);
@@ -135,7 +140,8 @@ TEST(ColumnSampler, WeightedMultiSampling) {
  }
  ColumnSampler cs{0};
  float bytree{0.5}, bylevel{0.5}, bynode{0.5};
-  cs.Init(feature_weights.size(), feature_weights, bytree, bylevel, bynode);
+  Context ctx;
+  cs.Init(&ctx, feature_weights.size(), feature_weights, bytree, bylevel, bynode);
  auto feature_set = cs.GetFeatureSet(0);
  size_t n_sampled = kCols * bytree * bylevel * bynode;
  ASSERT_EQ(feature_set->Size(), n_sampled);
--- a/tests/cpp/common/test_span.cc
+++ b/tests/cpp/common/test_span.cc
@@ -522,9 +522,9 @@ TEST(Span, Empty) {
 TEST(SpanDeathTest, Empty) {
  std::vector<float> data(1, 0);
  ASSERT_TRUE(data.data());
-  Span<float> s{data.data(), Span<float>::index_type(0)};  // ok to define 0 size span.
+  // ok to define 0 size span.
+  Span<float> s{data.data(), static_cast<Span<float>::index_type>(0)};
  EXPECT_DEATH(s[0], "");  // not ok to use it.
 }
-
 }  // namespace common
 }  // namespace xgboost
--- a/tests/cpp/common/test_stats.cc
+++ b/tests/cpp/common/test_stats.cc
@@ -11,19 +11,20 @@
 namespace xgboost {
 namespace common {
 TEST(Stats, Quantile) {
+  Context ctx;
  {
    linalg::Tensor<float, 1> arr({20.f, 0.f, 15.f, 50.f, 40.f, 0.f, 35.f}, {7}, Context::kCpuId);
    std::vector<size_t> index{0, 2, 3, 4, 6};
    auto h_arr = arr.HostView();
    auto beg = MakeIndexTransformIter([&](size_t i) { return h_arr(index[i]); });
    auto end = beg + index.size();
-    auto q = Quantile(0.40f, beg, end);
+    auto q = Quantile(&ctx, 0.40f, beg, end);
    ASSERT_EQ(q, 26.0);

-    q = Quantile(0.20f, beg, end);
+    q = Quantile(&ctx, 0.20f, beg, end);
    ASSERT_EQ(q, 16.0);

-    q = Quantile(0.10f, beg, end);
+    q = Quantile(&ctx, 0.10f, beg, end);
    ASSERT_EQ(q, 15.0);
  }

@@ -31,12 +32,13 @@ TEST(Stats, Quantile) {
    std::vector<float> vec{1., 2., 3., 4., 5.};
    auto beg = MakeIndexTransformIter([&](size_t i) { return vec[i]; });
    auto end = beg + vec.size();
-    auto q = Quantile(0.5f, beg, end);
+    auto q = Quantile(&ctx, 0.5f, beg, end);
    ASSERT_EQ(q, 3.);
  }
 }

 TEST(Stats, WeightedQuantile) {
+  Context ctx;
  linalg::Tensor<float, 1> arr({1.f, 2.f, 3.f, 4.f, 5.f}, {5}, Context::kCpuId);
  linalg::Tensor<float, 1> weight({1.f, 1.f, 1.f, 1.f, 1.f}, {5}, Context::kCpuId);

@@ -47,13 +49,13 @@ TEST(Stats, WeightedQuantile) {
  auto end = beg + arr.Size();
  auto w = MakeIndexTransformIter([&](size_t i) { return h_weight(i); });

-  auto q = WeightedQuantile(0.50f, beg, end, w);
+  auto q = WeightedQuantile(&ctx, 0.50f, beg, end, w);
  ASSERT_EQ(q, 3);

-  q = WeightedQuantile(0.0, beg, end, w);
+  q = WeightedQuantile(&ctx, 0.0, beg, end, w);
  ASSERT_EQ(q, 1);

-  q = WeightedQuantile(1.0, beg, end, w);
+  q = WeightedQuantile(&ctx, 1.0, beg, end, w);
  ASSERT_EQ(q, 5);
 }

--- a/tests/cpp/data/test_array_interface.cc
+++ b/tests/cpp/data/test_array_interface.cc
@@ -119,13 +119,13 @@ TEST(ArrayInterface, TrivialDim) {
 }

 TEST(ArrayInterface, ToDType) {
-  static_assert(ToDType<float>::kType == ArrayInterfaceHandler::kF4, "");
-  static_assert(ToDType<double>::kType == ArrayInterfaceHandler::kF8, "");
+  static_assert(ToDType<float>::kType == ArrayInterfaceHandler::kF4);
+  static_assert(ToDType<double>::kType == ArrayInterfaceHandler::kF8);

-  static_assert(ToDType<uint32_t>::kType == ArrayInterfaceHandler::kU4, "");
-  static_assert(ToDType<uint64_t>::kType == ArrayInterfaceHandler::kU8, "");
+  static_assert(ToDType<uint32_t>::kType == ArrayInterfaceHandler::kU4);
+  static_assert(ToDType<uint64_t>::kType == ArrayInterfaceHandler::kU8);

-  static_assert(ToDType<int32_t>::kType == ArrayInterfaceHandler::kI4, "");
-  static_assert(ToDType<int64_t>::kType == ArrayInterfaceHandler::kI8, "");
+  static_assert(ToDType<int32_t>::kType == ArrayInterfaceHandler::kI4);
+  static_assert(ToDType<int64_t>::kType == ArrayInterfaceHandler::kI8);
 }
 }  // namespace xgboost
--- a/tests/cpp/data/test_data.cc
+++ b/tests/cpp/data/test_data.cc
@@ -21,7 +21,7 @@ TEST(SparsePage, PushCSC) {

  offset = {0, 1, 4};
  for (size_t i = 0; i < offset.back(); ++i) {
-    data.emplace_back(Entry(i, 0.1f));
+    data.emplace_back(i, 0.1f);
  }

  SparsePage other;
--- a/tests/cpp/data/test_gradient_index.cc
+++ b/tests/cpp/data/test_gradient_index.cc
@@ -68,6 +68,30 @@ TEST(GradientIndex, FromCategoricalBasic) {
  }
 }

+TEST(GradientIndex, FromCategoricalLarge) {
+  size_t constexpr kRows = 1000, kCats = 512, kCols = 1;
+  bst_bin_t max_bins = 8;
+  auto x = GenerateRandomCategoricalSingleColumn(kRows, kCats);
+  auto m = GetDMatrixFromData(x, kRows, kCols);
+  // One categorical column whose category count (kCats) is far larger than max_bins.
+
+  auto &h_ft = m->Info().feature_types.HostVector();
+  h_ft.resize(kCols, FeatureType::kCategorical);
+
+  BatchParam p{max_bins, 0.8};
+  {
+    GHistIndexMatrix gidx(m.get(), max_bins, p.sparse_thresh, false, AllThreadsForTest(), {});
+    ASSERT_EQ(gidx.index.GetBinTypeSize(), common::kUint16BinsTypeSize);  // prints both on failure
+  }
+  {
+    for (auto const &page : m->GetBatches<GHistIndexMatrix>(p)) {
+      common::HistogramCuts cut = page.cut;  // copy, the constructor below consumes the cuts
+      GHistIndexMatrix gidx{m->Info(), std::move(cut), max_bins};
+      ASSERT_EQ(gidx.MaxNumBinPerFeat(), kCats);  // the bin count follows kCats, not max_bins
+    }
+  }
+}
+
 TEST(GradientIndex, PushBatch) {
  size_t constexpr kRows = 64, kCols = 4;
  bst_bin_t max_bins = 64;
--- a/tests/cpp/data/test_simple_dmatrix.cu
+++ b/tests/cpp/data/test_simple_dmatrix.cu
@@ -189,8 +189,8 @@ TEST(SimpleCSRSource, FromColumnarSparse) {
    auto& mask = column_bitfields[0];
    mask.resize(8);

-    for (size_t j = 0; j < mask.size(); ++j) {
-      mask[j] = ~0;
+    for (auto && j : mask) {
+      j = ~0;
    }
    // the 2^th entry of first column is invalid
    // [0 0 0 0 0 1 0 0]
@@ -201,8 +201,8 @@ TEST(SimpleCSRSource, FromColumnarSparse) {
    auto& mask = column_bitfields[1];
    mask.resize(8);

-    for (size_t j = 0; j < mask.size(); ++j) {
-      mask[j] = ~0;
+    for (auto && j : mask) {
+      j = ~0;
    }
    // the 19^th entry of second column is invalid
    // [~0~], [~0~], [0 0 0 0 1 0 0 0]
--- a/tests/cpp/data/test_sparse_page_dmatrix.cc
+++ b/tests/cpp/data/test_sparse_page_dmatrix.cc
@@ -96,7 +96,7 @@ void TestRetainPage() {

  // make sure it's const and the caller can not modify the content of page.
  for (auto& page : m->GetBatches<Page>()) {
-    static_assert(std::is_const<std::remove_reference_t<decltype(page)>>::value, "");
+    static_assert(std::is_const<std::remove_reference_t<decltype(page)>>::value);
  }
 }

--- a/tests/cpp/data/test_sparse_page_dmatrix.cu
+++ b/tests/cpp/data/test_sparse_page_dmatrix.cu
@@ -1,5 +1,6 @@
-// Copyright by Contributors
-
+/**
+ * Copyright 2019-2023 by XGBoost Contributors
+ */
 #include "../../../src/common/compressed_iterator.h"
 #include "../../../src/data/ellpack_page.cuh"
 #include "../../../src/data/sparse_page_dmatrix.h"
@@ -69,7 +70,7 @@ TEST(SparsePageDMatrix, RetainEllpackPage) {
  std::vector<std::shared_ptr<EllpackPage const>> iterators;
  for (auto it = begin; it != end; ++it) {
    iterators.push_back(it.Page());
-    gidx_buffers.emplace_back(HostDeviceVector<common::CompressedByteT>{});
+    gidx_buffers.emplace_back();
    gidx_buffers.back().Resize((*it).Impl()->gidx_buffer.Size());
    gidx_buffers.back().Copy((*it).Impl()->gidx_buffer);
  }
@@ -87,7 +88,7 @@ TEST(SparsePageDMatrix, RetainEllpackPage) {

  // make sure it's const and the caller can not modify the content of page.
  for (auto& page : m->GetBatches<EllpackPage>({0, 32})) {
-    static_assert(std::is_const<std::remove_reference_t<decltype(page)>>::value, "");
+    static_assert(std::is_const<std::remove_reference_t<decltype(page)>>::value);
  }

  // The above iteration clears out all references inside DMatrix.
--- a/tests/cpp/helpers.cc
+++ b/tests/cpp/helpers.cc
@@ -186,7 +186,7 @@ SimpleLCG::StateType SimpleLCG::operator()() {
 SimpleLCG::StateType SimpleLCG::Min() const { return min(); }
 SimpleLCG::StateType SimpleLCG::Max() const { return max(); }
 // Make sure it's compile time constant.
-static_assert(SimpleLCG::max() - SimpleLCG::min(), "");
+static_assert(SimpleLCG::max() - SimpleLCG::min());

 void RandomDataGenerator::GenerateDense(HostDeviceVector<float> *out) const {
  xgboost::SimpleRealUniformDistribution<bst_float> dist(lower_, upper_);
--- a/tests/cpp/helpers.h
+++ b/tests/cpp/helpers.h
@@ -46,7 +46,7 @@ class GradientBooster;

 template <typename Float>
 Float RelError(Float l, Float r) {
-  static_assert(std::is_floating_point<Float>::value, "");
+  static_assert(std::is_floating_point<Float>::value);
  return std::abs(1.0f - l / r);
 }

@@ -164,7 +164,7 @@ class SimpleRealUniformDistribution {
    ResultT sum_value = 0, r_k = 1;

    for (size_t k = m; k != 0; --k) {
-      sum_value += ResultT((*rng)() - rng->Min()) * r_k;
+      sum_value += static_cast<ResultT>((*rng)() - rng->Min()) * r_k;
      r_k *= r;
    }

@@ -191,12 +191,10 @@ Json GetArrayInterface(HostDeviceVector<T> *storage, size_t rows, size_t cols) {
  Json array_interface{Object()};
  array_interface["data"] = std::vector<Json>(2);
  if (storage->DeviceCanRead()) {
-    array_interface["data"][0] =
-        Integer(reinterpret_cast<int64_t>(storage->ConstDevicePointer()));
+    array_interface["data"][0] = Integer{reinterpret_cast<int64_t>(storage->ConstDevicePointer())};
    array_interface["stream"] = nullptr;
  } else {
-    array_interface["data"][0] =
-        Integer(reinterpret_cast<int64_t>(storage->ConstHostPointer()));
+    array_interface["data"][0] = Integer{reinterpret_cast<int64_t>(storage->ConstHostPointer())};
  }
  array_interface["data"][1] = Boolean(false);

--- a/tests/cpp/objective/test_objective.cc
+++ b/tests/cpp/objective/test_objective.cc
@@ -1,4 +1,6 @@
-// Copyright by Contributors
+/**
+ * Copyright 2016-2023 by XGBoost contributors
+ */
 #include <gtest/gtest.h>
 #include <xgboost/context.h>
 #include <xgboost/objective.h>
@@ -25,11 +27,14 @@ TEST(Objective, PredTransform) {
  tparam.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
  size_t n = 100;

-  for (const auto &entry :
-       ::dmlc::Registry<::xgboost::ObjFunctionReg>::List()) {
-    std::unique_ptr<xgboost::ObjFunction> obj{
-        xgboost::ObjFunction::Create(entry->name, &tparam)};
-    obj->Configure(Args{{"num_class", "2"}});
+  for (const auto& entry : ::dmlc::Registry<::xgboost::ObjFunctionReg>::List()) {
+    std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create(entry->name, &tparam)};
+    if (entry->name.find("multi") != std::string::npos) {
+      obj->Configure(Args{{"num_class", "2"}});
+    }
+    if (entry->name.find("quantile") != std::string::npos) {
+      obj->Configure(Args{{"quantile_alpha", "0.5"}});
+    }
    HostDeviceVector<float> predts;
    predts.Resize(n, 3.14f);  // prediction is performed on host.
    ASSERT_FALSE(predts.DeviceCanRead());
--- a/tests/cpp/objective/test_quantile_obj.cc
+++ b/tests/cpp/objective/test_quantile_obj.cc
@@ -0,0 +1,74 @@
+/**
+ * Copyright 2023 by XGBoost contributors
+ */
+#include <gtest/gtest.h>
+#include <xgboost/base.h>       // Args
+#include <xgboost/context.h>    // Context
+#include <xgboost/objective.h>  // ObjFunction
+#include <xgboost/span.h>       // Span
+
+#include <memory>               // std::unique_ptr
+#include <vector>               // std::vector
+
+#include "../helpers.h"         // CheckConfigReload,CreateEmptyGenericParam,DeclareUnifiedTest
+
+namespace xgboost {
+TEST(Objective, DeclareUnifiedTest(Quantile)) {
+  Context ctx = CreateEmptyGenericParam(GPUIDX);
+  auto constexpr kName = "reg:quantileerror";
+  // Creates the objective, configures it with `alpha` and runs CheckConfigReload on it.
+  auto make_checked = [&](char const* alpha) {
+    std::unique_ptr<ObjFunction> configured{ObjFunction::Create(kName, &ctx)};
+    configured->Configure(Args{{"quantile_alpha", alpha}});
+    CheckConfigReload(configured, kName);
+    return configured;
+  };
+
+  make_checked("[0.6, 0.8]");  // a list of alphas is only configured and reloaded
+  auto obj = make_checked("0.6");
+
+  // Per row, the expected gradient is -alpha when predt < label and 1 - alpha otherwise.
+  std::vector<float> predictions{1.0f, 2.0f, 3.0f};
+  std::vector<float> targets{3.0f, 2.0f, 1.0f};
+  std::vector<float> unit_weights{1.0f, 1.0f, 1.0f};
+  std::vector<float> expected_grad{-0.6f, 0.4f, 0.4f};
+  std::vector<float> expected_hess = unit_weights;
+  CheckObjFunction(obj, predictions, targets, unit_weights, expected_grad, expected_hess);
+}
+
+TEST(Objective, DeclareUnifiedTest(QuantileIntercept)) {
+  Context ctx = CreateEmptyGenericParam(GPUIDX);
+  Args args{{"quantile_alpha", "[0.6, 0.8]"}};
+  std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:quantileerror", &ctx)};
+  obj->Configure(args);
+  // Labels form a single column holding 0..9; no weights are set for the first estimation.
+  MetaInfo info;
+  info.num_row_ = 10;
+  info.labels.ModifyInplace([&](HostDeviceVector<float>* data, common::Span<std::size_t> shape) {
+    data->SetDevice(ctx.gpu_id);
+    data->Resize(info.num_row_);
+    shape[0] = info.num_row_;
+    shape[1] = 1;
+
+    auto& h_labels = data->HostVector();
+    for (std::size_t i = 0; i < info.num_row_; ++i) {
+      h_labels[i] = i;
+    }
+  });
+
+  linalg::Vector<float> base_scores;
+  obj->InitEstimation(info, &base_scores);
+  ASSERT_EQ(base_scores.Size(), 1) << "Vector is not yet supported.";
+  // Two alphas still give one score: mean([5.6, 7.8]), presumably the two label quantiles.
+  ASSERT_NEAR(base_scores(0), 6.7, kRtEps);
+  // Append weights 9, 8, ..., 0 (row i gets num_row_ - i - 1), then estimate again.
+  for (std::size_t i = 0; i < info.num_row_; ++i) {
+    info.weights_.HostVector().emplace_back(info.num_row_ - i - 1.0);
+  }
+
+  obj->InitEstimation(info, &base_scores);
+  ASSERT_EQ(base_scores.Size(), 1) << "Vector is not yet supported.";
+  // With the weights in place the expected score drops to mean([3, 5]).
+  ASSERT_NEAR(base_scores(0), 4.0, kRtEps);
+}
+}  // namespace xgboost
--- a/tests/cpp/objective/test_quantile_obj_gpu.cu
+++ b/tests/cpp/objective/test_quantile_obj_gpu.cu
@@ -0,0 +1,5 @@
+/**
+ * Copyright 2023 XGBoost contributors
+ */
+// Dummy file to enable the CUDA tests.
+#include "test_quantile_obj.cc"
--- a/tests/cpp/objective/test_regression_obj.cc
+++ b/tests/cpp/objective/test_regression_obj.cc
@@ -6,8 +6,9 @@
 #include <xgboost/json.h>
 #include <xgboost/objective.h>

-#include "../../../src/common/linalg_op.h"  // begin,end
+#include "../../../src/common/linalg_op.h"  // for begin, end
 #include "../../../src/objective/adaptive.h"
+#include "../../../src/tree/param.h"        // for TrainParam
 #include "../helpers.h"
 #include "xgboost/base.h"
 #include "xgboost/data.h"
@@ -157,7 +158,7 @@ TEST(Objective, DeclareUnifiedTest(PoissonRegressionGPair)) {
    ObjFunction::Create("count:poisson", &ctx)
  };

-  args.emplace_back(std::make_pair("max_delta_step", "0.1f"));
+  args.emplace_back("max_delta_step", "0.1f");
  obj->Configure(args);

  CheckObjFunction(obj,
@@ -259,7 +260,7 @@ TEST(Objective, DeclareUnifiedTest(TweedieRegressionGPair)) {
  std::vector<std::pair<std::string, std::string>> args;
  std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:tweedie", &ctx)};

-  args.emplace_back(std::make_pair("tweedie_variance_power", "1.1f"));
+  args.emplace_back("tweedie_variance_power", "1.1f");
  obj->Configure(args);

  CheckObjFunction(obj,
@@ -408,9 +409,13 @@ TEST(Objective, DeclareUnifiedTest(AbsoluteError)) {
    h_predt[i] = labels[i] + i;
  }

-  obj->UpdateTreeLeaf(position, info, predt, 0, &tree);
-  ASSERT_EQ(tree[1].LeafValue(), -1);
-  ASSERT_EQ(tree[2].LeafValue(), -4);
+  tree::TrainParam param;
+  param.Init(Args{});
+  auto lr = param.learning_rate;
+
+  obj->UpdateTreeLeaf(position, info, param.learning_rate, predt, 0, &tree);
+  ASSERT_EQ(tree[1].LeafValue(), -1.0f * lr);
+  ASSERT_EQ(tree[2].LeafValue(), -4.0f * lr);
 }

 TEST(Objective, DeclareUnifiedTest(AbsoluteErrorLeaf)) {
@@ -428,8 +433,8 @@ TEST(Objective, DeclareUnifiedTest(AbsoluteErrorLeaf)) {
    auto h_labels = info.labels.HostView().Slice(linalg::All(), t);
    std::iota(linalg::begin(h_labels), linalg::end(h_labels), 0);

-    auto h_predt = linalg::MakeTensorView(predt.HostSpan(), {kRows, kTargets}, Context::kCpuId)
-                       .Slice(linalg::All(), t);
+    auto h_predt =
+        linalg::MakeTensorView(&ctx, predt.HostSpan(), kRows, kTargets).Slice(linalg::All(), t);
    for (size_t i = 0; i < h_predt.Size(); ++i) {
      h_predt(i) = h_labels(i) + i;
    }
@@ -457,11 +462,16 @@ TEST(Objective, DeclareUnifiedTest(AbsoluteErrorLeaf)) {
    ASSERT_EQ(tree.GetNumLeaves(), 4);

    auto empty_leaf = tree[4].LeafValue();
-    obj->UpdateTreeLeaf(position, info, predt, t, &tree);
-    ASSERT_EQ(tree[3].LeafValue(), -5);
-    ASSERT_EQ(tree[4].LeafValue(), empty_leaf);
-    ASSERT_EQ(tree[5].LeafValue(), -10);
-    ASSERT_EQ(tree[6].LeafValue(), -14);
+
+    tree::TrainParam param;
+    param.Init(Args{});
+    auto lr = param.learning_rate;
+
+    obj->UpdateTreeLeaf(position, info, lr, predt, t, &tree);
+    ASSERT_EQ(tree[3].LeafValue(), -5.0f * lr);
+    ASSERT_EQ(tree[4].LeafValue(), empty_leaf * lr);
+    ASSERT_EQ(tree[5].LeafValue(), -10.0f * lr);
+    ASSERT_EQ(tree[6].LeafValue(), -14.0f * lr);
  }
 }

--- a/tests/cpp/test_cache.cc
+++ b/tests/cpp/test_cache.cc
@@ -3,16 +3,18 @@
 */
 #include <gtest/gtest.h>
 #include <xgboost/cache.h>
-#include <xgboost/data.h>  // DMatrix
+#include <xgboost/data.h>  // for DMatrix

-#include <cstddef>         // std::size_t
+#include <cstddef>         // for size_t
+#include <cstdint>         // for uint32_t
+#include <thread>          // for thread

-#include "helpers.h"       // RandomDataGenerator
+#include "helpers.h"       // for RandomDataGenerator

 namespace xgboost {
 namespace {
+// Minimal item type stored in DMatrixCache by the tests below; it only remembers the value
+// it was constructed with so a test can tell which argument produced a given item.
 struct CacheForTest {
-  std::size_t i;
+  std::size_t const i;

  explicit CacheForTest(std::size_t k) : i{k} {}
 };
@@ -20,7 +22,7 @@ struct CacheForTest {

 TEST(DMatrixCache, Basic) {
  std::size_t constexpr kRows = 2, kCols = 1, kCacheSize = 4;
-  DMatrixCache<CacheForTest> cache(kCacheSize);
+  DMatrixCache<CacheForTest> cache{kCacheSize};

  auto add_cache = [&]() {
    // Create a lambda function here, so that p_fmat gets deleted upon the
@@ -52,4 +54,63 @@ TEST(DMatrixCache, Basic) {
    }
  }
 }
+
+/**
+ * Call DMatrixCache::CacheItem from many threads at once and check that every thread
+ * receives an item constructed from its own argument.
+ */
+TEST(DMatrixCache, MultiThread) {
+  std::size_t constexpr kRows = 2, kCols = 1, kCacheSize = 3;
+  auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
+
+  // hardware_concurrency() is allowed to return 0 when the value cannot be determined.
+  // Treat that as a single hardware thread so the test still runs instead of aborting.
+  auto n_hw_threads = std::thread::hardware_concurrency();
+  auto n = (n_hw_threads == 0u ? 1u : n_hw_threads) * 128u;
+  std::vector<std::shared_ptr<CacheForTest>> results(n);
+
+  {
+    // The cache capacity (kCacheSize) is much smaller than the number of threads.
+    DMatrixCache<CacheForTest> cache{kCacheSize};
+    std::vector<std::thread> tasks;
+    // First pass: threads are launched in ascending index order.
+    for (std::uint32_t tidx = 0; tidx < n; ++tidx) {
+      tasks.emplace_back([&, i = tidx]() {
+        // Every thread touches the shared matrix first ...
+        cache.CacheItem(p_fmat, i);
+
+        // ... and then a matrix that only this thread owns; that result is recorded.
+        auto p_fmat_local = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
+        results[i] = cache.CacheItem(p_fmat_local, i);
+      });
+    }
+    for (auto& t : tasks) {
+      t.join();
+    }
+    for (std::uint32_t tidx = 0; tidx < n; ++tidx) {
+      ASSERT_EQ(results[tidx]->i, tidx);
+    }
+
+    tasks.clear();
+
+    // Second pass: same work on the same cache, threads launched in descending index order.
+    for (std::int32_t tidx = static_cast<std::int32_t>(n - 1); tidx >= 0; --tidx) {
+      tasks.emplace_back([&, i = tidx]() {
+        cache.CacheItem(p_fmat, i);
+
+        auto p_fmat_local = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
+        results[i] = cache.CacheItem(p_fmat_local, i);
+      });
+    }
+    for (auto& t : tasks) {
+      t.join();
+    }
+    for (std::uint32_t tidx = 0; tidx < n; ++tidx) {
+      ASSERT_EQ(results[tidx]->i, tidx);
+    }
+  }
+
+  {
+    // Cache with capacity n: every thread requests an item for the shared matrix only.
+    DMatrixCache<CacheForTest> cache{n};
+    std::vector<std::thread> tasks;
+    for (std::uint32_t tidx = 0; tidx < n; ++tidx) {
+      tasks.emplace_back([&, tidx]() { results[tidx] = cache.CacheItem(p_fmat, tidx); });
+    }
+    for (auto& t : tasks) {
+      t.join();
+    }
+    for (std::uint32_t tidx = 0; tidx < n; ++tidx) {
+      ASSERT_EQ(results[tidx]->i, tidx);
+    }
+  }
+}
 }  // namespace xgboost
--- a/tests/cpp/tree/hist/test_evaluate_splits.cc
+++ b/tests/cpp/tree/hist/test_evaluate_splits.cc
@@ -9,12 +9,14 @@
 #include "../../../../src/tree/hist/evaluate_splits.h"
 #include "../test_evaluate_splits.h"
 #include "../../helpers.h"
+#include "xgboost/context.h"  // Context

 namespace xgboost {
 namespace tree {
 void TestEvaluateSplits(bool force_read_by_column) {
+  Context ctx;
+  ctx.nthread = 4;
  int static constexpr kRows = 8, kCols = 16;
-  int32_t n_threads = std::min(omp_get_max_threads(), 4);
  auto sampler = std::make_shared<common::ColumnSampler>();

  TrainParam param;
@@ -22,7 +24,7 @@ void TestEvaluateSplits(bool force_read_by_column) {

  auto dmat = RandomDataGenerator(kRows, kCols, 0).Seed(3).GenerateDMatrix();

-  auto evaluator = HistEvaluator<CPUExpandEntry>{param, dmat->Info(), n_threads, sampler};
+  auto evaluator = HistEvaluator<CPUExpandEntry>{&ctx, &param, dmat->Info(), sampler};
  common::HistCollection hist;
  std::vector<GradientPair> row_gpairs = {
      {1.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f},  {2.27f, 0.28f},
@@ -86,13 +88,15 @@ TEST(HistEvaluator, Evaluate) {
 }

 TEST(HistEvaluator, Apply) {
+  Context ctx;
+  ctx.nthread = 4;
  RegTree tree;
  int static constexpr kNRows = 8, kNCols = 16;
  TrainParam param;
  param.UpdateAllowUnknown(Args{{"min_child_weight", "0"}, {"reg_lambda", "0.0"}});
  auto dmat = RandomDataGenerator(kNRows, kNCols, 0).Seed(3).GenerateDMatrix();
  auto sampler = std::make_shared<common::ColumnSampler>();
-  auto evaluator_ = HistEvaluator<CPUExpandEntry>{param, dmat->Info(), 4, sampler};
+  auto evaluator_ = HistEvaluator<CPUExpandEntry>{&ctx, &param, dmat->Info(), sampler};

  CPUExpandEntry entry{0, 0, 10.0f};
  entry.split.left_sum = GradStats{0.4, 0.6f};
@@ -115,10 +119,11 @@ TEST(HistEvaluator, Apply) {
 }

 TEST_F(TestPartitionBasedSplit, CPUHist) {
+  Context ctx;
  // check the evaluator is returning the optimal split
  std::vector<FeatureType> ft{FeatureType::kCategorical};
  auto sampler = std::make_shared<common::ColumnSampler>();
-  HistEvaluator<CPUExpandEntry> evaluator{param_, info_, AllThreadsForTest(), sampler};
+  HistEvaluator<CPUExpandEntry> evaluator{&ctx, &param_, info_, sampler};
  evaluator.InitRoot(GradStats{total_gpair_});
  RegTree tree;
  std::vector<CPUExpandEntry> entries(1);
@@ -128,6 +133,7 @@ TEST_F(TestPartitionBasedSplit, CPUHist) {

 namespace {
 auto CompareOneHotAndPartition(bool onehot) {
+  Context ctx;
  int static constexpr kRows = 128, kCols = 1;
  std::vector<FeatureType> ft(kCols, FeatureType::kCategorical);

@@ -147,8 +153,7 @@ auto CompareOneHotAndPartition(bool onehot) {
      RandomDataGenerator(kRows, kCols, 0).Seed(3).Type(ft).MaxCategory(n_cats).GenerateDMatrix();

  auto sampler = std::make_shared<common::ColumnSampler>();
-  auto evaluator =
-      HistEvaluator<CPUExpandEntry>{param, dmat->Info(), AllThreadsForTest(), sampler};
+  auto evaluator = HistEvaluator<CPUExpandEntry>{&ctx, &param, dmat->Info(), sampler};
  std::vector<CPUExpandEntry> entries(1);

  for (auto const &gmat : dmat->GetBatches<GHistIndexMatrix>({32, param.sparse_threshold})) {
@@ -198,8 +203,8 @@ TEST_F(TestCategoricalSplitWithMissing, HistEvaluator) {
  MetaInfo info;
  info.num_col_ = 1;
  info.feature_types = {FeatureType::kCategorical};
-  auto evaluator =
-      HistEvaluator<CPUExpandEntry>{param_, info, AllThreadsForTest(), sampler};
+  Context ctx;
+  auto evaluator = HistEvaluator<CPUExpandEntry>{&ctx, &param_, info, sampler};
  evaluator.InitRoot(GradStats{parent_sum_});

  std::vector<CPUExpandEntry> entries(1);
--- a/tests/cpp/tree/hist/test_histogram.cc
+++ b/tests/cpp/tree/hist/test_histogram.cc
@@ -48,7 +48,7 @@ void TestAddHistRows(bool is_distributed) {

  HistogramBuilder<CPUExpandEntry> histogram_builder;
  histogram_builder.Reset(gmat.cut.TotalBins(), {kMaxBins, 0.5}, omp_get_max_threads(), 1,
-                          is_distributed);
+                          is_distributed, false);
  histogram_builder.AddHistRows(&starting_index, &sync_count,
                                nodes_for_explicit_hist_build_,
                                nodes_for_subtraction_trick_, &tree);
@@ -86,7 +86,7 @@ void TestSyncHist(bool is_distributed) {

  HistogramBuilder<CPUExpandEntry> histogram;
  uint32_t total_bins = gmat.cut.Ptrs().back();
-  histogram.Reset(total_bins, {kMaxBins, 0.5}, omp_get_max_threads(), 1, is_distributed);
+  histogram.Reset(total_bins, {kMaxBins, 0.5}, omp_get_max_threads(), 1, is_distributed, false);

  common::RowSetCollection row_set_collection_;
  {
@@ -226,11 +226,14 @@ TEST(CPUHistogram, SyncHist) {
  TestSyncHist(false);
 }

-void TestBuildHistogram(bool is_distributed, bool force_read_by_column) {
+void TestBuildHistogram(bool is_distributed, bool force_read_by_column, bool is_col_split) {
  size_t constexpr kNRows = 8, kNCols = 16;
  int32_t constexpr kMaxBins = 4;
-  auto p_fmat =
-      RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
+  auto p_fmat = RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
+  if (is_col_split) {
+    p_fmat = std::shared_ptr<DMatrix>{
+        p_fmat->SliceCol(collective::GetWorldSize(), collective::GetRank())};
+  }
  auto const &gmat = *(p_fmat->GetBatches<GHistIndexMatrix>(BatchParam{kMaxBins, 0.5}).begin());
  uint32_t total_bins = gmat.cut.Ptrs().back();

@@ -241,7 +244,8 @@ void TestBuildHistogram(bool is_distributed, bool force_read_by_column) {

  bst_node_t nid = 0;
  HistogramBuilder<CPUExpandEntry> histogram;
-  histogram.Reset(total_bins, {kMaxBins, 0.5}, omp_get_max_threads(), 1, is_distributed);
+  histogram.Reset(total_bins, {kMaxBins, 0.5}, omp_get_max_threads(), 1, is_distributed,
+                  is_col_split);

  RegTree tree;

@@ -284,11 +288,16 @@ void TestBuildHistogram(bool is_distributed, bool force_read_by_column) {
 }

+// Runs TestBuildHistogram for every combination of is_distributed and force_read_by_column,
+// always with is_col_split disabled (third argument).
 TEST(CPUHistogram, BuildHist) {
-  TestBuildHistogram(true, false);
-  TestBuildHistogram(false, false);
-  TestBuildHistogram(true, true);
-  TestBuildHistogram(false, true);
+  TestBuildHistogram(true, false, false);
+  TestBuildHistogram(false, false, false);
+  TestBuildHistogram(true, true, false);
+  TestBuildHistogram(false, true, false);
+}

+// Runs TestBuildHistogram with is_distributed and is_col_split enabled on kWorkers in-memory
+// workers, once with force_read_by_column on and once with it off.
+TEST(CPUHistogram, BuildHistColSplit) {
+  auto constexpr kWorkers = 4;
+  RunWithInMemoryCommunicator(kWorkers, TestBuildHistogram, true, true, true);
+  RunWithInMemoryCommunicator(kWorkers, TestBuildHistogram, true, false, true);
 }

 namespace {
@@ -340,7 +349,7 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
  HistogramBuilder<CPUExpandEntry> cat_hist;
  for (auto const &gidx : cat_m->GetBatches<GHistIndexMatrix>({kBins, 0.5})) {
    auto total_bins = gidx.cut.TotalBins();
-    cat_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false);
+    cat_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false, false);
    cat_hist.BuildHist(0, gidx, &tree, row_set_collection,
                        nodes_for_explicit_hist_build, {}, gpair.HostVector(),
                        force_read_by_column);
@@ -354,7 +363,7 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
  HistogramBuilder<CPUExpandEntry> onehot_hist;
  for (auto const &gidx : encode_m->GetBatches<GHistIndexMatrix>({kBins, 0.5})) {
    auto total_bins = gidx.cut.TotalBins();
-    onehot_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false);
+    onehot_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false, false);
    onehot_hist.BuildHist(0, gidx, &tree, row_set_collection, nodes_for_explicit_hist_build, {},
                          gpair.HostVector(),
                          force_read_by_column);
@@ -419,7 +428,7 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo
        1, [&](size_t nidx_in_set) { return partition_size.at(nidx_in_set); },
        256};

-    multi_build.Reset(total_bins, batch_param, ctx.Threads(), rows_set.size(), false);
+    multi_build.Reset(total_bins, batch_param, ctx.Threads(), rows_set.size(), false, false);

    size_t page_idx{0};
    for (auto const &page : m->GetBatches<GHistIndexMatrix>(batch_param)) {
@@ -440,7 +449,7 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo
    common::RowSetCollection row_set_collection;
    InitRowPartitionForTest(&row_set_collection, n_samples);

-    single_build.Reset(total_bins, batch_param, ctx.Threads(), 1, false);
+    single_build.Reset(total_bins, batch_param, ctx.Threads(), 1, false, false);
    SparsePage concat;
    std::vector<float> hess(m->Info().num_row_, 1.0f);
    for (auto const& page : m->GetBatches<SparsePage>()) {
--- a/tests/cpp/tree/test_approx.cc
+++ b/tests/cpp/tree/test_approx.cc
@@ -10,29 +10,36 @@

 namespace xgboost {
 namespace tree {
-TEST(Approx, Partitioner) {
-  size_t n_samples = 1024, n_features = 1, base_rowid = 0;
-  Context ctx;
-  CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
-  ASSERT_EQ(partitioner.base_rowid, base_rowid);
-  ASSERT_EQ(partitioner.Size(), 1);
-  ASSERT_EQ(partitioner.Partitions()[0].Size(), n_samples);
-
-  auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
-  ctx.InitAllowUnknown(Args{});
-  std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};

+namespace {
+/**
+ * \brief Generate random gradient pairs and return only their hessian values.
+ *
+ * \param n_samples Forwarded to GenerateRandomGradients.
+ *
+ * \return One hessian value for each generated gradient pair, in the same order.
+ */
+std::vector<float> GenerateHess(size_t n_samples) {
  auto grad = GenerateRandomGradients(n_samples);
  std::vector<float> hess(grad.Size());
  std::transform(grad.HostVector().cbegin(), grad.HostVector().cend(), hess.begin(),
                 [](auto gpair) { return gpair.GetHess(); });
+  return hess;
+}
+}  // anonymous namespace
+
+TEST(Approx, Partitioner) {
+  size_t n_samples = 1024, n_features = 1, base_rowid = 0;
+  Context ctx;
+  ctx.InitAllowUnknown(Args{});
+  CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
+  ASSERT_EQ(partitioner.base_rowid, base_rowid);
+  ASSERT_EQ(partitioner.Size(), 1);
+  ASSERT_EQ(partitioner.Partitions()[0].Size(), n_samples);
+
+  auto const Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
+  auto hess = GenerateHess(n_samples);
+  std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};

  for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({64, hess, true})) {
    bst_feature_t const split_ind = 0;
    {
      auto min_value = page.cut.MinValues()[split_ind];
      RegTree tree;
-      CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
+      CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
      GetSplit(&tree, min_value, &candidates);
      partitioner.UpdatePosition(&ctx, page, candidates, &tree);
      ASSERT_EQ(partitioner.Size(), 3);
@@ -40,7 +47,7 @@ TEST(Approx, Partitioner) {
      ASSERT_EQ(partitioner[2].Size(), n_samples);
    }
    {
-      CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
+      CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
      auto ptr = page.cut.Ptrs()[split_ind + 1];
      float split_value = page.cut.Values().at(ptr / 2);
      RegTree tree;
@@ -66,12 +73,85 @@ TEST(Approx, Partitioner) {
  }
 }

+namespace {
+/**
+ * \brief Per-worker body of the column-split partitioner test.
+ *
+ * Slices `Xy` by column using the world size and rank reported by the collective module,
+ * then checks the row partitions produced for two different split values.
+ *
+ * \param n_samples  Number of rows given to each CommonRowPartitioner.
+ * \param base_rowid Base row id given to each CommonRowPartitioner.
+ * \param Xy         Full input matrix; only its column slice is used here.
+ * \param hess       Hessian values used when requesting GHistIndexMatrix batches.
+ * \param min_value  Split value for which node 1 must be empty and node 2 hold all rows.
+ * \param mid_value  Split value whose partitions are compared with the expected ones.
+ * \param expected_mid_partitioner Partitioner holding the expected rows for `mid_value`.
+ */
+void TestColumnSplitPartitioner(size_t n_samples, size_t base_rowid, std::shared_ptr<DMatrix> Xy,
+                                std::vector<float>* hess, float min_value, float mid_value,
+                                CommonRowPartitioner const& expected_mid_partitioner) {
+  auto dmat =
+      std::unique_ptr<DMatrix>{Xy->SliceCol(collective::GetWorldSize(), collective::GetRank())};
+  std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
+  Context ctx;
+  ctx.InitAllowUnknown(Args{});
+  for (auto const& page : dmat->GetBatches<GHistIndexMatrix>({64, *hess, true})) {
+    {
+      // Split at min_value: after the update one child is empty and the other has every row.
+      RegTree tree;
+      CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, true};
+      GetSplit(&tree, min_value, &candidates);
+      partitioner.UpdatePosition(&ctx, page, candidates, &tree);
+      ASSERT_EQ(partitioner.Size(), 3);
+      ASSERT_EQ(partitioner[1].Size(), 0);
+      ASSERT_EQ(partitioner[2].Size(), n_samples);
+    }
+    {
+      // Split at mid_value: both children must match the expected partitioner row by row.
+      CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, true};
+      RegTree tree;
+      GetSplit(&tree, mid_value, &candidates);
+      partitioner.UpdatePosition(&ctx, page, candidates, &tree);
+
+      auto left_nidx = tree[RegTree::kRoot].LeftChild();
+      auto elem = partitioner[left_nidx];
+      ASSERT_LT(elem.Size(), n_samples);
+      ASSERT_GT(elem.Size(), 1);
+      auto expected_elem = expected_mid_partitioner[left_nidx];
+      ASSERT_EQ(elem.Size(), expected_elem.Size());
+      for (auto it = elem.begin, eit = expected_elem.begin; it != elem.end; ++it, ++eit) {
+        ASSERT_EQ(*it, *eit);
+      }
+
+      auto right_nidx = tree[RegTree::kRoot].RightChild();
+      elem = partitioner[right_nidx];
+      expected_elem = expected_mid_partitioner[right_nidx];
+      ASSERT_EQ(elem.Size(), expected_elem.Size());
+      for (auto it = elem.begin, eit = expected_elem.begin; it != elem.end; ++it, ++eit) {
+        ASSERT_EQ(*it, *eit);
+      }
+    }
+  }
+}
+}  // anonymous namespace
+
+/**
+ * Build the expected partitions for a mid-value split on the full matrix, then run
+ * TestColumnSplitPartitioner on kWorkers in-memory workers to compare against them.
+ */
+TEST(Approx, PartitionerColSplit) {
+  size_t n_samples = 1024, n_features = 16, base_rowid = 0;
+  auto const Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
+  auto hess = GenerateHess(n_samples);
+  std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
+
+  // Both values are only assigned inside the batch loop; initialise them so the reads after
+  // the loop are well defined even if no page is produced.
+  float min_value{0.0f};
+  float mid_value{0.0f};
+  Context ctx;
+  ctx.InitAllowUnknown(Args{});
+  // Reference partitioner, constructed with the last argument set to false (the workers
+  // construct theirs with true).
+  CommonRowPartitioner mid_partitioner{&ctx, n_samples, base_rowid, false};
+  for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({64, hess, true})) {
+    bst_feature_t const split_ind = 0;
+    min_value = page.cut.MinValues()[split_ind];
+
+    // Pick the cut value in the middle of the first feature's cut range.
+    auto ptr = page.cut.Ptrs()[split_ind + 1];
+    mid_value = page.cut.Values().at(ptr / 2);
+    RegTree tree;
+    GetSplit(&tree, mid_value, &candidates);
+    mid_partitioner.UpdatePosition(&ctx, page, candidates, &tree);
+  }
+
+  auto constexpr kWorkers = 4;
+  RunWithInMemoryCommunicator(kWorkers, TestColumnSplitPartitioner, n_samples, base_rowid, Xy,
+                              &hess, min_value, mid_value, mid_partitioner);
+}
+
 namespace {
 void TestLeafPartition(size_t n_samples) {
  size_t const n_features = 2, base_rowid = 0;
  Context ctx;
  common::RowSetCollection row_set;
-  CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
+  CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};

  auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
  std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
--- a/tests/cpp/tree/test_evaluate_splits.h
+++ b/tests/cpp/tree/test_evaluate_splits.h
@@ -1,5 +1,5 @@
-/*!
- * Copyright 2022 by XGBoost Contributors
+/**
+ * Copyright 2022-2023 by XGBoost Contributors
 */
 #include <gtest/gtest.h>
 #include <xgboost/data.h>
@@ -12,8 +12,7 @@
 #include "../../../src/tree/split_evaluator.h"
 #include "../helpers.h"

-namespace xgboost {
-namespace tree {
+namespace xgboost::tree {
 /**
 * \brief Enumerate all possible partitions for categorical split.
 */
@@ -151,5 +150,4 @@ class TestCategoricalSplitWithMissing : public testing::Test {
    ASSERT_EQ(right_sum.GetHess(), parent_sum_.GetHess() - left_sum.GetHess());
  }
 };
-}  // namespace tree
-}  // namespace xgboost
+}  // namespace xgboost::tree
--- a/tests/cpp/tree/test_gpu_hist.cu
+++ b/tests/cpp/tree/test_gpu_hist.cu
@@ -1,5 +1,5 @@
-/*!
- * Copyright 2017-2022 XGBoost contributors
+/**
+ * Copyright 2017-2023 by XGBoost contributors
 */
 #include <gtest/gtest.h>
 #include <thrust/device_vector.h>
@@ -13,6 +13,7 @@
 #include "../../../src/common/common.h"
 #include "../../../src/data/sparse_page_source.h"
 #include "../../../src/tree/constraints.cuh"
+#include "../../../src/tree/param.h"  // for TrainParam
 #include "../../../src/tree/updater_gpu_common.cuh"
 #include "../../../src/tree/updater_gpu_hist.cu"
 #include "../filesystem.h"  // dmlc::TemporaryDirectory
@@ -21,8 +22,7 @@
 #include "xgboost/context.h"
 #include "xgboost/json.h"

-namespace xgboost {
-namespace tree {
+namespace xgboost::tree {
 TEST(GpuHist, DeviceHistogram) {
  // Ensures that node allocates correctly after reaching `kStopGrowingSize`.
  dh::safe_cuda(cudaSetDevice(0));
@@ -83,11 +83,12 @@ void TestBuildHist(bool use_shared_memory_histograms) {
  int const kNRows = 16, kNCols = 8;

  TrainParam param;
-  std::vector<std::pair<std::string, std::string>> args {
-    {"max_depth", "6"},
-    {"max_leaves", "0"},
+  Args args{
+      {"max_depth", "6"},
+      {"max_leaves", "0"},
  };
  param.Init(args);
+
  auto page = BuildEllpackPage(kNRows, kNCols);
  BatchParam batch_param{};
  Context ctx{CreateEmptyGenericParam(0)};
@@ -168,7 +169,6 @@ void TestHistogramIndexImpl() {
  int constexpr kNRows = 1000, kNCols = 10;

  // Build 2 matrices and build a histogram maker with that
-
  Context ctx(CreateEmptyGenericParam(0));
  tree::GPUHistMaker hist_maker{&ctx, ObjInfo{ObjInfo::kRegression}},
      hist_maker_ext{&ctx, ObjInfo{ObjInfo::kRegression}};
@@ -179,15 +179,14 @@ void TestHistogramIndexImpl() {
  std::unique_ptr<DMatrix> hist_maker_ext_dmat(
    CreateSparsePageDMatrixWithRC(kNRows, kNCols, 128UL, true, tempdir));

-  std::vector<std::pair<std::string, std::string>> training_params = {
-    {"max_depth", "10"},
-    {"max_leaves", "0"}
-  };
+  Args training_params = {{"max_depth", "10"}, {"max_leaves", "0"}};
+  TrainParam param;
+  param.UpdateAllowUnknown(training_params);

  hist_maker.Configure(training_params);
-  hist_maker.InitDataOnce(hist_maker_dmat.get());
+  hist_maker.InitDataOnce(&param, hist_maker_dmat.get());
  hist_maker_ext.Configure(training_params);
-  hist_maker_ext.InitDataOnce(hist_maker_ext_dmat.get());
+  hist_maker_ext.InitDataOnce(&param, hist_maker_ext_dmat.get());

  // Extract the device maker from the histogram makers and from that its compressed
  // histogram index
@@ -237,13 +236,15 @@ void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
      {"subsample", std::to_string(subsample)},
      {"sampling_method", sampling_method},
  };
+  TrainParam param;
+  param.UpdateAllowUnknown(args);

  Context ctx(CreateEmptyGenericParam(0));
  tree::GPUHistMaker hist_maker{&ctx,ObjInfo{ObjInfo::kRegression}};
-  hist_maker.Configure(args);

  std::vector<HostDeviceVector<bst_node_t>> position(1);
-  hist_maker.Update(gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position}, {tree});
+  hist_maker.Update(&param, gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position},
+                    {tree});
  auto cache = linalg::VectorView<float>{preds->DeviceSpan(), {preds->Size()}, 0};
  hist_maker.UpdatePredictionCache(dmat, cache);
 }
@@ -391,13 +392,11 @@ TEST(GpuHist, ConfigIO) {
  Json j_updater { Object() };
  updater->SaveConfig(&j_updater);
  ASSERT_TRUE(IsA<Object>(j_updater["gpu_hist_train_param"]));
-  ASSERT_TRUE(IsA<Object>(j_updater["train_param"]));
  updater->LoadConfig(j_updater);

  Json j_updater_roundtrip { Object() };
  updater->SaveConfig(&j_updater_roundtrip);
  ASSERT_TRUE(IsA<Object>(j_updater_roundtrip["gpu_hist_train_param"]));
-  ASSERT_TRUE(IsA<Object>(j_updater_roundtrip["train_param"]));

  ASSERT_EQ(j_updater, j_updater_roundtrip);
 }
@@ -414,5 +413,4 @@ TEST(GpuHist, MaxDepth) {

  ASSERT_THROW({learner->UpdateOneIter(0, p_mat);}, dmlc::Error);
 }
-}  // namespace tree
-}  // namespace xgboost
+}  // namespace xgboost::tree
--- a/tests/cpp/tree/test_histmaker.cc
+++ b/tests/cpp/tree/test_histmaker.cc
@@ -1,33 +1,42 @@
+/**
+ * Copyright 2019-2023 by XGBoost Contributors
+ */
 #include <gtest/gtest.h>
-
 #include <xgboost/tree_model.h>
 #include <xgboost/tree_updater.h>

+#include "../../../src/tree/param.h"  // for TrainParam
 #include "../helpers.h"

-namespace xgboost {
-namespace tree {
+namespace xgboost::tree {
+/**
+ * \brief Create the random matrix shared by the tests in this file.
+ *
+ * Uses RandomDataGenerator with a third argument of 0.6f (presumably the sparsity; confirm
+ * against helpers.h) and a fixed seed of 3.
+ *
+ * \param rows Number of rows passed to the generator.
+ * \param cols Number of columns passed to the generator.
+ */
+std::shared_ptr<DMatrix> GenerateDMatrix(std::size_t rows, std::size_t cols){
+  return RandomDataGenerator{rows, cols, 0.6f}.Seed(3).GenerateDMatrix();
+}

-TEST(GrowHistMaker, InteractionConstraint) {
-  size_t constexpr kRows = 32;
-  size_t constexpr kCols = 16;
-
-  Context ctx;
-
-  auto p_dmat = RandomDataGenerator{kRows, kCols, 0.6f}.Seed(3).GenerateDMatrix();
-
-  HostDeviceVector<GradientPair> gradients (kRows);
-  std::vector<GradientPair>& h_gradients = gradients.HostVector();
+/**
+ * \brief Create `rows` gradient pairs whose gradient and hessian are both drawn from
+ *        SimpleRealUniformDistribution(0.0f, 1.0f) using a default-constructed SimpleLCG.
+ *
+ * \param rows Number of gradient pairs to generate.
+ *
+ * \return Owning pointer to the filled vector.
+ */
+std::unique_ptr<HostDeviceVector<GradientPair>> GenerateGradients(std::size_t rows) {
+  auto p_gradients = std::make_unique<HostDeviceVector<GradientPair>>(rows);
+  auto& h_gradients = p_gradients->HostVector();

  xgboost::SimpleLCG gen;
  xgboost::SimpleRealUniformDistribution<bst_float> dist(0.0f, 1.0f);

-  for (size_t i = 0; i < kRows; ++i) {
-    bst_float grad = dist(&gen);
-    bst_float hess = dist(&gen);
-    h_gradients[i] = GradientPair(grad, hess);
+  for (std::size_t i = 0; i < rows; ++i) {
+    auto grad = dist(&gen);
+    auto hess = dist(&gen);
+    h_gradients[i] = GradientPair{grad, hess};
  }

+  return p_gradients;
+}
+
+TEST(GrowHistMaker, InteractionConstraint)
+{
+  auto constexpr kRows = 32;
+  auto constexpr kCols = 16;
+  auto p_dmat = GenerateDMatrix(kRows, kCols);
+  auto p_gradients = GenerateGradients(kRows);
+
+  Context ctx;
  {
    // With constraints
    RegTree tree;
@@ -35,11 +44,11 @@ TEST(GrowHistMaker, InteractionConstraint) {

    std::unique_ptr<TreeUpdater> updater{
        TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
-    updater->Configure(Args{
-        {"interaction_constraints", "[[0, 1]]"},
-        {"num_feature", std::to_string(kCols)}});
+    TrainParam param;
+    param.UpdateAllowUnknown(
+        Args{{"interaction_constraints", "[[0, 1]]"}, {"num_feature", std::to_string(kCols)}});
    std::vector<HostDeviceVector<bst_node_t>> position(1);
-    updater->Update(&gradients, p_dmat.get(), position, {&tree});
+    updater->Update(&param, p_gradients.get(), p_dmat.get(), position, {&tree});

    ASSERT_EQ(tree.NumExtraNodes(), 4);
    ASSERT_EQ(tree[0].SplitIndex(), 1);
@@ -54,9 +63,10 @@ TEST(GrowHistMaker, InteractionConstraint) {

    std::unique_ptr<TreeUpdater> updater{
        TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
-    updater->Configure(Args{{"num_feature", std::to_string(kCols)}});
    std::vector<HostDeviceVector<bst_node_t>> position(1);
-    updater->Update(&gradients, p_dmat.get(), position, {&tree});
+    TrainParam param;
+    param.Init(Args{});
+    updater->Update(&param, p_gradients.get(), p_dmat.get(), position, {&tree});

    ASSERT_EQ(tree.NumExtraNodes(), 10);
    ASSERT_EQ(tree[0].SplitIndex(), 1);
@@ -66,5 +76,53 @@ TEST(GrowHistMaker, InteractionConstraint) {
  }
 }

-}  // namespace tree
-}  // namespace xgboost
+namespace {
+/**
+ * \brief Per-worker body of the column-split test for the "grow_histmaker" updater.
+ *
+ * Regenerates the data and gradients, slices the matrix by column for this worker, grows a
+ * tree on the slice and compares it with `expected_tree`.
+ *
+ * \param rows          Number of rows of the generated data.
+ * \param cols          Number of columns of the generated data; also set as num_feature.
+ * \param expected_tree Tree the result must be equal to.
+ */
+void TestColumnSplit(int32_t rows, int32_t cols, RegTree const& expected_tree) {
+  auto p_dmat = GenerateDMatrix(rows, cols);
+  auto p_gradients = GenerateGradients(rows);
+  Context ctx;
+  std::unique_ptr<TreeUpdater> updater{
+      TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
+  std::vector<HostDeviceVector<bst_node_t>> position(1);
+
+  // Keep only this worker's slice of the columns, chosen by world size and rank.
+  std::unique_ptr<DMatrix> sliced{
+      p_dmat->SliceCol(collective::GetWorldSize(), collective::GetRank())};
+
+  RegTree tree;
+  tree.param.num_feature = cols;
+  TrainParam param;
+  param.Init(Args{});
+  updater->Update(&param, p_gradients.get(), sliced.get(), position, {&tree});
+
+  EXPECT_EQ(tree.NumExtraNodes(), 10);
+  EXPECT_EQ(tree[0].SplitIndex(), 1);
+
+  EXPECT_NE(tree[tree[0].LeftChild()].SplitIndex(), 0);
+  EXPECT_NE(tree[tree[0].RightChild()].SplitIndex(), 0);
+
+  EXPECT_EQ(tree, expected_tree);
+}
+}  // anonymous namespace
+
+// Grows a tree on the unsliced data first, then runs TestColumnSplit on kWorldSize in-memory
+// workers, each of which compares its own tree against that expected tree.
+TEST(GrowHistMaker, ColumnSplit) {
+  auto constexpr kRows = 32;
+  auto constexpr kCols = 16;
+
+  RegTree expected_tree;
+  expected_tree.param.num_feature = kCols;
+  {
+    // Build the expected tree; everything in this scope is released before the workers run.
+    auto p_dmat = GenerateDMatrix(kRows, kCols);
+    auto p_gradients = GenerateGradients(kRows);
+    Context ctx;
+    std::unique_ptr<TreeUpdater> updater{
+        TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
+    std::vector<HostDeviceVector<bst_node_t>> position(1);
+    TrainParam param;
+    param.Init(Args{});
+    updater->Update(&param, p_gradients.get(), p_dmat.get(), position, {&expected_tree});
+  }
+
+  auto constexpr kWorldSize = 2;
+  // std::cref hands the expected tree to the workers as a reference wrapper.
+  RunWithInMemoryCommunicator(kWorldSize, TestColumnSplit, kRows, kCols, std::cref(expected_tree));
+}
+}  // namespace xgboost::tree
--- a/tests/cpp/tree/test_prediction_cache.cc
+++ b/tests/cpp/tree/test_prediction_cache.cc
@@ -7,6 +7,7 @@

 #include <memory>

+#include "../../../src/tree/param.h"  // for TrainParam
 #include "../helpers.h"

 namespace xgboost {
@@ -75,9 +76,11 @@ class TestPredictionCache : public ::testing::Test {
      RegTree tree;
      std::vector<RegTree *> trees{&tree};
      auto gpair = GenerateRandomGradients(n_samples_);
-      updater->Configure(Args{{"max_bin", "64"}});
+      tree::TrainParam param;
+      param.UpdateAllowUnknown(Args{{"max_bin", "64"}});
+
      std::vector<HostDeviceVector<bst_node_t>> position(1);
-      updater->Update(&gpair, Xy_.get(), position, trees);
+      updater->Update(&param, &gpair, Xy_.get(), position, trees);
      HostDeviceVector<float> out_prediction_cached;
      out_prediction_cached.SetDevice(ctx.gpu_id);
      out_prediction_cached.Resize(n_samples_);
--- a/tests/cpp/tree/test_prune.cc
+++ b/tests/cpp/tree/test_prune.cc
@@ -1,28 +1,26 @@
-/*!
- * Copyright 2018-2019 by Contributors
+/**
+ * Copyright 2018-2023 by XGBoost Contributors
 */
+#include <gtest/gtest.h>
 #include <xgboost/data.h>
 #include <xgboost/host_device_vector.h>
-#include <xgboost/tree_updater.h>
 #include <xgboost/learner.h>
-#include <gtest/gtest.h>
-#include <vector>
-#include <string>
-#include <memory>
+#include <xgboost/tree_updater.h>

+#include <memory>
+#include <string>
+#include <vector>
+
+#include "../../../src/tree/param.h"  // for TrainParam
 #include "../helpers.h"

-namespace xgboost {
-namespace tree {
-
+namespace xgboost::tree {
 TEST(Updater, Prune) {
  int constexpr kCols = 16;

  std::vector<std::pair<std::string, std::string>> cfg;
-  cfg.emplace_back(std::pair<std::string, std::string>("num_feature",
-                                                       std::to_string(kCols)));
-  cfg.emplace_back(std::pair<std::string, std::string>(
-      "min_split_loss", "10"));
+  cfg.emplace_back("num_feature", std::to_string(kCols));
+  cfg.emplace_back("min_split_loss", "10");

  // These data are just place holders.
  HostDeviceVector<GradientPair> gpair =
@@ -38,28 +36,30 @@ TEST(Updater, Prune) {
  tree.param.UpdateAllowUnknown(cfg);
  std::vector<RegTree*> trees {&tree};
  // prepare pruner
+  TrainParam param;
+  param.UpdateAllowUnknown(cfg);
+
  std::unique_ptr<TreeUpdater> pruner(
      TreeUpdater::Create("prune", &ctx, ObjInfo{ObjInfo::kRegression}));
-  pruner->Configure(cfg);

  // loss_chg < min_split_loss;
  std::vector<HostDeviceVector<bst_node_t>> position(trees.size());
  tree.ExpandNode(0, 0, 0, true, 0.0f, 0.3f, 0.4f, 0.0f, 0.0f,
                  /*left_sum=*/0.0f, /*right_sum=*/0.0f);
-  pruner->Update(&gpair, p_dmat.get(), position, trees);
+  pruner->Update(&param, &gpair, p_dmat.get(), position, trees);

  ASSERT_EQ(tree.NumExtraNodes(), 0);

  // loss_chg > min_split_loss;
  tree.ExpandNode(0, 0, 0, true, 0.0f, 0.3f, 0.4f, 11.0f, 0.0f,
                  /*left_sum=*/0.0f, /*right_sum=*/0.0f);
-  pruner->Update(&gpair, p_dmat.get(), position, trees);
+  pruner->Update(&param, &gpair, p_dmat.get(), position, trees);

  ASSERT_EQ(tree.NumExtraNodes(), 2);

  // loss_chg == min_split_loss;
  tree.Stat(0).loss_chg = 10;
-  pruner->Update(&gpair, p_dmat.get(), position, trees);
+  pruner->Update(&param, &gpair, p_dmat.get(), position, trees);

  ASSERT_EQ(tree.NumExtraNodes(), 2);

@@ -73,20 +73,20 @@ TEST(Updater, Prune) {
                  0, 0.5f, true, 0.3, 0.4, 0.5,
                  /*loss_chg=*/19.0f, 0.0f,
                  /*left_sum=*/0.0f, /*right_sum=*/0.0f);
-  cfg.emplace_back(std::make_pair("max_depth", "1"));
-  pruner->Configure(cfg);
-  pruner->Update(&gpair, p_dmat.get(), position, trees);

+  cfg.emplace_back("max_depth", "1");
+  param.UpdateAllowUnknown(cfg);
+  pruner->Update(&param, &gpair, p_dmat.get(), position, trees);
  ASSERT_EQ(tree.NumExtraNodes(), 2);

  tree.ExpandNode(tree[0].LeftChild(),
                  0, 0.5f, true, 0.3, 0.4, 0.5,
                  /*loss_chg=*/18.0f, 0.0f,
                  /*left_sum=*/0.0f, /*right_sum=*/0.0f);
-  cfg.emplace_back(std::make_pair("min_split_loss", "0"));
-  pruner->Configure(cfg);
-  pruner->Update(&gpair, p_dmat.get(), position, trees);
+  cfg.emplace_back("min_split_loss", "0");
+  param.UpdateAllowUnknown(cfg);
+
+  pruner->Update(&param, &gpair, p_dmat.get(), position, trees);
  ASSERT_EQ(tree.NumExtraNodes(), 2);
 }
-}  // namespace tree
-}  // namespace xgboost
+}  // namespace xgboost::tree
--- a/tests/cpp/tree/test_quantile_hist.cc
+++ b/tests/cpp/tree/test_quantile_hist.cc
@@ -23,7 +23,7 @@ TEST(QuantileHist, Partitioner) {
  Context ctx;
  ctx.InitAllowUnknown(Args{});

-  CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
+  CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
  ASSERT_EQ(partitioner.base_rowid, base_rowid);
  ASSERT_EQ(partitioner.Size(), 1);
  ASSERT_EQ(partitioner.Partitions()[0].Size(), n_samples);
@@ -41,7 +41,7 @@ TEST(QuantileHist, Partitioner) {
    {
      auto min_value = gmat.cut.MinValues()[split_ind];
      RegTree tree;
-      CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
+      CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
      GetSplit(&tree, min_value, &candidates);
      partitioner.UpdatePosition<false, true>(&ctx, gmat, column_indices, candidates, &tree);
      ASSERT_EQ(partitioner.Size(), 3);
@@ -49,7 +49,7 @@ TEST(QuantileHist, Partitioner) {
      ASSERT_EQ(partitioner[2].Size(), n_samples);
    }
    {
-      CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
+      CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
      auto ptr = gmat.cut.Ptrs()[split_ind + 1];
      float split_value = gmat.cut.Values().at(ptr / 2);
      RegTree tree;
--- a/tests/cpp/tree/test_refresh.cc
+++ b/tests/cpp/tree/test_refresh.cc
@@ -1,14 +1,15 @@
-/*!
- * Copyright 2018-2019 by Contributors
+/**
+ * Copyright 2018-2023 by XGBoost Contributors
 */
+#include <gtest/gtest.h>
 #include <xgboost/host_device_vector.h>
 #include <xgboost/tree_updater.h>
-#include <gtest/gtest.h>

-#include <vector>
-#include <string>
 #include <memory>
+#include <string>
+#include <vector>

+#include "../../../src/tree/param.h"  // for TrainParam
 #include "../helpers.h"

 namespace xgboost {
@@ -43,9 +44,11 @@ TEST(Updater, Refresh) {
  tree.Stat(cleft).base_weight = 1.2;
  tree.Stat(cright).base_weight = 1.3;

-  refresher->Configure(cfg);
  std::vector<HostDeviceVector<bst_node_t>> position;
-  refresher->Update(&gpair, p_dmat.get(), position, trees);
+  tree::TrainParam param;
+  param.UpdateAllowUnknown(cfg);
+
+  refresher->Update(&param, &gpair, p_dmat.get(), position, trees);

  bst_float constexpr kEps = 1e-6;
  ASSERT_NEAR(-0.183392, tree[cright].LeafValue(), kEps);
--- a/tests/cpp/tree/test_tree_stat.cc
+++ b/tests/cpp/tree/test_tree_stat.cc
@@ -1,7 +1,11 @@
+/**
+ * Copyright 2020-2023 by XGBoost Contributors
+ */
 #include <gtest/gtest.h>
 #include <xgboost/tree_model.h>
 #include <xgboost/tree_updater.h>

+#include "../../../src/tree/param.h"  // for TrainParam
 #include "../helpers.h"

 namespace xgboost {
@@ -21,6 +25,9 @@ class UpdaterTreeStatTest : public ::testing::Test {
  }

  void RunTest(std::string updater) {
+    tree::TrainParam param;
+    param.Init(Args{});
+
    Context ctx(updater == "grow_gpu_hist" ? CreateEmptyGenericParam(0)
                                           : CreateEmptyGenericParam(Context::kCpuId));
    auto up = std::unique_ptr<TreeUpdater>{
@@ -29,7 +36,7 @@ class UpdaterTreeStatTest : public ::testing::Test {
    RegTree tree;
    tree.param.num_feature = kCols;
    std::vector<HostDeviceVector<bst_node_t>> position(1);
-    up->Update(&gpairs_, p_dmat_.get(), position, {&tree});
+    up->Update(&param, &gpairs_, p_dmat_.get(), position, {&tree});

    tree.WalkTree([&tree](bst_node_t nidx) {
      if (tree[nidx].IsLeaf()) {
@@ -69,28 +76,33 @@ class UpdaterEtaTest : public ::testing::Test {
  void RunTest(std::string updater) {
    Context ctx(updater == "grow_gpu_hist" ? CreateEmptyGenericParam(0)
                                           : CreateEmptyGenericParam(Context::kCpuId));
+
    float eta = 0.4;
    auto up_0 = std::unique_ptr<TreeUpdater>{
        TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kClassification})};
-    up_0->Configure(Args{{"eta", std::to_string(eta)}});
+    up_0->Configure(Args{});
+    tree::TrainParam param0;
+    param0.Init(Args{{"eta", std::to_string(eta)}});

    auto up_1 = std::unique_ptr<TreeUpdater>{
        TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kClassification})};
    up_1->Configure(Args{{"eta", "1.0"}});
+    tree::TrainParam param1;
+    param1.Init(Args{{"eta", "1.0"}});

    for (size_t iter = 0; iter < 4; ++iter) {
      RegTree tree_0;
      {
        tree_0.param.num_feature = kCols;
        std::vector<HostDeviceVector<bst_node_t>> position(1);
-        up_0->Update(&gpairs_, p_dmat_.get(), position, {&tree_0});
+        up_0->Update(&param0, &gpairs_, p_dmat_.get(), position, {&tree_0});
      }

      RegTree tree_1;
      {
        tree_1.param.num_feature = kCols;
        std::vector<HostDeviceVector<bst_node_t>> position(1);
-        up_1->Update(&gpairs_, p_dmat_.get(), position, {&tree_1});
+        up_1->Update(&param1, &gpairs_, p_dmat_.get(), position, {&tree_1});
      }
      tree_0.WalkTree([&](bst_node_t nidx) {
        if (tree_0[nidx].IsLeaf()) {
@@ -139,17 +151,18 @@ class TestMinSplitLoss : public ::testing::Test {

              // test gamma
              {"gamma", std::to_string(gamma)}};
+    tree::TrainParam param;
+    param.UpdateAllowUnknown(args);

    Context ctx(updater == "grow_gpu_hist" ? CreateEmptyGenericParam(0)
                                           : CreateEmptyGenericParam(Context::kCpuId));
-    std::cout << ctx.gpu_id << std::endl;
    auto up = std::unique_ptr<TreeUpdater>{
        TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kRegression})};
-    up->Configure(args);
+    up->Configure({});

    RegTree tree;
    std::vector<HostDeviceVector<bst_node_t>> position(1);
-    up->Update(&gpair_, dmat_.get(), position, {&tree});
+    up->Update(&param, &gpair_, dmat_.get(), position, {&tree});

    auto n_nodes = tree.NumExtraNodes();
    return n_nodes;