Remove internal use of gpu_id. (#9568)

2023-09-20 23:29:51 +08:00
parent 38ac52dd87
commit 8c676c889d
121 changed files with 1012 additions and 1044 deletions
--- a/tests/cpp/collective/test_nccl_device_communicator.cu
+++ b/tests/cpp/collective/test_nccl_device_communicator.cu
@@ -34,7 +34,7 @@ void VerifyAllReduceBitwiseAND() {
  auto const rank = collective::GetRank();
  std::bitset<64> original{};
  original[rank] = true;
-  HostDeviceVector<uint64_t> buffer({original.to_ullong()}, rank);
+  HostDeviceVector<uint64_t> buffer({original.to_ullong()}, DeviceOrd::CUDA(rank));
  collective::AllReduce<collective::Operation::kBitwiseAND>(rank, buffer.DevicePointer(), 1);
  collective::Synchronize(rank);
  EXPECT_EQ(buffer.HostVector()[0], 0ULL);
@@ -56,7 +56,7 @@ void VerifyAllReduceBitwiseOR() {
  auto const rank = collective::GetRank();
  std::bitset<64> original{};
  original[rank] = true;
-  HostDeviceVector<uint64_t> buffer({original.to_ullong()}, rank);
+  HostDeviceVector<uint64_t> buffer({original.to_ullong()}, DeviceOrd::CUDA(rank));
  collective::AllReduce<collective::Operation::kBitwiseOR>(rank, buffer.DevicePointer(), 1);
  collective::Synchronize(rank);
  EXPECT_EQ(buffer.HostVector()[0], (1ULL << world_size) - 1);
@@ -78,7 +78,7 @@ void VerifyAllReduceBitwiseXOR() {
  auto const rank = collective::GetRank();
  std::bitset<64> original{~0ULL};
  original[rank] = false;
-  HostDeviceVector<uint64_t> buffer({original.to_ullong()}, rank);
+  HostDeviceVector<uint64_t> buffer({original.to_ullong()}, DeviceOrd::CUDA(rank));
  collective::AllReduce<collective::Operation::kBitwiseXOR>(rank, buffer.DevicePointer(), 1);
  collective::Synchronize(rank);
  EXPECT_EQ(buffer.HostVector()[0], (1ULL << world_size) - 1);
--- a/tests/cpp/common/test_hist_util.cc
+++ b/tests/cpp/common/test_hist_util.cc
@@ -147,7 +147,7 @@ TEST(CutsBuilder, SearchGroupInd) {

  EXPECT_ANY_THROW(HostSketchContainer::SearchGroupIndFromRow(p_mat->Info().group_ptr_, 17));

-  p_mat->Info().Validate(-1);
+  p_mat->Info().Validate(DeviceOrd::CPU());
  EXPECT_THROW(HostSketchContainer::SearchGroupIndFromRow(p_mat->Info().group_ptr_, 17),
               dmlc::Error);

@@ -330,7 +330,7 @@ TEST(HistUtil, IndexBinData) {
 void TestSketchFromWeights(bool with_group) {
  size_t constexpr kRows = 300, kCols = 20, kBins = 256;
  size_t constexpr kGroups = 10;
-  auto m = RandomDataGenerator{kRows, kCols, 0}.Device(0).GenerateDMatrix();
+  auto m = RandomDataGenerator{kRows, kCols, 0}.Device(DeviceOrd::CUDA(0)).GenerateDMatrix();
  Context ctx;
  common::HistogramCuts cuts = SketchOnDMatrix(&ctx, m.get(), kBins);

--- a/tests/cpp/common/test_hist_util.cu
+++ b/tests/cpp/common/test_hist_util.cu
@@ -208,7 +208,7 @@ TEST(HistUtil, RemoveDuplicatedCategories) {
  ASSERT_EQ(info.feature_types.Size(), n_features);

  HostDeviceVector<bst_row_t> cuts_ptr{0, n_samples, n_samples * 2, n_samples * 3};
-  cuts_ptr.SetDevice(0);
+  cuts_ptr.SetDevice(DeviceOrd::CUDA(0));

  dh::device_vector<float> weight(n_samples * n_features, 0);
  dh::Iota(dh::ToSpan(weight));
@@ -221,7 +221,7 @@ TEST(HistUtil, RemoveDuplicatedCategories) {
  thrust::sort_by_key(sorted_entries.begin(), sorted_entries.end(), weight.begin(),
                      detail::EntryCompareOp());

-  detail::RemoveDuplicatedCategories(ctx.gpu_id, info, cuts_ptr.DeviceSpan(), &sorted_entries,
+  detail::RemoveDuplicatedCategories(ctx.Device(), info, cuts_ptr.DeviceSpan(), &sorted_entries,
                                     &weight, &columns_ptr);

  auto const& h_cptr = cuts_ptr.ConstHostVector();
@@ -363,7 +363,8 @@ template <typename Adapter>
 auto MakeUnweightedCutsForTest(Adapter adapter, int32_t num_bins, float missing, size_t batch_size = 0) {
  common::HistogramCuts batched_cuts;
  HostDeviceVector<FeatureType> ft;
-  SketchContainer sketch_container(ft, num_bins, adapter.NumColumns(), adapter.NumRows(), 0);
+  SketchContainer sketch_container(ft, num_bins, adapter.NumColumns(), adapter.NumRows(),
+                                   DeviceOrd::CUDA(0));
  MetaInfo info;
  AdapterDeviceSketch(adapter.Value(), num_bins, info, missing, &sketch_container, batch_size);
  sketch_container.MakeCuts(&batched_cuts, info.IsColumnSplit());
@@ -430,7 +431,7 @@ TEST(HistUtil, AdapterSketchSlidingWindowMemory) {
  ConsoleLogger::Configure({{"verbosity", "3"}});
  common::HistogramCuts batched_cuts;
  HostDeviceVector<FeatureType> ft;
-  SketchContainer sketch_container(ft, num_bins, num_columns, num_rows, 0);
+  SketchContainer sketch_container(ft, num_bins, num_columns, num_rows, DeviceOrd::CUDA(0));
  AdapterDeviceSketch(adapter.Value(), num_bins, info, std::numeric_limits<float>::quiet_NaN(),
                      &sketch_container);
  HistogramCuts cuts;
@@ -458,7 +459,7 @@ TEST(HistUtil, AdapterSketchSlidingWindowWeightedMemory) {
  ConsoleLogger::Configure({{"verbosity", "3"}});
  common::HistogramCuts batched_cuts;
  HostDeviceVector<FeatureType> ft;
-  SketchContainer sketch_container(ft, num_bins, num_columns, num_rows, 0);
+  SketchContainer sketch_container(ft, num_bins, num_columns, num_rows, DeviceOrd::CUDA(0));
  AdapterDeviceSketch(adapter.Value(), num_bins, info,
                      std::numeric_limits<float>::quiet_NaN(),
                      &sketch_container);
@@ -493,7 +494,7 @@ void TestCategoricalSketchAdapter(size_t n, size_t num_categories,
  }

  ASSERT_EQ(info.feature_types.Size(), 1);
-  SketchContainer container(info.feature_types, num_bins, 1, n, 0);
+  SketchContainer container(info.feature_types, num_bins, 1, n, DeviceOrd::CUDA(0));
  AdapterDeviceSketch(adapter.Value(), num_bins, info,
                      std::numeric_limits<float>::quiet_NaN(), &container);
  HistogramCuts cuts;
@@ -566,7 +567,7 @@ TEST(HistUtil, AdapterDeviceSketchBatches) {

 namespace {
 auto MakeData(Context const* ctx, std::size_t n_samples, bst_feature_t n_features) {
-  dh::safe_cuda(cudaSetDevice(ctx->gpu_id));
+  dh::safe_cuda(cudaSetDevice(ctx->Ordinal()));
  auto n = n_samples * n_features;
  std::vector<float> x;
  x.resize(n);
@@ -606,21 +607,21 @@ void TestGetColumnSize(std::size_t n_samples) {
  std::vector<std::size_t> h_column_size_1(column_sizes_scan.size());

  detail::LaunchGetColumnSizeKernel<decltype(batch_iter), true, true>(
-      ctx.gpu_id, IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
+      ctx.Device(), IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
  thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size.begin());

  detail::LaunchGetColumnSizeKernel<decltype(batch_iter), true, false>(
-      ctx.gpu_id, IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
+      ctx.Device(), IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
  thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size_1.begin());
  ASSERT_EQ(h_column_size, h_column_size_1);

  detail::LaunchGetColumnSizeKernel<decltype(batch_iter), false, true>(
-      ctx.gpu_id, IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
+      ctx.Device(), IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
  thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size_1.begin());
  ASSERT_EQ(h_column_size, h_column_size_1);

  detail::LaunchGetColumnSizeKernel<decltype(batch_iter), false, false>(
-      ctx.gpu_id, IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
+      ctx.Device(), IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
  thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size_1.begin());
  ASSERT_EQ(h_column_size, h_column_size_1);
 }
@@ -697,9 +698,9 @@ void TestAdapterSketchFromWeights(bool with_group) {
  size_t constexpr kRows = 300, kCols = 20, kBins = 256;
  size_t constexpr kGroups = 10;
  HostDeviceVector<float> storage;
-  std::string m =
-      RandomDataGenerator{kRows, kCols, 0}.Device(0).GenerateArrayInterface(
-          &storage);
+  std::string m = RandomDataGenerator{kRows, kCols, 0}
+                      .Device(DeviceOrd::CUDA(0))
+                      .GenerateArrayInterface(&storage);
  MetaInfo info;
  Context ctx;
  auto& h_weights = info.weights_.HostVector();
@@ -718,14 +719,14 @@ void TestAdapterSketchFromWeights(bool with_group) {
    info.SetInfo(ctx, "group", groups.data(), DataType::kUInt32, kGroups);
  }

-  info.weights_.SetDevice(0);
+  info.weights_.SetDevice(DeviceOrd::CUDA(0));
  info.num_row_ = kRows;
  info.num_col_ = kCols;

  data::CupyAdapter adapter(m);
  auto const& batch = adapter.Value();
  HostDeviceVector<FeatureType> ft;
-  SketchContainer sketch_container(ft, kBins, kCols, kRows, 0);
+  SketchContainer sketch_container(ft, kBins, kCols, kRows, DeviceOrd::CUDA(0));
  AdapterDeviceSketch(adapter.Value(), kBins, info, std::numeric_limits<float>::quiet_NaN(),
                      &sketch_container);

@@ -769,7 +770,7 @@ void TestAdapterSketchFromWeights(bool with_group) {
      // https://github.com/dmlc/xgboost/issues/7946
      h_weights[i] = (i % 2 == 0 ? 1 : 2) / static_cast<float>(kGroups);
    }
-    SketchContainer sketch_container(ft, kBins, kCols, kRows, 0);
+    SketchContainer sketch_container{ft, kBins, kCols, kRows, DeviceOrd::CUDA(0)};
    AdapterDeviceSketch(adapter.Value(), kBins, info, std::numeric_limits<float>::quiet_NaN(),
                        &sketch_container);
    sketch_container.MakeCuts(&weighted, info.IsColumnSplit());
--- a/tests/cpp/common/test_host_device_vector.cu
+++ b/tests/cpp/common/test_host_device_vector.cu
@@ -1,7 +1,6 @@
-/*!
- * Copyright 2018 XGBoost contributors
+/**
+ * Copyright 2018-2023 XGBoost contributors
 */
-
 #include <gtest/gtest.h>
 #include <thrust/equal.h>
 #include <thrust/iterator/counting_iterator.h>
@@ -9,14 +8,13 @@
 #include "../../../src/common/device_helpers.cuh"
 #include <xgboost/host_device_vector.h>

-namespace xgboost {
-namespace common {
+namespace xgboost::common {
 namespace {
-void SetDeviceForTest(int device) {
+void SetDeviceForTest(DeviceOrd device) {
  int n_devices;
  dh::safe_cuda(cudaGetDeviceCount(&n_devices));
-  device %= n_devices;
-  dh::safe_cuda(cudaSetDevice(device));
+  device.ordinal %= n_devices;
+  dh::safe_cuda(cudaSetDevice(device.ordinal));
 }
 }  // namespace

@@ -31,13 +29,13 @@ struct HostDeviceVectorSetDeviceHandler {
  }
 };

-void InitHostDeviceVector(size_t n, int device, HostDeviceVector<int> *v) {
+void InitHostDeviceVector(size_t n, DeviceOrd device, HostDeviceVector<int> *v) {
  // create the vector
  v->SetDevice(device);
  v->Resize(n);

  ASSERT_EQ(v->Size(), n);
-  ASSERT_EQ(v->DeviceIdx(), device);
+  ASSERT_EQ(v->Device(), device);
  // ensure that the device have read-write access
  ASSERT_TRUE(v->DeviceCanRead());
  ASSERT_TRUE(v->DeviceCanWrite());
@@ -57,7 +55,7 @@ void InitHostDeviceVector(size_t n, int device, HostDeviceVector<int> *v) {
 }

 void PlusOne(HostDeviceVector<int> *v) {
-  int device = v->DeviceIdx();
+  auto device = v->Device();
  SetDeviceForTest(device);
  thrust::transform(dh::tcbegin(*v), dh::tcend(*v), dh::tbegin(*v),
                    [=]__device__(unsigned int a){ return a + 1; });
@@ -69,7 +67,7 @@ void CheckDevice(HostDeviceVector<int>* v,
                 unsigned int first,
                 GPUAccess access) {
  ASSERT_EQ(v->Size(), size);
-  SetDeviceForTest(v->DeviceIdx());
+  SetDeviceForTest(v->Device());

  ASSERT_TRUE(thrust::equal(dh::tcbegin(*v), dh::tcend(*v),
                            thrust::make_counting_iterator(first)));
@@ -100,7 +98,7 @@ void CheckHost(HostDeviceVector<int> *v, GPUAccess access) {
  ASSERT_FALSE(v->DeviceCanWrite());
 }

-void TestHostDeviceVector(size_t n, int device) {
+void TestHostDeviceVector(size_t n, DeviceOrd device) {
  HostDeviceVectorSetDeviceHandler hdvec_dev_hndlr(SetDevice);
  HostDeviceVector<int> v;
  InitHostDeviceVector(n, device, &v);
@@ -113,13 +111,13 @@ void TestHostDeviceVector(size_t n, int device) {

 TEST(HostDeviceVector, Basic) {
  size_t n = 1001;
-  int device = 0;
+  DeviceOrd device = DeviceOrd::CUDA(0);
  TestHostDeviceVector(n, device);
 }

 TEST(HostDeviceVector, Copy) {
  size_t n = 1001;
-  int device = 0;
+  auto device = DeviceOrd::CUDA(0);
  HostDeviceVectorSetDeviceHandler hdvec_dev_hndlr(SetDevice);

  HostDeviceVector<int> v;
@@ -143,15 +141,15 @@ TEST(HostDeviceVector, SetDevice) {
    h_vec[i] = i;
  }
  HostDeviceVector<int> vec (h_vec);
-  auto device = 0;
+  auto device = DeviceOrd::CUDA(0);

  vec.SetDevice(device);
  ASSERT_EQ(vec.Size(), h_vec.size());
  auto span = vec.DeviceSpan();  // sync to device

-  vec.SetDevice(-1);  // pull back to cpu.
+  vec.SetDevice(DeviceOrd::CPU());  // pull back to cpu.
  ASSERT_EQ(vec.Size(), h_vec.size());
-  ASSERT_EQ(vec.DeviceIdx(), -1);
+  ASSERT_EQ(vec.Device(), DeviceOrd::CPU());

  auto h_vec_1 = vec.HostVector();
  ASSERT_TRUE(std::equal(h_vec_1.cbegin(), h_vec_1.cend(), h_vec.cbegin()));
@@ -159,7 +157,7 @@ TEST(HostDeviceVector, SetDevice) {

 TEST(HostDeviceVector, Span) {
  HostDeviceVector<float> vec {1.0f, 2.0f, 3.0f, 4.0f};
-  vec.SetDevice(0);
+  vec.SetDevice(DeviceOrd::CUDA(0));
  auto span = vec.DeviceSpan();
  ASSERT_EQ(vec.Size(), span.size());
  ASSERT_EQ(vec.DevicePointer(), span.data());
@@ -183,5 +181,4 @@ TEST(HostDeviceVector, Empty) {
  ASSERT_FALSE(another.Empty());
  ASSERT_TRUE(vec.Empty());
 }
-}  // namespace common
-}  // namespace xgboost
+}  // namespace xgboost::common
--- a/tests/cpp/common/test_linalg.cu
+++ b/tests/cpp/common/test_linalg.cu
@@ -12,7 +12,7 @@ namespace xgboost::linalg {
 namespace {
 void TestElementWiseKernel() {
  auto device = DeviceOrd::CUDA(0);
-  Tensor<float, 3> l{{2, 3, 4}, 0};
+  Tensor<float, 3> l{{2, 3, 4}, device};
  {
    /**
     * Non-contiguous
--- a/tests/cpp/common/test_quantile.cc
+++ b/tests/cpp/common/test_quantile.cc
@@ -9,9 +9,7 @@
 #include "../../../src/data/adapter.h"
 #include "xgboost/context.h"

-namespace xgboost {
-namespace common {
-
+namespace xgboost::common {
 TEST(Quantile, LoadBalance) {
  size_t constexpr kRows = 1000, kCols = 100;
  auto m = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix();
@@ -314,7 +312,7 @@ void TestSameOnAllWorkers() {
        }

        auto m = RandomDataGenerator{kRows, kCols, 0}
-                     .Device(Context::kCpuId)
+                     .Device(DeviceOrd::CPU())
                     .Type(ft)
                     .MaxCategory(17)
                     .Seed(rank + seed)
@@ -373,6 +371,4 @@ TEST(Quantile, SameOnAllWorkers) {
  auto constexpr kWorkers = 4;
  RunWithInMemoryCommunicator(kWorkers, TestSameOnAllWorkers);
 }
-
-}  // namespace common
-}  // namespace xgboost
+}  // namespace xgboost::common
--- a/tests/cpp/common/test_quantile.cu
+++ b/tests/cpp/common/test_quantile.cu
@@ -25,7 +25,7 @@ class MGPUQuantileTest : public BaseMGPUTest {};
 TEST(GPUQuantile, Basic) {
  constexpr size_t kRows = 1000, kCols = 100, kBins = 256;
  HostDeviceVector<FeatureType> ft;
-  SketchContainer sketch(ft, kBins, kCols, kRows, 0);
+  SketchContainer sketch(ft, kBins, kCols, kRows, FstCU());
  dh::caching_device_vector<Entry> entries;
  dh::device_vector<bst_row_t> cuts_ptr(kCols+1);
  thrust::fill(cuts_ptr.begin(), cuts_ptr.end(), 0);
@@ -38,12 +38,12 @@ void TestSketchUnique(float sparsity) {
  constexpr size_t kRows = 1000, kCols = 100;
  RunWithSeedsAndBins(kRows, [kRows, kCols, sparsity](int32_t seed, size_t n_bins, MetaInfo const& info) {
    HostDeviceVector<FeatureType> ft;
-    SketchContainer sketch(ft, n_bins, kCols, kRows, 0);
+    SketchContainer sketch(ft, n_bins, kCols, kRows, FstCU());

    HostDeviceVector<float> storage;
    std::string interface_str = RandomDataGenerator{kRows, kCols, sparsity}
                                    .Seed(seed)
-                                    .Device(0)
+                                    .Device(FstCU())
                                    .GenerateArrayInterface(&storage);
    data::CupyAdapter adapter(interface_str);
    AdapterDeviceSketch(adapter.Value(), n_bins, info,
@@ -58,7 +58,7 @@ void TestSketchUnique(float sparsity) {
        thrust::make_counting_iterator(0llu),
        [=] __device__(size_t idx) { return batch.GetElement(idx); });
    auto end = kCols * kRows;
-    detail::GetColumnSizesScan(0, kCols, n_cuts, IterSpan{batch_iter, end}, is_valid,
+    detail::GetColumnSizesScan(FstCU(), kCols, n_cuts, IterSpan{batch_iter, end}, is_valid,
                               &cut_sizes_scan, &column_sizes_scan);
    auto const& cut_sizes = cut_sizes_scan.HostVector();
    ASSERT_LE(sketch.Data().size(), cut_sizes.back());
@@ -86,9 +86,9 @@ TEST(GPUQuantile, Unique) {
 }

 // if with_error is true, the test tolerates floating point error
-void TestQuantileElemRank(int32_t device, Span<SketchEntry const> in,
+void TestQuantileElemRank(DeviceOrd device, Span<SketchEntry const> in,
                          Span<bst_row_t const> d_columns_ptr, bool with_error = false) {
-  dh::safe_cuda(cudaSetDevice(device));
+  dh::safe_cuda(cudaSetDevice(device.ordinal));
  std::vector<SketchEntry> h_in(in.size());
  dh::CopyDeviceSpanToVector(&h_in, in);
  std::vector<bst_row_t> h_columns_ptr(d_columns_ptr.size());
@@ -123,13 +123,12 @@ TEST(GPUQuantile, Prune) {
  constexpr size_t kRows = 1000, kCols = 100;
  RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins, MetaInfo const& info) {
    HostDeviceVector<FeatureType> ft;
-    SketchContainer sketch(ft, n_bins, kCols, kRows, 0);
+    SketchContainer sketch(ft, n_bins, kCols, kRows, FstCU());

    HostDeviceVector<float> storage;
-    std::string interface_str = RandomDataGenerator{kRows, kCols, 0}
-                                    .Device(0)
-                                    .Seed(seed)
-                                    .GenerateArrayInterface(&storage);
+    std::string interface_str =
+        RandomDataGenerator{kRows, kCols, 0}.Device(FstCU()).Seed(seed).GenerateArrayInterface(
+            &storage);
    data::CupyAdapter adapter(interface_str);
    AdapterDeviceSketch(adapter.Value(), n_bins, info,
                        std::numeric_limits<float>::quiet_NaN(), &sketch);
@@ -145,7 +144,7 @@ TEST(GPUQuantile, Prune) {
    ASSERT_TRUE(thrust::is_sorted(thrust::device, sketch.Data().data(),
                                  sketch.Data().data() + sketch.Data().size(),
                                  detail::SketchUnique{}));
-    TestQuantileElemRank(0, sketch.Data(), sketch.ColumnsPtr());
+    TestQuantileElemRank(FstCU(), sketch.Data(), sketch.ColumnsPtr());
  });
 }

@@ -153,10 +152,10 @@ TEST(GPUQuantile, MergeEmpty) {
  constexpr size_t kRows = 1000, kCols = 100;
  size_t n_bins = 10;
  HostDeviceVector<FeatureType> ft;
-  SketchContainer sketch_0(ft, n_bins, kCols, kRows, 0);
+  SketchContainer sketch_0(ft, n_bins, kCols, kRows, FstCU());
  HostDeviceVector<float> storage_0;
  std::string interface_str_0 =
-      RandomDataGenerator{kRows, kCols, 0}.Device(0).GenerateArrayInterface(
+      RandomDataGenerator{kRows, kCols, 0}.Device(FstCU()).GenerateArrayInterface(
          &storage_0);
  data::CupyAdapter adapter_0(interface_str_0);
  MetaInfo info;
@@ -193,34 +192,33 @@ TEST(GPUQuantile, MergeBasic) {
  constexpr size_t kRows = 1000, kCols = 100;
  RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins, MetaInfo const &info) {
    HostDeviceVector<FeatureType> ft;
-    SketchContainer sketch_0(ft, n_bins, kCols, kRows, 0);
+    SketchContainer sketch_0(ft, n_bins, kCols, kRows, FstCU());
    HostDeviceVector<float> storage_0;
    std::string interface_str_0 = RandomDataGenerator{kRows, kCols, 0}
-                                      .Device(0)
+                                  .Device(FstCU())
                                      .Seed(seed)
                                      .GenerateArrayInterface(&storage_0);
    data::CupyAdapter adapter_0(interface_str_0);
    AdapterDeviceSketch(adapter_0.Value(), n_bins, info,
                        std::numeric_limits<float>::quiet_NaN(), &sketch_0);

-    SketchContainer sketch_1(ft, n_bins, kCols, kRows * kRows, 0);
+    SketchContainer sketch_1(ft, n_bins, kCols, kRows * kRows, FstCU());
    HostDeviceVector<float> storage_1;
-    std::string interface_str_1 = RandomDataGenerator{kRows, kCols, 0}
-                                      .Device(0)
-                                      .Seed(seed)
-                                      .GenerateArrayInterface(&storage_1);
+    std::string interface_str_1 =
+        RandomDataGenerator{kRows, kCols, 0}.Device(FstCU()).Seed(seed).GenerateArrayInterface(
+            &storage_1);
    data::CupyAdapter adapter_1(interface_str_1);
-    AdapterDeviceSketch(adapter_1.Value(), n_bins, info,
-                        std::numeric_limits<float>::quiet_NaN(), &sketch_1);
+    AdapterDeviceSketch(adapter_1.Value(), n_bins, info, std::numeric_limits<float>::quiet_NaN(),
+                        &sketch_1);

    size_t size_before_merge = sketch_0.Data().size();
    sketch_0.Merge(sketch_1.ColumnsPtr(), sketch_1.Data());
    if (info.weights_.Size() != 0) {
-      TestQuantileElemRank(0, sketch_0.Data(), sketch_0.ColumnsPtr(), true);
+      TestQuantileElemRank(FstCU(), sketch_0.Data(), sketch_0.ColumnsPtr(), true);
      sketch_0.FixError();
-      TestQuantileElemRank(0, sketch_0.Data(), sketch_0.ColumnsPtr(), false);
+      TestQuantileElemRank(FstCU(), sketch_0.Data(), sketch_0.ColumnsPtr(), false);
    } else {
-      TestQuantileElemRank(0, sketch_0.Data(), sketch_0.ColumnsPtr());
+      TestQuantileElemRank(FstCU(), sketch_0.Data(), sketch_0.ColumnsPtr());
    }

    auto columns_ptr = sketch_0.ColumnsPtr();
@@ -240,24 +238,22 @@ void TestMergeDuplicated(int32_t n_bins, size_t cols, size_t rows, float frac) {
  MetaInfo info;
  int32_t seed = 0;
  HostDeviceVector<FeatureType> ft;
-  SketchContainer sketch_0(ft, n_bins, cols, rows, 0);
+  SketchContainer sketch_0(ft, n_bins, cols, rows, FstCU());
  HostDeviceVector<float> storage_0;
-  std::string interface_str_0 = RandomDataGenerator{rows, cols, 0}
-                                    .Device(0)
-                                    .Seed(seed)
-                                    .GenerateArrayInterface(&storage_0);
+  std::string interface_str_0 =
+      RandomDataGenerator{rows, cols, 0}.Device(FstCU()).Seed(seed).GenerateArrayInterface(
+          &storage_0);
  data::CupyAdapter adapter_0(interface_str_0);
  AdapterDeviceSketch(adapter_0.Value(), n_bins, info,
                      std::numeric_limits<float>::quiet_NaN(),
                      &sketch_0);

  size_t f_rows = rows * frac;
-  SketchContainer sketch_1(ft, n_bins, cols, f_rows, 0);
+  SketchContainer sketch_1(ft, n_bins, cols, f_rows, FstCU());
  HostDeviceVector<float> storage_1;
-  std::string interface_str_1 = RandomDataGenerator{f_rows, cols, 0}
-                                    .Device(0)
-                                    .Seed(seed)
-                                    .GenerateArrayInterface(&storage_1);
+  std::string interface_str_1 =
+      RandomDataGenerator{f_rows, cols, 0}.Device(FstCU()).Seed(seed).GenerateArrayInterface(
+          &storage_1);
  auto data_1 = storage_1.DeviceSpan();
  auto tuple_it = thrust::make_tuple(
      thrust::make_counting_iterator<size_t>(0ul), data_1.data());
@@ -279,7 +275,7 @@ void TestMergeDuplicated(int32_t n_bins, size_t cols, size_t rows, float frac) {

  size_t size_before_merge = sketch_0.Data().size();
  sketch_0.Merge(sketch_1.ColumnsPtr(), sketch_1.Data());
-  TestQuantileElemRank(0, sketch_0.Data(), sketch_0.ColumnsPtr());
+  TestQuantileElemRank(FstCU(), sketch_0.Data(), sketch_0.ColumnsPtr());

  auto columns_ptr = sketch_0.ColumnsPtr();
  std::vector<bst_row_t> h_columns_ptr(columns_ptr.size());
@@ -310,11 +306,10 @@ TEST(GPUQuantile, MergeDuplicated) {
 TEST(GPUQuantile, MultiMerge) {
  constexpr size_t kRows = 20, kCols = 1;
  int32_t world = 2;
-  RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins,
-                                 MetaInfo const &info) {
+  RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins, MetaInfo const& info) {
    // Set up single node version
    HostDeviceVector<FeatureType> ft;
-    SketchContainer sketch_on_single_node(ft, n_bins, kCols, kRows, 0);
+    SketchContainer sketch_on_single_node(ft, n_bins, kCols, kRows, FstCU());

    size_t intermediate_num_cuts = std::min(
        kRows * world, static_cast<size_t>(n_bins * WQSketch::kFactor));
@@ -322,12 +317,12 @@ TEST(GPUQuantile, MultiMerge) {
    for (auto rank = 0; rank < world; ++rank) {
      HostDeviceVector<float> storage;
      std::string interface_str = RandomDataGenerator{kRows, kCols, 0}
-                                      .Device(0)
+                                      .Device(FstCU())
                                      .Seed(rank + seed)
                                      .GenerateArrayInterface(&storage);
      data::CupyAdapter adapter(interface_str);
      HostDeviceVector<FeatureType> ft;
-      containers.emplace_back(ft, n_bins, kCols, kRows, 0);
+      containers.emplace_back(ft, n_bins, kCols, kRows, FstCU());
      AdapterDeviceSketch(adapter.Value(), n_bins, info,
                          std::numeric_limits<float>::quiet_NaN(),
                          &containers.back());
@@ -337,12 +332,10 @@ TEST(GPUQuantile, MultiMerge) {
      sketch_on_single_node.Merge(sketch.ColumnsPtr(), sketch.Data());
      sketch_on_single_node.FixError();
    }
-    TestQuantileElemRank(0, sketch_on_single_node.Data(),
-                         sketch_on_single_node.ColumnsPtr());
+    TestQuantileElemRank(FstCU(), sketch_on_single_node.Data(), sketch_on_single_node.ColumnsPtr());

    sketch_on_single_node.Unique();
-    TestQuantileElemRank(0, sketch_on_single_node.Data(),
-                         sketch_on_single_node.ColumnsPtr());
+    TestQuantileElemRank(FstCU(), sketch_on_single_node.Data(), sketch_on_single_node.ColumnsPtr());
  });
 }

@@ -351,7 +344,7 @@ void TestAllReduceBasic() {
  auto const world = collective::GetWorldSize();
  constexpr size_t kRows = 1000, kCols = 100;
  RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins, MetaInfo const& info) {
-    auto const device = GPUIDX;
+    auto const device = DeviceOrd::CUDA(GPUIDX);

    // Set up single node version;
    HostDeviceVector<FeatureType> ft({}, device);
@@ -483,7 +476,7 @@ void TestSameOnAllWorkers() {
  RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins,
                                 MetaInfo const &info) {
    auto const rank = collective::GetRank();
-    auto const device = GPUIDX;
+    auto const device = DeviceOrd::CUDA(GPUIDX);
    HostDeviceVector<FeatureType> ft({}, device);
    SketchContainer sketch_distributed(ft, n_bins, kCols, kRows, device);
    HostDeviceVector<float> storage({}, device);
@@ -514,9 +507,9 @@ void TestSameOnAllWorkers() {
    thrust::copy(thrust::device, local_data.data(),
                 local_data.data() + local_data.size(),
                 all_workers.begin() + local_data.size() * rank);
-    collective::AllReduce<collective::Operation::kSum>(device, all_workers.data().get(),
+    collective::AllReduce<collective::Operation::kSum>(device.ordinal, all_workers.data().get(),
                                                       all_workers.size());
-    collective::Synchronize(device);
+    collective::Synchronize(device.ordinal);

    auto base_line = dh::ToSpan(all_workers).subspan(0, size_as_float);
    std::vector<float> h_base_line(base_line.size());
@@ -562,7 +555,7 @@ TEST(GPUQuantile, Push) {
  columns_ptr[1] = kRows;

  HostDeviceVector<FeatureType> ft;
-  SketchContainer sketch(ft, n_bins, kCols, kRows, 0);
+  SketchContainer sketch(ft, n_bins, kCols, kRows, FstCU());
  sketch.Push(dh::ToSpan(d_entries), dh::ToSpan(columns_ptr), dh::ToSpan(columns_ptr), kRows, {});

  auto sketch_data = sketch.Data();
@@ -602,7 +595,7 @@ TEST(GPUQuantile, MultiColPush) {

  int32_t n_bins = 16;
  HostDeviceVector<FeatureType> ft;
-  SketchContainer sketch(ft, n_bins, kCols, kRows, 0);
+  SketchContainer sketch(ft, n_bins, kCols, kRows, FstCU());
  dh::device_vector<Entry> d_entries {entries};

  dh::device_vector<size_t> columns_ptr(kCols + 1, 0);
--- a/tests/cpp/common/test_ranking_utils.cc
+++ b/tests/cpp/common/test_ranking_utils.cc
@@ -95,7 +95,7 @@ void TestRankingCache(Context const* ctx) {
  HostDeviceVector<float> predt(info.num_row_, 0);
  auto& h_predt = predt.HostVector();
  std::iota(h_predt.begin(), h_predt.end(), 0.0f);
-  predt.SetDevice(ctx->gpu_id);
+  predt.SetDevice(ctx->Device());

  auto rank_idx =
      cache.SortedIdx(ctx, ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan());
@@ -129,7 +129,7 @@ void TestNDCGCache(Context const* ctx) {
    auto fail = [&]() { NDCGCache cache{ctx, info, param}; };
    // empty label
    ASSERT_THROW(fail(), dmlc::Error);
-    info.labels = linalg::Matrix<float>{{0.0f, 0.1f, 0.2f}, {3}, Context::kCpuId};
+    info.labels = linalg::Matrix<float>{{0.0f, 0.1f, 0.2f}, {3}, DeviceOrd::CPU()};
    // invalid label
    ASSERT_THROW(fail(), dmlc::Error);
    auto h_labels = info.labels.HostView();
--- a/tests/cpp/common/test_ranking_utils.cu
+++ b/tests/cpp/common/test_ranking_utils.cu
@@ -35,7 +35,7 @@ void TestCalcQueriesInvIDCG() {
  auto d_scores = dh::ToSpan(scores);
  common::SegmentedSequence(&ctx, d_group_ptr, d_scores);

-  linalg::Vector<double> inv_IDCG({n_groups}, ctx.gpu_id);
+  linalg::Vector<double> inv_IDCG({n_groups}, ctx.Device());

  ltr::LambdaRankParam p;
  p.UpdateAllowUnknown(Args{{"ndcg_exp_gain", "false"}});
@@ -70,7 +70,7 @@ void TestRankingCache(Context const* ctx) {
  HostDeviceVector<float> predt(info.num_row_, 0);
  auto& h_predt = predt.HostVector();
  std::iota(h_predt.begin(), h_predt.end(), 0.0f);
-  predt.SetDevice(ctx->gpu_id);
+  predt.SetDevice(ctx->Device());

  auto rank_idx =
      cache.SortedIdx(ctx, ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan());
--- a/tests/cpp/common/test_stats.cc
+++ b/tests/cpp/common/test_stats.cc
@@ -9,12 +9,11 @@
 #include "../../../src/common/transform_iterator.h"  // common::MakeIndexTransformIter
 #include "../helpers.h"

-namespace xgboost {
-namespace common {
+namespace xgboost::common {
 TEST(Stats, Quantile) {
  Context ctx;
  {
-    linalg::Tensor<float, 1> arr({20.f, 0.f, 15.f, 50.f, 40.f, 0.f, 35.f}, {7}, Context::kCpuId);
+    linalg::Tensor<float, 1> arr({20.f, 0.f, 15.f, 50.f, 40.f, 0.f, 35.f}, {7}, DeviceOrd::CPU());
    std::vector<size_t> index{0, 2, 3, 4, 6};
    auto h_arr = arr.HostView();
    auto beg = MakeIndexTransformIter([&](size_t i) { return h_arr(index[i]); });
@@ -40,8 +39,8 @@ TEST(Stats, Quantile) {

 TEST(Stats, WeightedQuantile) {
  Context ctx;
-  linalg::Tensor<float, 1> arr({1.f, 2.f, 3.f, 4.f, 5.f}, {5}, Context::kCpuId);
-  linalg::Tensor<float, 1> weight({1.f, 1.f, 1.f, 1.f, 1.f}, {5}, Context::kCpuId);
+  linalg::Tensor<float, 1> arr({1.f, 2.f, 3.f, 4.f, 5.f}, {5}, DeviceOrd::CPU());
+  linalg::Tensor<float, 1> weight({1.f, 1.f, 1.f, 1.f, 1.f}, {5}, DeviceOrd::CPU());

  auto h_arr = arr.HostView();
  auto h_weight = weight.HostView();
@@ -64,7 +63,7 @@ TEST(Stats, Median) {
  Context ctx;

  {
-    linalg::Tensor<float, 2> values{{.0f, .0f, 1.f, 2.f}, {4}, Context::kCpuId};
+    linalg::Tensor<float, 2> values{{.0f, .0f, 1.f, 2.f}, {4}, DeviceOrd::CPU()};
    HostDeviceVector<float> weights;
    linalg::Tensor<float, 1> out;
    Median(&ctx, values, weights, &out);
@@ -83,7 +82,7 @@ TEST(Stats, Median) {
  {
    ctx = ctx.MakeCPU();
    // 4x2 matrix
-    linalg::Tensor<float, 2> values{{0.f, 0.f, 0.f, 0.f, 1.f, 1.f, 2.f, 2.f}, {4, 2}, ctx.gpu_id};
+    linalg::Tensor<float, 2> values{{0.f, 0.f, 0.f, 0.f, 1.f, 1.f, 2.f, 2.f}, {4, 2}, ctx.Device()};
    HostDeviceVector<float> weights;
    linalg::Tensor<float, 1> out;
    Median(&ctx, values, weights, &out);
@@ -102,14 +101,14 @@ TEST(Stats, Median) {
 namespace {
 void TestMean(Context const* ctx) {
  std::size_t n{128};
-  linalg::Vector<float> data({n}, ctx->gpu_id);
+  linalg::Vector<float> data({n}, ctx->Device());
  auto h_v = data.HostView().Values();
  std::iota(h_v.begin(), h_v.end(), .0f);

  auto nf = static_cast<float>(n);
  float mean = nf * (nf - 1) / 2 / n;

-  linalg::Vector<float> res{{1}, ctx->gpu_id};
+  linalg::Vector<float> res{{1}, ctx->Device()};
  Mean(ctx, data, &res);
  auto h_res = res.HostView();
  ASSERT_EQ(h_res.Size(), 1);
@@ -128,5 +127,4 @@ TEST(Stats, GPUMean) {
  TestMean(&ctx);
 }
 #endif  // defined(XGBOOST_USE_CUDA)
-}  // namespace common
-}  // namespace xgboost
+}  // namespace xgboost::common
--- a/tests/cpp/common/test_stats.cu
+++ b/tests/cpp/common/test_stats.cu
@@ -20,8 +20,8 @@ namespace common {
 namespace {
 class StatsGPU : public ::testing::Test {
 private:
-  linalg::Tensor<float, 1> arr_{{1.f, 2.f, 3.f, 4.f, 5.f, 2.f, 4.f, 5.f, 3.f, 1.f}, {10}, 0};
-  linalg::Tensor<std::size_t, 1> indptr_{{0, 5, 10}, {3}, 0};
+  linalg::Tensor<float, 1> arr_{{1.f, 2.f, 3.f, 4.f, 5.f, 2.f, 4.f, 5.f, 3.f, 1.f}, {10}, FstCU()};
+  linalg::Tensor<std::size_t, 1> indptr_{{0, 5, 10}, {3}, FstCU()};
  HostDeviceVector<float> results_;
  using TestSet = std::vector<std::pair<float, float>>;
  Context ctx_;
@@ -46,7 +46,7 @@ class StatsGPU : public ::testing::Test {
    data.insert(data.cend(), seg.begin(), seg.end());
    data.insert(data.cend(), seg.begin(), seg.end());
    data.insert(data.cend(), seg.begin(), seg.end());
-    linalg::Tensor<float, 1> arr{data.cbegin(), data.cend(), {data.size()}, 0};
+    linalg::Tensor<float, 1> arr{data.cbegin(), data.cend(), {data.size()}, FstCU()};
    auto d_arr = arr.View(DeviceOrd::CUDA(0));

    auto key_it = dh::MakeTransformIterator<std::size_t>(
@@ -58,7 +58,7 @@ class StatsGPU : public ::testing::Test {

    // one alpha for each segment
    HostDeviceVector<float> alphas{0.0f, 0.5f, 1.0f};
-    alphas.SetDevice(0);
+    alphas.SetDevice(FstCU());
    auto d_alphas = alphas.ConstDeviceSpan();
    auto w_it = thrust::make_constant_iterator(0.1f);
    SegmentedWeightedQuantile(&ctx_, d_alphas.data(), key_it, key_it + d_alphas.size() + 1, val_it,
@@ -80,7 +80,7 @@ class StatsGPU : public ::testing::Test {
    auto val_it =
        dh::MakeTransformIterator<float>(thrust::make_counting_iterator(0ul),
                                         [=] XGBOOST_DEVICE(std::size_t i) { return d_arr(i); });
-    linalg::Tensor<float, 1> weights{{10}, 0};
+    linalg::Tensor<float, 1> weights{{10}, FstCU()};
    linalg::ElementWiseTransformDevice(weights.View(DeviceOrd::CUDA(0)),
                                       [=] XGBOOST_DEVICE(std::size_t, float) { return 1.0; });
    auto w_it = weights.Data()->ConstDevicePointer();
@@ -101,7 +101,7 @@ class StatsGPU : public ::testing::Test {
    data.insert(data.cend(), seg.begin(), seg.end());
    data.insert(data.cend(), seg.begin(), seg.end());
    data.insert(data.cend(), seg.begin(), seg.end());
-    linalg::Tensor<float, 1> arr{data.cbegin(), data.cend(), {data.size()}, 0};
+    linalg::Tensor<float, 1> arr{data.cbegin(), data.cend(), {data.size()}, FstCU()};
    auto d_arr = arr.View(DeviceOrd::CUDA(0));

    auto key_it = dh::MakeTransformIterator<std::size_t>(
@@ -113,7 +113,7 @@ class StatsGPU : public ::testing::Test {

    // one alpha for each segment
    HostDeviceVector<float> alphas{0.1f, 0.2f, 0.4f};
-    alphas.SetDevice(0);
+    alphas.SetDevice(FstCU());
    auto d_alphas = alphas.ConstDeviceSpan();
    SegmentedQuantile(&ctx_, d_alphas.data(), key_it, key_it + d_alphas.size() + 1, val_it,
                      val_it + d_arr.Size(), &results_);
--- a/tests/cpp/common/test_transform_range.cc
+++ b/tests/cpp/common/test_transform_range.cc
@@ -11,63 +11,59 @@
 #include "../../../src/common/transform.h"
 #include "../helpers.h"

+namespace xgboost::common {
+namespace {
+constexpr DeviceOrd TransformDevice() {
 #if defined(__CUDACC__)
-
-#define TRANSFORM_GPU 0
-
+  return DeviceOrd::CUDA(0);
 #else
-
-#define TRANSFORM_GPU -1
-
+  return DeviceOrd::CPU();
 #endif
-
-namespace xgboost {
-namespace common {
+}
+}  // namespace

 template <typename T>
 struct TestTransformRange {
-  void XGBOOST_DEVICE operator()(size_t _idx,
-                                 Span<bst_float> _out, Span<const bst_float> _in) {
+  void XGBOOST_DEVICE operator()(std::size_t _idx, Span<float> _out, Span<const float> _in) {
    _out[_idx] = _in[_idx];
  }
 };

 TEST(Transform, DeclareUnifiedTest(Basic)) {
-  const size_t size {256};
-  std::vector<bst_float> h_in(size);
-  std::vector<bst_float> h_out(size);
+  const size_t size{256};
+  std::vector<float> h_in(size);
+  std::vector<float> h_out(size);
  std::iota(h_in.begin(), h_in.end(), 0);
-  std::vector<bst_float> h_sol(size);
+  std::vector<float> h_sol(size);
  std::iota(h_sol.begin(), h_sol.end(), 0);

-  const HostDeviceVector<bst_float> in_vec{h_in, TRANSFORM_GPU};
-  HostDeviceVector<bst_float> out_vec{h_out, TRANSFORM_GPU};
+  auto device = TransformDevice();
+  HostDeviceVector<float> const in_vec{h_in, device};
+  HostDeviceVector<float> out_vec{h_out, device};
  out_vec.Fill(0);

-  Transform<>::Init(TestTransformRange<bst_float>{},
+  Transform<>::Init(TestTransformRange<float>{},
                    Range{0, static_cast<Range::DifferenceType>(size)}, AllThreadsForTest(),
-                    TRANSFORM_GPU)
+                    TransformDevice())
      .Eval(&out_vec, &in_vec);
-  std::vector<bst_float> res = out_vec.HostVector();
+  std::vector<float> res = out_vec.HostVector();

  ASSERT_TRUE(std::equal(h_sol.begin(), h_sol.end(), res.begin()));
 }

 #if !defined(__CUDACC__)
 TEST(TransformDeathTest, Exception) {
-  size_t const kSize {16};
-  std::vector<bst_float> h_in(kSize);
-  const HostDeviceVector<bst_float> in_vec{h_in, -1};
+  size_t const kSize{16};
+  std::vector<float> h_in(kSize);
+  const HostDeviceVector<float> in_vec{h_in, DeviceOrd::CPU()};
  EXPECT_DEATH(
      {
        Transform<>::Init([](size_t idx, common::Span<float const> _in) { _in[idx + 1]; },
                          Range(0, static_cast<Range::DifferenceType>(kSize)), AllThreadsForTest(),
-                          -1)
+                          DeviceOrd::CPU())
            .Eval(&in_vec);
      },
      "");
 }
 #endif
-
-} // namespace common
-} // namespace xgboost
+}  // namespace xgboost::common
--- a/tests/cpp/common/test_transform_range.cu
+++ b/tests/cpp/common/test_transform_range.cu
@@ -0,0 +1,5 @@
+/**
+ * Copyright 2023 XGBoost contributors
+ */
+// Dummy file to keep the CUDA tests.
+#include "test_transform_range.cc"
--- a/tests/cpp/data/test_device_adapter.cu
+++ b/tests/cpp/data/test_device_adapter.cu
@@ -59,12 +59,12 @@ TEST(DeviceAdapter, GetRowCounts) {
  for (bst_feature_t n_features : {1, 2, 4, 64, 128, 256}) {
    HostDeviceVector<float> storage;
    auto str_arr = RandomDataGenerator{8192, n_features, 0.0}
-                       .Device(ctx.gpu_id)
+                       .Device(ctx.Device())
                       .GenerateArrayInterface(&storage);
    auto adapter = CupyAdapter{str_arr};
    HostDeviceVector<bst_row_t> offset(adapter.NumRows() + 1, 0);
-    offset.SetDevice(ctx.gpu_id);
-    auto rstride = GetRowCounts(adapter.Value(), offset.DeviceSpan(), ctx.gpu_id,
+    offset.SetDevice(ctx.Device());
+    auto rstride = GetRowCounts(adapter.Value(), offset.DeviceSpan(), ctx.Device(),
                                std::numeric_limits<float>::quiet_NaN());
    ASSERT_EQ(rstride, n_features);
  }
--- a/tests/cpp/data/test_ellpack_page.cu
+++ b/tests/cpp/data/test_ellpack_page.cu
@@ -94,7 +94,7 @@ TEST(EllpackPage, FromCategoricalBasic) {
  Context ctx{MakeCUDACtx(0)};
  auto p = BatchParam{max_bins, tree::TrainParam::DftSparseThreshold()};
  auto ellpack = EllpackPage(&ctx, m.get(), p);
-  auto accessor = ellpack.Impl()->GetDeviceAccessor(0);
+  auto accessor = ellpack.Impl()->GetDeviceAccessor(FstCU());
  ASSERT_EQ(kCats, accessor.NumBins());

  auto x_copy = x;
@@ -152,13 +152,12 @@ TEST(EllpackPage, Copy) {
  auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();

  // Create an empty result page.
-  EllpackPageImpl result(0, page->Cuts(), page->is_dense, page->row_stride,
-                         kRows);
+  EllpackPageImpl result(FstCU(), page->Cuts(), page->is_dense, page->row_stride, kRows);

  // Copy batch pages into the result page.
  size_t offset = 0;
  for (auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
-    size_t num_elements = result.Copy(0, batch.Impl(), offset);
+    size_t num_elements = result.Copy(FstCU(), batch.Impl(), offset);
    offset += num_elements;
  }

@@ -172,10 +171,12 @@ TEST(EllpackPage, Copy) {
    EXPECT_EQ(impl->base_rowid, current_row);

    for (size_t i = 0; i < impl->Size(); i++) {
-      dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(0), current_row, row_d.data().get()));
+      dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(FstCU()), current_row,
+                                         row_d.data().get()));
      thrust::copy(row_d.begin(), row_d.end(), row.begin());

-      dh::LaunchN(kCols, ReadRowFunction(result.GetDeviceAccessor(0), current_row, row_result_d.data().get()));
+      dh::LaunchN(kCols, ReadRowFunction(result.GetDeviceAccessor(FstCU()), current_row,
+                                         row_result_d.data().get()));
      thrust::copy(row_result_d.begin(), row_result_d.end(), row_result.begin());

      EXPECT_EQ(row, row_result);
@@ -199,8 +200,7 @@ TEST(EllpackPage, Compact) {
  auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();

  // Create an empty result page.
-  EllpackPageImpl result(0, page->Cuts(), page->is_dense, page->row_stride,
-                         kCompactedRows);
+  EllpackPageImpl result(FstCU(), page->Cuts(), page->is_dense, page->row_stride, kCompactedRows);

  // Compact batch pages into the result page.
  std::vector<size_t> row_indexes_h {
@@ -209,7 +209,7 @@ TEST(EllpackPage, Compact) {
  thrust::device_vector<size_t> row_indexes_d = row_indexes_h;
  common::Span<size_t> row_indexes_span(row_indexes_d.data().get(), kRows);
  for (auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
-    result.Compact(0, batch.Impl(), row_indexes_span);
+    result.Compact(FstCU(), batch.Impl(), row_indexes_span);
  }

  size_t current_row = 0;
@@ -228,13 +228,13 @@ TEST(EllpackPage, Compact) {
        continue;
      }

-      dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(0),
+      dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(FstCU()),
                                         current_row, row_d.data().get()));
      dh::safe_cuda(cudaDeviceSynchronize());
      thrust::copy(row_d.begin(), row_d.end(), row.begin());

      dh::LaunchN(kCols,
-                  ReadRowFunction(result.GetDeviceAccessor(0), compacted_row,
+                  ReadRowFunction(result.GetDeviceAccessor(FstCU()), compacted_row,
                                  row_result_d.data().get()));
      thrust::copy(row_result_d.begin(), row_result_d.end(), row_result.begin());

--- a/tests/cpp/data/test_gradient_index.cc
+++ b/tests/cpp/data/test_gradient_index.cc
@@ -30,7 +30,7 @@ namespace xgboost::data {
 TEST(GradientIndex, ExternalMemoryBaseRowID) {
  Context ctx;
  auto p_fmat = RandomDataGenerator{4096, 256, 0.5}
-                    .Device(ctx.gpu_id)
+                    .Device(ctx.Device())
                    .Batches(8)
                    .GenerateSparsePageDMatrix("cache", true);

--- a/tests/cpp/data/test_iterative_dmatrix.cu
+++ b/tests/cpp/data/test_iterative_dmatrix.cu
@@ -11,9 +11,7 @@
 #include "../helpers.h"
 #include "test_iterative_dmatrix.h"

-namespace xgboost {
-namespace data {
-
+namespace xgboost::data {
 void TestEquivalent(float sparsity) {
  Context ctx{MakeCUDACtx(0)};

@@ -23,14 +21,14 @@ void TestEquivalent(float sparsity) {
  std::size_t offset = 0;
  auto first = (*m.GetEllpackBatches(&ctx, {}).begin()).Impl();
  std::unique_ptr<EllpackPageImpl> page_concatenated {
-    new EllpackPageImpl(0, first->Cuts(), first->is_dense,
+    new EllpackPageImpl(ctx.Device(), first->Cuts(), first->is_dense,
                        first->row_stride, 1000 * 100)};
  for (auto& batch : m.GetBatches<EllpackPage>(&ctx, {})) {
    auto page = batch.Impl();
-    size_t num_elements = page_concatenated->Copy(0, page, offset);
+    size_t num_elements = page_concatenated->Copy(ctx.Device(), page, offset);
    offset += num_elements;
  }
-  auto from_iter = page_concatenated->GetDeviceAccessor(0);
+  auto from_iter = page_concatenated->GetDeviceAccessor(ctx.Device());
  ASSERT_EQ(m.Info().num_col_, CudaArrayIterForTest::Cols());
  ASSERT_EQ(m.Info().num_row_, CudaArrayIterForTest::Rows());

@@ -40,7 +38,7 @@ void TestEquivalent(float sparsity) {
      DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 0)};
  auto bp = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
  for (auto& ellpack : dm->GetBatches<EllpackPage>(&ctx, bp)) {
-    auto from_data = ellpack.Impl()->GetDeviceAccessor(0);
+    auto from_data = ellpack.Impl()->GetDeviceAccessor(ctx.Device());

    std::vector<float> cuts_from_iter(from_iter.gidx_fvalue_map.size());
    std::vector<float> min_fvalues_iter(from_iter.min_fvalue.size());
@@ -152,10 +150,10 @@ TEST(IterativeDeviceDMatrix, RowMajorMissing) {
  auto impl = ellpack.Impl();
  common::CompressedIterator<uint32_t> iterator(
      impl->gidx_buffer.HostVector().data(), impl->NumSymbols());
-  EXPECT_EQ(iterator[1], impl->GetDeviceAccessor(0).NullValue());
-  EXPECT_EQ(iterator[5], impl->GetDeviceAccessor(0).NullValue());
+  EXPECT_EQ(iterator[1], impl->GetDeviceAccessor(ctx.Device()).NullValue());
+  EXPECT_EQ(iterator[5], impl->GetDeviceAccessor(ctx.Device()).NullValue());
  // null values get placed after valid values in a row
-  EXPECT_EQ(iterator[7], impl->GetDeviceAccessor(0).NullValue());
+  EXPECT_EQ(iterator[7], impl->GetDeviceAccessor(ctx.Device()).NullValue());
  EXPECT_EQ(m.Info().num_col_, cols);
  EXPECT_EQ(m.Info().num_row_, rows);
  EXPECT_EQ(m.Info().num_nonzero_, rows* cols - 3);
@@ -183,5 +181,4 @@ TEST(IterativeDeviceDMatrix, Ref) {
  TestRefDMatrix<EllpackPage, CudaArrayIterForTest>(
      &ctx, [](EllpackPage const& page) { return page.Impl()->Cuts(); });
 }
-}  // namespace data
-}  // namespace xgboost
+}  // namespace xgboost::data
--- a/tests/cpp/data/test_metainfo.cc
+++ b/tests/cpp/data/test_metainfo.cc
@@ -12,6 +12,7 @@
 #include "../helpers.h"
 #include "xgboost/base.h"

+namespace xgboost {
 TEST(MetaInfo, GetSet) {
  xgboost::Context ctx;
  xgboost::MetaInfo info;
@@ -236,9 +237,9 @@ TEST(MetaInfo, Validate) {
  info.num_nonzero_ = 12;
  info.num_col_ = 3;
  std::vector<xgboost::bst_group_t> groups (11);
-  xgboost::Context ctx;
+  Context ctx;
  info.SetInfo(ctx, "group", groups.data(), xgboost::DataType::kUInt32, 11);
-  EXPECT_THROW(info.Validate(0), dmlc::Error);
+  EXPECT_THROW(info.Validate(FstCU()), dmlc::Error);

  std::vector<float> labels(info.num_row_ + 1);
  EXPECT_THROW(
@@ -261,11 +262,11 @@ TEST(MetaInfo, Validate) {
  info.group_ptr_.clear();
  labels.resize(info.num_row_);
  info.SetInfo(ctx, "label", labels.data(), xgboost::DataType::kFloat32, info.num_row_);
-  info.labels.SetDevice(0);
-  EXPECT_THROW(info.Validate(1), dmlc::Error);
+  info.labels.SetDevice(FstCU());
+  EXPECT_THROW(info.Validate(DeviceOrd::CUDA(1)), dmlc::Error);

  xgboost::HostDeviceVector<xgboost::bst_group_t> d_groups{groups};
-  d_groups.SetDevice(0);
+  d_groups.SetDevice(FstCU());
  d_groups.DevicePointer();  // pull to device
  std::string arr_interface_str{ArrayInterfaceStr(xgboost::linalg::MakeVec(
      d_groups.ConstDevicePointer(), d_groups.Size(), xgboost::DeviceOrd::CUDA(0)))};
@@ -306,6 +307,5 @@ TEST(MetaInfo, HostExtend) {
  }
 }

-namespace xgboost {
 TEST(MetaInfo, CPUStridedData) { TestMetaInfoStridedData(DeviceOrd::CPU()); }
 }  // namespace xgboost
--- a/tests/cpp/data/test_proxy_dmatrix.cc
+++ b/tests/cpp/data/test_proxy_dmatrix.cc
@@ -1,31 +1,27 @@
-/*!
- * Copyright 2021 XGBoost contributors
+/**
+ * Copyright 2021-2023, XGBoost contributors
 */
 #include <gtest/gtest.h>
-#include "../helpers.h"
-#include "../../../src/data/proxy_dmatrix.h"
-#include "../../../src/data/adapter.h"

-namespace xgboost {
-namespace data {
+#include "../../../src/data/adapter.h"
+#include "../../../src/data/proxy_dmatrix.h"
+#include "../helpers.h"
+
+namespace xgboost::data {
 TEST(ProxyDMatrix, HostData) {
  DMatrixProxy proxy;
  size_t constexpr kRows = 100, kCols = 10;
  std::vector<HostDeviceVector<float>> label_storage(1);

  HostDeviceVector<float> storage;
-  auto data = RandomDataGenerator(kRows, kCols, 0.5)
-                  .Device(0)
-                  .GenerateArrayInterface(&storage);
+  auto data =
+      RandomDataGenerator(kRows, kCols, 0.5).Device(FstCU()).GenerateArrayInterface(&storage);

  proxy.SetArrayData(data.c_str());

-  auto n_samples = HostAdapterDispatch(
-      &proxy, [](auto const &value) { return value.Size(); });
+  auto n_samples = HostAdapterDispatch(&proxy, [](auto const &value) { return value.Size(); });
  ASSERT_EQ(n_samples, kRows);
-  auto n_features = HostAdapterDispatch(
-      &proxy, [](auto const &value) { return value.NumCols(); });
+  auto n_features = HostAdapterDispatch(&proxy, [](auto const &value) { return value.NumCols(); });
  ASSERT_EQ(n_features, kCols);
 }
-}  // namespace data
-}  // namespace xgboost
+}  // namespace xgboost::data
--- a/tests/cpp/data/test_proxy_dmatrix.cu
+++ b/tests/cpp/data/test_proxy_dmatrix.cu
@@ -15,10 +15,12 @@ namespace xgboost::data {
 TEST(ProxyDMatrix, DeviceData) {
  constexpr size_t kRows{100}, kCols{100};
  HostDeviceVector<float> storage;
-  auto data = RandomDataGenerator(kRows, kCols, 0.5).Device(0).GenerateArrayInterface(&storage);
+  auto data =
+      RandomDataGenerator(kRows, kCols, 0.5).Device(FstCU()).GenerateArrayInterface(&storage);
  std::vector<HostDeviceVector<float>> label_storage(1);
-  auto labels =
-      RandomDataGenerator(kRows, 1, 0).Device(0).GenerateColumnarArrayInterface(&label_storage);
+  auto labels = RandomDataGenerator(kRows, 1, 0)
+                    .Device(FstCU())
+                    .GenerateColumnarArrayInterface(&label_storage);

  DMatrixProxy proxy;
  proxy.SetCUDAArray(data.c_str());
@@ -31,7 +33,7 @@ TEST(ProxyDMatrix, DeviceData) {

  std::vector<HostDeviceVector<float>> columnar_storage(kCols);
  data = RandomDataGenerator(kRows, kCols, 0)
-             .Device(0)
+             .Device(FstCU())
             .GenerateColumnarArrayInterface(&columnar_storage);
  proxy.SetCUDAArray(data.c_str());
  ASSERT_EQ(proxy.Adapter().type(), typeid(std::shared_ptr<CudfAdapter>));
--- a/tests/cpp/data/test_simple_dmatrix.cc
+++ b/tests/cpp/data/test_simple_dmatrix.cc
@@ -268,7 +268,7 @@ TEST(SimpleDMatrix, Slice) {
  std::iota(upper.begin(), upper.end(), 1.0f);

  auto& margin = p_m->Info().base_margin_;
-  margin = decltype(p_m->Info().base_margin_){{kRows, kClasses}, Context::kCpuId};
+  margin = decltype(p_m->Info().base_margin_){{kRows, kClasses}, DeviceOrd::CPU()};

  std::array<int32_t, 3> ridxs {1, 3, 5};
  std::unique_ptr<DMatrix> out { p_m->Slice(ridxs) };
@@ -341,7 +341,7 @@ TEST(SimpleDMatrix, SliceCol) {
  std::iota(upper.begin(), upper.end(), 1.0f);

  auto& margin = p_m->Info().base_margin_;
-  margin = decltype(p_m->Info().base_margin_){{kRows, kClasses}, Context::kCpuId};
+  margin = decltype(p_m->Info().base_margin_){{kRows, kClasses}, DeviceOrd::CPU()};

  auto constexpr kSlices {2};
  auto constexpr kSliceSize {4};
--- a/tests/cpp/data/test_sparse_page_dmatrix.cu
+++ b/tests/cpp/data/test_sparse_page_dmatrix.cu
@@ -134,11 +134,11 @@ TEST(SparsePageDMatrix, EllpackPageContent) {
  size_t offset = 0;
  for (auto& batch : dmat_ext->GetBatches<EllpackPage>(&ctx, param)) {
    if (!impl_ext) {
-      impl_ext.reset(new EllpackPageImpl(
-          batch.Impl()->gidx_buffer.DeviceIdx(), batch.Impl()->Cuts(),
-          batch.Impl()->is_dense, batch.Impl()->row_stride, kRows));
+      impl_ext = std::make_unique<EllpackPageImpl>(batch.Impl()->gidx_buffer.Device(),
+                                                   batch.Impl()->Cuts(), batch.Impl()->is_dense,
+                                                   batch.Impl()->row_stride, kRows);
    }
-    auto n_elems = impl_ext->Copy(0, batch.Impl(), offset);
+    auto n_elems = impl_ext->Copy(ctx.Device(), batch.Impl(), offset);
    offset += n_elems;
  }
  EXPECT_EQ(impl_ext->base_rowid, 0);
@@ -198,10 +198,12 @@ TEST(SparsePageDMatrix, MultipleEllpackPageContent) {
    EXPECT_EQ(impl_ext->base_rowid, current_row);

    for (size_t i = 0; i < impl_ext->Size(); i++) {
-      dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(0), current_row, row_d.data().get()));
+      dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(ctx.Device()), current_row,
+                                         row_d.data().get()));
      thrust::copy(row_d.begin(), row_d.end(), row.begin());

-      dh::LaunchN(kCols, ReadRowFunction(impl_ext->GetDeviceAccessor(0), current_row, row_ext_d.data().get()));
+      dh::LaunchN(kCols, ReadRowFunction(impl_ext->GetDeviceAccessor(ctx.Device()), current_row,
+                                         row_ext_d.data().get()));
      thrust::copy(row_ext_d.begin(), row_ext_d.end(), row_ext.begin());

      EXPECT_EQ(row, row_ext);
--- a/tests/cpp/gbm/test_gbtree.cc
+++ b/tests/cpp/gbm/test_gbtree.cc
@@ -65,7 +65,7 @@ TEST(GBTree, PredictionCache) {

  gbtree.Configure({{"tree_method", "hist"}});
  auto p_m = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix();
-  linalg::Matrix<GradientPair> gpair({kRows}, ctx.Ordinal());
+  linalg::Matrix<GradientPair> gpair({kRows}, ctx.Device());
  gpair.Data()->Copy(GenerateRandomGradients(kRows));

  PredictionCacheEntry out_predictions;
@@ -156,7 +156,7 @@ TEST(GBTree, ChoosePredictor) {

  // pull data into device.
  data.HostVector();
-  data.SetDevice(0);
+  data.SetDevice(DeviceOrd::CUDA(0));
  data.DeviceSpan();
  ASSERT_FALSE(data.HostCanWrite());

@@ -215,7 +215,7 @@ TEST(GBTree, ChooseTreeMethod) {
    }
    learner->Configure();
    for (std::int32_t i = 0; i < 3; ++i) {
-      linalg::Matrix<GradientPair> gpair{{Xy->Info().num_row_}, Context::kCpuId};
+      linalg::Matrix<GradientPair> gpair{{Xy->Info().num_row_}, DeviceOrd::CPU()};
      gpair.Data()->Copy(GenerateRandomGradients(Xy->Info().num_row_));
      learner->BoostOneIter(0, Xy, &gpair);
    }
@@ -400,7 +400,7 @@ class Dart : public testing::TestWithParam<char const*> {
    if (device == "GPU") {
      ctx = MakeCUDACtx(0);
    }
-    auto rng = RandomDataGenerator(kRows, kCols, 0).Device(ctx.gpu_id);
+    auto rng = RandomDataGenerator(kRows, kCols, 0).Device(ctx.Device());
    auto array_str = rng.GenerateArrayInterface(&data);
    auto p_mat = GetDMatrixFromData(data.HostVector(), kRows, kCols);

@@ -710,7 +710,7 @@ TEST(GBTree, InplacePredictionError) {
  auto test_qdm_err = [&](std::string booster, Context const* ctx) {
    std::shared_ptr<DMatrix> p_fmat;
    bst_bin_t max_bins = 16;
-    auto rng = RandomDataGenerator{n_samples, n_features, 0.5f}.Device(ctx->gpu_id).Bins(max_bins);
+    auto rng = RandomDataGenerator{n_samples, n_features, 0.5f}.Device(ctx->Device()).Bins(max_bins);
    if (ctx->IsCPU()) {
      p_fmat = rng.GenerateQuantileDMatrix(true);
    } else {
--- a/tests/cpp/gbm/test_gbtree.cu
+++ b/tests/cpp/gbm/test_gbtree.cu
@@ -22,7 +22,7 @@ void TestInplaceFallback(Context const* ctx) {
  bst_feature_t n_features{32};
  HostDeviceVector<float> X_storage;
  // use a different device than the learner
-  std::int32_t data_ordinal = ctx->IsCPU() ? 0 : -1;
+  auto data_ordinal = ctx->IsCPU() ? DeviceOrd::CUDA(0) : DeviceOrd::CPU();
  auto X = RandomDataGenerator{n_samples, n_features, 0.0}
               .Device(data_ordinal)
               .GenerateArrayInterface(&X_storage);
@@ -30,7 +30,7 @@ void TestInplaceFallback(Context const* ctx) {
  auto y = RandomDataGenerator{n_samples, 1u, 0.0}.GenerateArrayInterface(&y_storage);

  std::shared_ptr<DMatrix> Xy;
-  if (data_ordinal == Context::kCpuId) {
+  if (data_ordinal.IsCPU()) {
    auto X_adapter = data::ArrayAdapter{StringView{X}};
    Xy.reset(DMatrix::Create(&X_adapter, std::numeric_limits<float>::quiet_NaN(), ctx->Threads()));
  } else {
@@ -49,7 +49,7 @@ void TestInplaceFallback(Context const* ctx) {

  std::shared_ptr<DMatrix> p_m{new data::DMatrixProxy};
  auto proxy = std::dynamic_pointer_cast<data::DMatrixProxy>(p_m);
-  if (data_ordinal == Context::kCpuId) {
+  if (data_ordinal.IsCPU()) {
    proxy->SetArrayData(StringView{X});
  } else {
    proxy->SetCUDAArray(X.c_str());
@@ -64,7 +64,7 @@ void TestInplaceFallback(Context const* ctx) {

  // test when the contexts match
  Context new_ctx = *proxy->Ctx();
-  ASSERT_NE(new_ctx.gpu_id, ctx->gpu_id);
+  ASSERT_NE(new_ctx.Ordinal(), ctx->Ordinal());

  learner->SetParam("device", new_ctx.DeviceName());
  HostDeviceVector<float>* out_predt_1{nullptr};
--- a/tests/cpp/helpers.cc
+++ b/tests/cpp/helpers.cc
@@ -119,8 +119,10 @@ void CheckObjFunction(std::unique_ptr<xgboost::ObjFunction> const& obj,
                      std::vector<xgboost::bst_float> out_hess) {
  xgboost::MetaInfo info;
  info.num_row_ = labels.size();
-  info.labels = xgboost::linalg::Tensor<float, 2>{
-      labels.cbegin(), labels.cend(), {labels.size(), static_cast<std::size_t>(1)}, -1};
+  info.labels = xgboost::linalg::Tensor<float, 2>{labels.cbegin(),
+                                                  labels.cend(),
+                                                  {labels.size(), static_cast<std::size_t>(1)},
+                                                  xgboost::DeviceOrd::CPU()};
  info.weights_.HostVector() = weights;

  CheckObjFunctionImpl(obj, preds, labels, weights, info, out_grad, out_hess);
@@ -155,8 +157,10 @@ void CheckRankingObjFunction(std::unique_ptr<xgboost::ObjFunction> const& obj,
                             std::vector<xgboost::bst_float> out_hess) {
  xgboost::MetaInfo info;
  info.num_row_ = labels.size();
-  info.labels = xgboost::linalg::Matrix<float>{
-      labels.cbegin(), labels.cend(), {labels.size(), static_cast<std::size_t>(1)}, -1};
+  info.labels = xgboost::linalg::Matrix<float>{labels.cbegin(),
+                                               labels.cend(),
+                                               {labels.size(), static_cast<std::size_t>(1)},
+                                               xgboost::DeviceOrd::CPU()};
  info.weights_.HostVector() = weights;
  info.group_ptr_ = groups;

@@ -171,8 +175,9 @@ xgboost::bst_float GetMetricEval(xgboost::Metric* metric,
                                 xgboost::DataSplitMode data_split_mode) {
  return GetMultiMetricEval(
      metric, preds,
-      xgboost::linalg::Tensor<float, 2>{labels.begin(), labels.end(), {labels.size()}, -1}, weights,
-      groups, data_split_mode);
+      xgboost::linalg::Tensor<float, 2>{
+          labels.begin(), labels.end(), {labels.size()}, xgboost::DeviceOrd::CPU()},
+      weights, groups, data_split_mode);
 }

 double GetMultiMetricEval(xgboost::Metric* metric,
@@ -215,7 +220,7 @@ void RandomDataGenerator::GenerateLabels(std::shared_ptr<DMatrix> p_fmat) const
      p_fmat->Info().labels.Data());
  CHECK_EQ(p_fmat->Info().labels.Size(), this->rows_ * this->n_targets_);
  p_fmat->Info().labels.Reshape(this->rows_, this->n_targets_);
-  if (device_ != Context::kCpuId) {
+  if (device_.IsCUDA()) {
    p_fmat->Info().labels.SetDevice(device_);
  }
 }
@@ -236,7 +241,7 @@ void RandomDataGenerator::GenerateDense(HostDeviceVector<float> *out) const {
      v = dist(&lcg);
    }
  }
-  if (device_ >= 0) {
+  if (device_.IsCUDA()) {
    out->SetDevice(device_);
    out->DeviceSpan();
  }
@@ -258,7 +263,7 @@ std::string RandomDataGenerator::GenerateArrayInterface(

 std::pair<std::vector<std::string>, std::string> MakeArrayInterfaceBatch(
    HostDeviceVector<float> const* storage, std::size_t n_samples, bst_feature_t n_features,
-    std::size_t batches, std::int32_t device) {
+    std::size_t batches, DeviceOrd device) {
  std::vector<std::string> result(batches);
  std::vector<Json> objects;

@@ -267,7 +272,7 @@ std::pair<std::vector<std::string>, std::string> MakeArrayInterfaceBatch(
  auto make_interface = [storage, device, n_features](std::size_t offset, std::size_t rows) {
    Json array_interface{Object()};
    array_interface["data"] = std::vector<Json>(2);
-    if (device >= 0) {
+    if (device.IsCUDA()) {
      array_interface["data"][0] =
          Integer(reinterpret_cast<int64_t>(storage->DevicePointer() + offset));
      array_interface["stream"] = Null{};
@@ -359,7 +364,7 @@ void RandomDataGenerator::GenerateCSR(
    h_rptr.emplace_back(rptr);
  }

-  if (device_ >= 0) {
+  if (device_.IsCUDA()) {
    value->SetDevice(device_);
    value->DeviceSpan();
    row_ptr->SetDevice(device_);
@@ -400,7 +405,7 @@ void RandomDataGenerator::GenerateCSR(
      out->Info().labels.Reshape(this->rows_, this->n_targets_);
    }
  }
-  if (device_ >= 0) {
+  if (device_.IsCUDA()) {
    out->Info().labels.SetDevice(device_);
    out->Info().feature_types.SetDevice(device_);
    for (auto const& page : out->GetBatches<SparsePage>()) {
@@ -423,7 +428,7 @@ void RandomDataGenerator::GenerateCSR(
  CHECK_GE(this->n_batches_, 1)
      << "Must set the n_batches before generating an external memory DMatrix.";
  std::unique_ptr<ArrayIterForTest> iter;
-  if (device_ == Context::kCpuId) {
+  if (device_.IsCPU()) {
    iter = std::make_unique<NumpyArrayIterForTest>(this->sparsity_, rows_, cols_, n_batches_);
  } else {
 #if defined(XGBOOST_USE_CUDA)
@@ -487,7 +492,7 @@ int CudaArrayIterForTest::Next() {
 NumpyArrayIterForTest::NumpyArrayIterForTest(float sparsity, size_t rows, size_t cols,
                                             size_t batches)
    : ArrayIterForTest{sparsity, rows, cols, batches} {
-  rng_->Device(Context::kCpuId);
+  rng_->Device(DeviceOrd::CPU());
  std::tie(batches_, interface_) = rng_->GenerateArrayInterfaceBatch(&data_, n_batches_);
  this->Reset();
 }
@@ -644,8 +649,8 @@ std::unique_ptr<GradientBooster> CreateTrainedGBM(std::string name, Args kwargs,
    labels[i] = i;
  }
  p_dmat->Info().labels =
-      linalg::Tensor<float, 2>{labels.cbegin(), labels.cend(), {labels.size()}, -1};
-  linalg::Matrix<GradientPair> gpair({kRows}, ctx->Ordinal());
+      linalg::Tensor<float, 2>{labels.cbegin(), labels.cend(), {labels.size()}, DeviceOrd::CPU()};
+  linalg::Matrix<GradientPair> gpair({kRows}, ctx->Device());
  auto h_gpair = gpair.HostView();
  for (size_t i = 0; i < kRows; ++i) {
    h_gpair(i) = GradientPair{static_cast<float>(i), 1};
@@ -674,7 +679,7 @@ ArrayIterForTest::ArrayIterForTest(Context const* ctx, HostDeviceVector<float> c
  CHECK_EQ(this->data_.Size(), rows_ * cols_ * n_batches);
  this->data_.Copy(data);
  std::tie(batches_, interface_) =
-      MakeArrayInterfaceBatch(&data_, rows_, cols_, n_batches_, ctx->gpu_id);
+      MakeArrayInterfaceBatch(&data_, rows_, cols_, n_batches_, ctx->Device());
 }

 ArrayIterForTest::~ArrayIterForTest() { XGDMatrixFree(proxy_); }
--- a/tests/cpp/helpers.cu
+++ b/tests/cpp/helpers.cu
@@ -9,7 +9,7 @@ namespace xgboost {
 CudaArrayIterForTest::CudaArrayIterForTest(float sparsity, size_t rows,
                                           size_t cols, size_t batches)
    : ArrayIterForTest{sparsity, rows, cols, batches} {
-  rng_->Device(0);
+  rng_->Device(FstCU());
  std::tie(batches_, interface_) =
      rng_->GenerateArrayInterfaceBatch(&data_, n_batches_);
  this->Reset();
--- a/tests/cpp/helpers.h
+++ b/tests/cpp/helpers.h
@@ -231,7 +231,7 @@ class RandomDataGenerator {

  bst_target_t n_targets_{1};

-  std::int32_t device_{Context::kCpuId};
+  DeviceOrd device_{DeviceOrd::CPU()};
  std::size_t n_batches_{0};
  std::uint64_t seed_{0};
  SimpleLCG lcg_;
@@ -256,7 +256,7 @@ class RandomDataGenerator {
    upper_ = v;
    return *this;
  }
-  RandomDataGenerator& Device(int32_t d) {
+  RandomDataGenerator& Device(DeviceOrd d) {
    device_ = d;
    return *this;
  }
@@ -391,7 +391,7 @@ std::unique_ptr<GradientBooster> CreateTrainedGBM(std::string name, Args kwargs,
 * \brief Make a context that uses CUDA if device >= 0.
 */
 inline Context MakeCUDACtx(std::int32_t device) {
-  if (device == Context::kCpuId) {
+  if (device == DeviceOrd::CPUOrdinal()) {
    return Context{};
  }
  return Context{}.MakeCUDA(device);
@@ -501,7 +501,7 @@ RMMAllocatorPtr SetUpRMMResourceForCppTests(int argc, char** argv);
 * \brief Make learner model param
 */
 inline LearnerModelParam MakeMP(bst_feature_t n_features, float base_score, uint32_t n_groups,
-                                int32_t device = Context::kCpuId) {
+                                DeviceOrd device = DeviceOrd::CPU()) {
  size_t shape[1]{1};
  LearnerModelParam mparam(n_features, linalg::Tensor<float, 1>{{base_score}, shape, device},
                           n_groups, 1, MultiStrategy::kOneOutputPerTree);
@@ -571,4 +571,5 @@ class BaseMGPUTest : public ::testing::Test {

 class DeclareUnifiedDistributedTest(MetricTest) : public BaseMGPUTest{};

+inline DeviceOrd FstCU() { return DeviceOrd::CUDA(0); }  // CUDA device, ordinal 0
 }  // namespace xgboost
--- a/tests/cpp/histogram_helpers.h
+++ b/tests/cpp/histogram_helpers.h
@@ -1,3 +1,8 @@
+/**
+ * Copyright 2020-2023, XGBoost contributors
+ */
+#pragma once
+
 #if defined(__CUDACC__)
 #include "../../src/data/ellpack_page.cuh"
 #endif
@@ -24,8 +29,8 @@ class HistogramCutsWrapper : public common::HistogramCuts {
 };
 }  //  anonymous namespace

-inline std::unique_ptr<EllpackPageImpl> BuildEllpackPage(
-    int n_rows, int n_cols, bst_float sparsity= 0) {
+inline std::unique_ptr<EllpackPageImpl> BuildEllpackPage(int n_rows, int n_cols,
+                                                         bst_float sparsity = 0) {
  auto dmat = RandomDataGenerator(n_rows, n_cols, sparsity).Seed(3).GenerateDMatrix();
  const SparsePage& batch = *dmat->GetBatches<xgboost::SparsePage>().begin();

@@ -49,7 +54,7 @@ inline std::unique_ptr<EllpackPageImpl> BuildEllpackPage(
  }

  auto page = std::unique_ptr<EllpackPageImpl>(
-      new EllpackPageImpl(0, cmat, batch, dmat->IsDense(), row_stride, {}));
+      new EllpackPageImpl(DeviceOrd::CUDA(0), cmat, batch, dmat->IsDense(), row_stride, {}));

  return page;
 }
--- a/tests/cpp/metric/test_auc.h
+++ b/tests/cpp/metric/test_auc.h
@@ -28,7 +28,7 @@ inline void VerifyBinaryAUC(DataSplitMode data_split_mode = DataSplitMode::kRow)
  // Invalid dataset
  auto p_fmat = EmptyDMatrix();
  MetaInfo& info = p_fmat->Info();
-  info.labels = linalg::Tensor<float, 2>{{0.0f, 0.0f}, {2}, -1};
+  info.labels = linalg::Tensor<float, 2>{{0.0f, 0.0f}, {2}, DeviceOrd::CPU()};
  float auc = metric->Evaluate({1, 1}, p_fmat);
  ASSERT_TRUE(std::isnan(auc));
  *info.labels.Data() = HostDeviceVector<float>{};
--- a/tests/cpp/metric/test_elementwise_metric.cc
+++ b/tests/cpp/metric/test_elementwise_metric.cc
@@ -3,8 +3,7 @@
 */
 #include "test_elementwise_metric.h"

-namespace xgboost {
-namespace metric {
+namespace xgboost::metric {
 TEST(Metric, DeclareUnifiedTest(RMSE)) { VerifyRMSE(); }

 TEST(Metric, DeclareUnifiedTest(RMSLE)) { VerifyRMSLE(); }
@@ -104,5 +103,4 @@ TEST_F(DeclareUnifiedDistributedTest(MetricTest), QuantileRowSplit) {
 TEST_F(DeclareUnifiedDistributedTest(MetricTest), QuantileColumnSplit) {
  DoTest(VerifyQuantile, DataSplitMode::kCol);
 }
-}  // namespace metric
-}  // namespace xgboost
+}  // namespace xgboost::metric
--- a/tests/cpp/metric/test_elementwise_metric.h
+++ b/tests/cpp/metric/test_elementwise_metric.h
@@ -11,9 +11,7 @@
 #include "../../../src/common/linalg_op.h"
 #include "../helpers.h"

-namespace xgboost {
-namespace metric {
-
+namespace xgboost::metric {
 inline void CheckDeterministicMetricElementWise(StringView name, int32_t device) {
  auto ctx = MakeCUDACtx(device);
  std::unique_ptr<Metric> metric{Metric::Create(name.c_str(), &ctx)};
@@ -325,14 +323,14 @@ inline void VerifyPoissonNegLogLik(DataSplitMode data_split_mode = DataSplitMode
 }

 inline void VerifyMultiRMSE(DataSplitMode data_split_mode = DataSplitMode::kRow) {
+  auto ctx = MakeCUDACtx(GPUIDX);
  size_t n_samples = 32, n_targets = 8;
-  linalg::Tensor<float, 2> y{{n_samples, n_targets}, GPUIDX};
+  linalg::Tensor<float, 2> y{{n_samples, n_targets}, ctx.Device()};
  auto &h_y = y.Data()->HostVector();
  std::iota(h_y.begin(), h_y.end(), 0);

  HostDeviceVector<float> predt(n_samples * n_targets, 0);

-  auto ctx = MakeCUDACtx(GPUIDX);
  std::unique_ptr<Metric> metric{Metric::Create("rmse", &ctx)};
  metric->Configure({});

@@ -381,5 +379,4 @@ inline void VerifyQuantile(DataSplitMode data_split_mode = DataSplitMode::kRow)
  metric->Configure(Args{{"quantile_alpha", "[1.0]"}});
  EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, {}, {}, data_split_mode), 0.3f, 0.001f);
 }
-}  // namespace metric
-}  // namespace xgboost
+}  // namespace xgboost::metric
--- a/tests/cpp/metric/test_rank_metric.h
+++ b/tests/cpp/metric/test_rank_metric.h
@@ -154,7 +154,7 @@ inline void VerifyNDCGExpGain(DataSplitMode data_split_mode = DataSplitMode::kRo

  auto p_fmat = xgboost::RandomDataGenerator{0, 0, 0}.GenerateDMatrix();
  MetaInfo& info = p_fmat->Info();
-  info.labels = linalg::Matrix<float>{{10.0f, 0.0f, 0.0f, 1.0f, 5.0f}, {5}, ctx.gpu_id};
+  info.labels = linalg::Matrix<float>{{10.0f, 0.0f, 0.0f, 1.0f, 5.0f}, {5}, ctx.Device()};
  info.num_row_ = info.labels.Shape(0);
  info.group_ptr_.resize(2);
  info.group_ptr_[0] = 0;
--- a/tests/cpp/objective/test_lambdarank_obj.cc
+++ b/tests/cpp/objective/test_lambdarank_obj.cc
@@ -71,7 +71,7 @@ void TestNDCGGPair(Context const* ctx) {

  HostDeviceVector<float> predts{0, 1, 0, 1};
  MetaInfo info;
-  info.labels = linalg::Tensor<float, 2>{{0, 1, 0, 1}, {4, 1}, GPUIDX};
+  info.labels = linalg::Tensor<float, 2>{{0, 1, 0, 1}, {4, 1}, ctx->Device()};
  info.group_ptr_ = {0, 2, 4};
  info.num_row_ = 4;
  linalg::Matrix<GradientPair> gpairs;
@@ -146,7 +146,7 @@ TEST(LambdaRank, UnbiasedNDCG) {
 }

 void InitMakePairTest(Context const* ctx, MetaInfo* out_info, HostDeviceVector<float>* out_predt) {
-  out_predt->SetDevice(ctx->gpu_id);
+  out_predt->SetDevice(ctx->Device());
  MetaInfo& info = *out_info;
  info.num_row_ = 128;
  info.labels.ModifyInplace([&](HostDeviceVector<float>* data, common::Span<std::size_t> shape) {
@@ -243,7 +243,7 @@ void TestMAPStat(Context const* ctx) {

    auto p_cache = std::make_shared<ltr::MAPCache>(ctx, info, param);

-    predt.SetDevice(ctx->gpu_id);
+    predt.SetDevice(ctx->Device());
    auto rank_idx =
        p_cache->SortedIdx(ctx, ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan());

@@ -280,7 +280,7 @@ void TestMAPStat(Context const* ctx) {

    auto p_cache = std::make_shared<ltr::MAPCache>(ctx, info, param);

-    predt.SetDevice(ctx->gpu_id);
+    predt.SetDevice(ctx->Device());
    auto rank_idx =
        p_cache->SortedIdx(ctx, ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan());

--- a/tests/cpp/objective/test_quantile_obj.cc
+++ b/tests/cpp/objective/test_quantile_obj.cc
@@ -45,7 +45,7 @@ TEST(Objective, DeclareUnifiedTest(QuantileIntercept)) {
  MetaInfo info;
  info.num_row_ = 10;
  info.labels.ModifyInplace([&](HostDeviceVector<float>* data, common::Span<std::size_t> shape) {
-    data->SetDevice(ctx.gpu_id);
+    data->SetDevice(ctx.Device());
    data->Resize(info.num_row_);
    shape[0] = info.num_row_;
    shape[1] = 1;
--- a/tests/cpp/predictor/test_cpu_predictor.cc
+++ b/tests/cpp/predictor/test_cpu_predictor.cc
@@ -142,7 +142,7 @@ TEST(CpuPredictor, InplacePredict) {
  bst_row_t constexpr kRows{128};
  bst_feature_t constexpr kCols{64};
  Context ctx;
-  auto gen = RandomDataGenerator{kRows, kCols, 0.5}.Device(ctx.gpu_id);
+  auto gen = RandomDataGenerator{kRows, kCols, 0.5}.Device(ctx.Device());
  {
    HostDeviceVector<float> data;
    gen.GenerateDense(&data);
--- a/tests/cpp/predictor/test_gpu_predictor.cu
+++ b/tests/cpp/predictor/test_gpu_predictor.cu
@@ -34,7 +34,7 @@ TEST(GPUPredictor, Basic) {
    auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatrix();

    auto ctx = MakeCUDACtx(0);
-    LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Ordinal())};
+    LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Device())};
    gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);

    // Test predict batch
@@ -70,7 +70,7 @@ void VerifyBasicColumnSplit(std::array<std::vector<float>, 32> const& expected_r
    auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatrix();
    std::unique_ptr<DMatrix> sliced{dmat->SliceCol(world_size, rank)};

-    LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Ordinal())};
+    LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Device())};
    gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);

    // Test predict batch
@@ -98,7 +98,7 @@ TEST_F(MGPUPredictorTest, BasicColumnSplit) {
    size_t n_row = i, n_col = i;
    auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatrix();

-    LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Ordinal())};
+    LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Device())};
    gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);

    // Test predict batch
@@ -119,8 +119,10 @@ TEST(GPUPredictor, EllpackBasic) {
  auto ctx = MakeCUDACtx(0);
  for (size_t bins = 2; bins < 258; bins += 16) {
    size_t rows = bins * 16;
-    auto p_m =
-        RandomDataGenerator{rows, kCols, 0.0}.Bins(bins).Device(0).GenerateDeviceDMatrix(false);
+    auto p_m = RandomDataGenerator{rows, kCols, 0.0}
+                   .Bins(bins)
+                   .Device(DeviceOrd::CUDA(0))
+                   .GenerateDeviceDMatrix(false);
    ASSERT_FALSE(p_m->PageExists<SparsePage>());
    TestPredictionFromGradientIndex<EllpackPage>(&ctx, rows, kCols, p_m);
    TestPredictionFromGradientIndex<EllpackPage>(&ctx, bins, kCols, p_m);
@@ -132,11 +134,11 @@ TEST(GPUPredictor, EllpackTraining) {
  size_t constexpr kRows{128}, kCols{16}, kBins{64};
  auto p_ellpack = RandomDataGenerator{kRows, kCols, 0.0}
                       .Bins(kBins)
-                       .Device(ctx.Ordinal())
+                       .Device(ctx.Device())
                       .GenerateDeviceDMatrix(false);
  HostDeviceVector<float> storage(kRows * kCols);
  auto columnar =
-      RandomDataGenerator{kRows, kCols, 0.0}.Device(ctx.Ordinal()).GenerateArrayInterface(&storage);
+      RandomDataGenerator{kRows, kCols, 0.0}.Device(ctx.Device()).GenerateArrayInterface(&storage);
  auto adapter = data::CupyAdapter(columnar);
  std::shared_ptr<DMatrix> p_full{
      DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)};
@@ -151,7 +153,7 @@ TEST(GPUPredictor, ExternalMemoryTest) {

  const int n_classes = 3;
  Context ctx = MakeCUDACtx(0);
-  LearnerModelParam mparam{MakeMP(5, .5, n_classes, ctx.Ordinal())};
+  LearnerModelParam mparam{MakeMP(5, .5, n_classes, ctx.Device())};

  gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx, n_classes);
  std::vector<std::unique_ptr<DMatrix>> dmats;
@@ -162,7 +164,7 @@ TEST(GPUPredictor, ExternalMemoryTest) {

  for (const auto& dmat: dmats) {
    dmat->Info().base_margin_ = decltype(dmat->Info().base_margin_){
-        {dmat->Info().num_row_, static_cast<size_t>(n_classes)}, 0};
+        {dmat->Info().num_row_, static_cast<size_t>(n_classes)}, DeviceOrd::CUDA(0)};
    dmat->Info().base_margin_.Data()->Fill(0.5);
    PredictionCacheEntry out_predictions;
    gpu_predictor->InitOutPredictions(dmat->Info(), &out_predictions.predictions, model);
@@ -181,7 +183,7 @@ TEST(GPUPredictor, InplacePredictCupy) {
  auto ctx = MakeCUDACtx(0);
  size_t constexpr kRows{128}, kCols{64};
  RandomDataGenerator gen(kRows, kCols, 0.5);
-  gen.Device(ctx.Ordinal());
+  gen.Device(ctx.Device());
  HostDeviceVector<float> data;
  std::string interface_str = gen.GenerateArrayInterface(&data);
  std::shared_ptr<DMatrix> p_fmat{new data::DMatrixProxy};
@@ -193,7 +195,7 @@ TEST(GPUPredictor, InplacePredictCuDF) {
  auto ctx = MakeCUDACtx(0);
  size_t constexpr kRows{128}, kCols{64};
  RandomDataGenerator gen(kRows, kCols, 0.5);
-  gen.Device(ctx.Ordinal());
+  gen.Device(ctx.Device());
  std::vector<HostDeviceVector<float>> storage(kCols);
  auto interface_str = gen.GenerateColumnarArrayInterface(&storage);
  std::shared_ptr<DMatrix> p_fmat{new data::DMatrixProxy};
@@ -215,7 +217,7 @@ TEST(GPUPredictor, ShapStump) {
  cudaSetDevice(0);

  auto ctx = MakeCUDACtx(0);
-  LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.Ordinal())};
+  LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.Device())};
  gbm::GBTreeModel model(&mparam, &ctx);

  std::vector<std::unique_ptr<RegTree>> trees;
@@ -241,7 +243,7 @@ TEST(GPUPredictor, ShapStump) {

 TEST(GPUPredictor, Shap) {
  auto ctx = MakeCUDACtx(0);
-  LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.Ordinal())};
+  LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.Device())};
  gbm::GBTreeModel model(&mparam, &ctx);

  std::vector<std::unique_ptr<RegTree>> trees;
@@ -296,7 +298,7 @@ TEST_F(MGPUPredictorTest, CategoricalPredictionLeafColumnSplit) {

 TEST(GPUPredictor, PredictLeafBasic) {
  size_t constexpr kRows = 5, kCols = 5;
-  auto dmat = RandomDataGenerator(kRows, kCols, 0).Device(0).GenerateDMatrix();
+  auto dmat = RandomDataGenerator(kRows, kCols, 0).Device(DeviceOrd::CUDA(0)).GenerateDMatrix();
  auto lparam = MakeCUDACtx(GPUIDX);
  std::unique_ptr<Predictor> gpu_predictor =
      std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &lparam));
--- a/tests/cpp/predictor/test_predictor.cc
+++ b/tests/cpp/predictor/test_predictor.cc
@@ -34,7 +34,7 @@ TEST(Predictor, PredictionCache) {
  // Add a cache that is immediately expired.
  auto add_cache = [&]() {
    auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
-    container.Cache(p_dmat, Context::kCpuId);
+    container.Cache(p_dmat, DeviceOrd::CPU());
    m = p_dmat.get();
  };

@@ -93,7 +93,7 @@ void TestTrainingPrediction(Context const *ctx, size_t rows, size_t bins,
 void TestInplacePrediction(Context const *ctx, std::shared_ptr<DMatrix> x, bst_row_t rows,
                           bst_feature_t cols) {
  std::size_t constexpr kClasses { 4 };
-  auto gen = RandomDataGenerator{rows, cols, 0.5}.Device(ctx->gpu_id);
+  auto gen = RandomDataGenerator{rows, cols, 0.5}.Device(ctx->Device());
  std::shared_ptr<DMatrix> m = gen.GenerateDMatrix(true, false, kClasses);

  std::unique_ptr<Learner> learner {
@@ -192,7 +192,7 @@ void TestPredictionDeviceAccess() {

  HostDeviceVector<float> from_cpu;
  {
-    ASSERT_EQ(from_cpu.DeviceIdx(), Context::kCpuId);
+    ASSERT_TRUE(from_cpu.Device().IsCPU());
    Context cpu_ctx;
    learner->SetParam("device", cpu_ctx.DeviceName());
    learner->Predict(m_test, false, &from_cpu, 0, 0);
@@ -206,7 +206,7 @@ void TestPredictionDeviceAccess() {
    Context cuda_ctx = MakeCUDACtx(0);
    learner->SetParam("device", cuda_ctx.DeviceName());
    learner->Predict(m_test, false, &from_cuda, 0, 0);
-    ASSERT_EQ(from_cuda.DeviceIdx(), 0);
+    ASSERT_EQ(from_cuda.Device(), DeviceOrd::CUDA(0));
    ASSERT_TRUE(from_cuda.DeviceCanWrite());
    ASSERT_FALSE(from_cuda.HostCanRead());
  }
@@ -351,7 +351,7 @@ void TestCategoricalPredictLeaf(bool use_gpu, bool is_column_split) {
 void TestIterationRange(Context const* ctx) {
  size_t constexpr kRows = 1000, kCols = 20, kClasses = 4, kForest = 3, kIters = 10;
  auto dmat = RandomDataGenerator(kRows, kCols, 0)
-                  .Device(ctx->gpu_id)
+                  .Device(ctx->Device())
                  .GenerateDMatrix(true, true, kClasses);
  auto learner = LearnerForTest(ctx, dmat, kIters, kForest);

@@ -522,7 +522,7 @@ void TestSparsePrediction(Context const *ctx, float sparsity) {

  if (ctx->IsCUDA()) {
    learner->SetParam("tree_method", "gpu_hist");
-    learner->SetParam("gpu_id", std::to_string(ctx->gpu_id));
+    learner->SetParam("device", ctx->Device().Name());
  }
  learner->Predict(Xy, false, &sparse_predt, 0, 0);

@@ -620,7 +620,7 @@ void TestVectorLeafPrediction(Context const *ctx) {
  size_t constexpr kCols = 5;

  LearnerModelParam mparam{static_cast<bst_feature_t>(kCols),
-                           linalg::Vector<float>{{0.5}, {1}, Context::kCpuId}, 1, 3,
+                           linalg::Vector<float>{{0.5}, {1}, DeviceOrd::CPU()}, 1, 3,
                           MultiStrategy::kMultiOutputTree};

  std::vector<std::unique_ptr<RegTree>> trees;
--- a/tests/cpp/test_context.cc
+++ b/tests/cpp/test_context.cc
@@ -5,11 +5,13 @@
 #include <xgboost/base.h>
 #include <xgboost/context.h>

+#include <sstream>
+
 namespace xgboost {
 TEST(Context, CPU) {
  Context ctx;
  ASSERT_EQ(ctx.Device(), DeviceOrd::CPU());
-  ASSERT_EQ(ctx.Ordinal(), Context::kCpuId);
+  ASSERT_EQ(ctx.Ordinal(), DeviceOrd::CPUOrdinal());

  std::int32_t flag{0};
  ctx.DispatchDevice([&] { flag = -1; }, [&] { flag = 1; });
@@ -27,5 +29,20 @@ TEST(Context, CPU) {
  ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", ":gpu"}}), dmlc::Error);
  ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", ":0"}}), dmlc::Error);
  ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", ""}}), dmlc::Error);
+
+  std::stringstream ss;
+  ss << ctx.Device();
+  ASSERT_EQ(ss.str(), "cpu");
+}
+
+// `Context::Init` with the key "foo" must throw dmlc::Error, and the message must mention "foo".
+TEST(Context, ErrorInit) {
+  Context ctx;
+  ASSERT_THROW({ ctx.Init({{"foo", "bar"}}); }, dmlc::Error);
+  try {
+    ctx.Init({{"foo", "bar"}});
+  } catch (dmlc::Error const& err) {
+    ASSERT_NE(std::string{err.what()}.find("foo"), std::string::npos);
+  }
+}
 }  // namespace xgboost
--- a/tests/cpp/test_context.cu
+++ b/tests/cpp/test_context.cu
@@ -13,7 +13,6 @@
 namespace xgboost {
 namespace {
 void TestCUDA(Context const& ctx, bst_d_ordinal_t ord) {
-  ASSERT_EQ(ctx.gpu_id, ord);
  ASSERT_EQ(ctx.Device().ordinal, ord);
  ASSERT_EQ(ctx.DeviceName(), "cuda:" + std::to_string(ord));
  ASSERT_EQ(ctx.Ordinal(), ord);
@@ -25,7 +24,7 @@ void TestCUDA(Context const& ctx, bst_d_ordinal_t ord) {
  Context new_ctx;
  FromJson(jctx, &new_ctx);
  ASSERT_EQ(new_ctx.Device(), ctx.Device());
-  ASSERT_EQ(new_ctx.gpu_id, ctx.gpu_id);
+  ASSERT_EQ(new_ctx.Ordinal(), ctx.Ordinal());
 }
 }  // namespace

@@ -53,7 +52,7 @@ TEST(Context, DeviceOrdinal) {

  auto cpu_ctx = ctx.MakeCPU();
  ASSERT_TRUE(cpu_ctx.IsCPU());
-  ASSERT_EQ(cpu_ctx.Ordinal(), Context::kCpuId);
+  ASSERT_EQ(cpu_ctx.Ordinal(), DeviceOrd::CPUOrdinal());
  ASSERT_EQ(cpu_ctx.Device(), DeviceOrd::CPU());

  auto cuda_ctx = cpu_ctx.MakeCUDA(ctx.Ordinal());
--- a/tests/cpp/test_serialization.cc
+++ b/tests/cpp/test_serialization.cc
@@ -210,9 +210,9 @@ void TestLearnerSerialization(Args args, FeatureMap const& fmap, std::shared_ptr
    }
    // Pull data to device
    for (auto &batch : p_dmat->GetBatches<SparsePage>()) {
-      batch.data.SetDevice(0);
+      batch.data.SetDevice(DeviceOrd::CUDA(0));
      batch.data.DeviceSpan();
-      batch.offset.SetDevice(0);
+      batch.offset.SetDevice(DeviceOrd::CUDA(0));
      batch.offset.DeviceSpan();
    }

--- a/tests/cpp/tree/gpu_hist/test_evaluate_splits.cu
+++ b/tests/cpp/tree/gpu_hist/test_evaluate_splits.cu
@@ -1,5 +1,5 @@
-/*!
- * Copyright 2020-2022 by XGBoost contributors
+/**
+ * Copyright 2020-2023, XGBoost contributors
 */
 #include <gtest/gtest.h>
 #include <thrust/host_vector.h>
@@ -9,9 +9,7 @@
 #include "../../histogram_helpers.h"
 #include "../test_evaluate_splits.h"  // TestPartitionBasedSplit

-namespace xgboost {
-namespace tree {
-
+namespace xgboost::tree {
 namespace {
 auto ZeroParam() {
  auto args = Args{{"min_child_weight", "0"}, {"lambda", "0"}};
@@ -37,11 +35,12 @@ thrust::device_vector<GradientPairInt64> ConvertToInteger(std::vector<GradientPa
 }

 TEST_F(TestCategoricalSplitWithMissing, GPUHistEvaluator) {
+  auto ctx = MakeCUDACtx(0);
  thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0};
  GPUTrainingParam param{param_};
-  cuts_.cut_ptrs_.SetDevice(0);
-  cuts_.cut_values_.SetDevice(0);
-  cuts_.min_vals_.SetDevice(0);
+  cuts_.cut_ptrs_.SetDevice(ctx.Device());
+  cuts_.cut_values_.SetDevice(ctx.Device());
+  cuts_.min_vals_.SetDevice(ctx.Device());
  thrust::device_vector<GradientPairInt64> feature_histogram{ConvertToInteger(feature_histogram_)};

  dh::device_vector<FeatureType> feature_types(feature_set.size(), FeatureType::kCategorical);
@@ -57,9 +56,10 @@ TEST_F(TestCategoricalSplitWithMissing, GPUHistEvaluator) {
                                          cuts_.min_vals_.ConstDeviceSpan(),
                                          false};

-  GPUHistEvaluator evaluator{param_, static_cast<bst_feature_t>(feature_set.size()), 0};
+  GPUHistEvaluator evaluator{param_, static_cast<bst_feature_t>(feature_set.size()), ctx.Device()};

-  evaluator.Reset(cuts_, dh::ToSpan(feature_types), feature_set.size(), param_, false, 0);
+  evaluator.Reset(cuts_, dh::ToSpan(feature_types), feature_set.size(), param_, false,
+                  ctx.Device());
  DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;

  ASSERT_EQ(result.thresh, 1);
@@ -69,6 +69,7 @@ TEST_F(TestCategoricalSplitWithMissing, GPUHistEvaluator) {
 }

 TEST(GpuHist, PartitionBasic) {
+  auto ctx = MakeCUDACtx(0);
  TrainParam tparam = ZeroParam();
  tparam.max_cat_to_onehot = 0;
  GPUTrainingParam param{tparam};
@@ -77,9 +78,9 @@ TEST(GpuHist, PartitionBasic) {
  cuts.cut_values_.HostVector() = std::vector<float>{0.0, 1.0, 2.0};
  cuts.cut_ptrs_.HostVector() = std::vector<uint32_t>{0, 3};
  cuts.min_vals_.HostVector() = std::vector<float>{0.0};
-  cuts.cut_ptrs_.SetDevice(0);
-  cuts.cut_values_.SetDevice(0);
-  cuts.min_vals_.SetDevice(0);
+  cuts.cut_ptrs_.SetDevice(ctx.Device());
+  cuts.cut_values_.SetDevice(ctx.Device());
+  cuts.min_vals_.SetDevice(ctx.Device());
  thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0};

  thrust::device_vector<int> monotonic_constraints(feature_set.size(), 0);
@@ -100,8 +101,8 @@ TEST(GpuHist, PartitionBasic) {
      false,
  };

-  GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), 0};
-  evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false, 0);
+  GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), ctx.Device()};
+  evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false, ctx.Device());

  {
    // -1.0s go right
@@ -183,6 +184,7 @@ TEST(GpuHist, PartitionBasic) {
 }

 TEST(GpuHist, PartitionTwoFeatures) {
+  auto ctx = MakeCUDACtx(0);
  TrainParam tparam = ZeroParam();
  tparam.max_cat_to_onehot = 0;
  GPUTrainingParam param{tparam};
@@ -191,9 +193,9 @@ TEST(GpuHist, PartitionTwoFeatures) {
  cuts.cut_values_.HostVector() = std::vector<float>{0.0, 1.0, 2.0, 0.0, 1.0, 2.0};
  cuts.cut_ptrs_.HostVector() = std::vector<uint32_t>{0, 3, 6};
  cuts.min_vals_.HostVector() = std::vector<float>{0.0, 0.0};
-  cuts.cut_ptrs_.SetDevice(0);
-  cuts.cut_values_.SetDevice(0);
-  cuts.min_vals_.SetDevice(0);
+  cuts.cut_ptrs_.SetDevice(ctx.Device());
+  cuts.cut_values_.SetDevice(ctx.Device());
+  cuts.min_vals_.SetDevice(ctx.Device());
  thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};

  thrust::device_vector<int> monotonic_constraints(feature_set.size(), 0);
@@ -212,8 +214,8 @@ TEST(GpuHist, PartitionTwoFeatures) {
                                          cuts.min_vals_.ConstDeviceSpan(),
                                          false};

-  GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), 0};
-  evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false, 0);
+  GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), ctx.Device()};
+  evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false, ctx.Device());

  {
    auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-6.0, 3.0});
@@ -243,6 +245,7 @@ TEST(GpuHist, PartitionTwoFeatures) {
 }

 TEST(GpuHist, PartitionTwoNodes) {
+  auto ctx = MakeCUDACtx(0);
  TrainParam tparam = ZeroParam();
  tparam.max_cat_to_onehot = 0;
  GPUTrainingParam param{tparam};
@@ -251,9 +254,9 @@ TEST(GpuHist, PartitionTwoNodes) {
  cuts.cut_values_.HostVector() = std::vector<float>{0.0, 1.0, 2.0};
  cuts.cut_ptrs_.HostVector() = std::vector<uint32_t>{0, 3};
  cuts.min_vals_.HostVector() = std::vector<float>{0.0};
-  cuts.cut_ptrs_.SetDevice(0);
-  cuts.cut_values_.SetDevice(0);
-  cuts.min_vals_.SetDevice(0);
+  cuts.cut_ptrs_.SetDevice(ctx.Device());
+  cuts.cut_values_.SetDevice(ctx.Device());
+  cuts.min_vals_.SetDevice(ctx.Device());
  thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0};

  thrust::device_vector<int> monotonic_constraints(feature_set.size(), 0);
@@ -272,8 +275,10 @@ TEST(GpuHist, PartitionTwoNodes) {
                                          cuts.min_vals_.ConstDeviceSpan(),
                                          false};

-  GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), 0};
-  evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false, 0);
+  GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()),
+                             ctx.Device()};
+  evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false,
+                  ctx.Device());

  {
    auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-6.0, 3.0});
@@ -295,12 +300,14 @@ TEST(GpuHist, PartitionTwoNodes) {
 }

 void TestEvaluateSingleSplit(bool is_categorical) {
+  auto ctx = MakeCUDACtx(0);
  auto quantiser = DummyRoundingFactor();
  auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 1.0});
  TrainParam tparam = ZeroParam();
  GPUTrainingParam param{tparam};

-  common::HistogramCuts cuts{MakeCutsForTest({1.0, 2.0, 11.0, 12.0}, {0, 2, 4}, {0.0, 0.0}, 0)};
+  common::HistogramCuts cuts{
+      MakeCutsForTest({1.0, 2.0, 11.0, 12.0}, {0, 2, 4}, {0.0, 0.0}, ctx.Device())};
  thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};

  // Setup gradients so that second feature gets higher gain
@@ -325,8 +332,10 @@ void TestEvaluateSingleSplit(bool is_categorical) {
                                          cuts.min_vals_.ConstDeviceSpan(),
                                          false};

-  GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), 0};
-  evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false, 0);
+  GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()),
+                             ctx.Device()};
+  evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false,
+                  ctx.Device());
  DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;

  EXPECT_EQ(result.findex, 1);
@@ -363,7 +372,7 @@ TEST(GpuHist, EvaluateSingleSplitMissing) {
                                          dh::ToSpan(feature_min_values),
                                          false};

-  GPUHistEvaluator evaluator(tparam, feature_set.size(), 0);
+  GPUHistEvaluator evaluator(tparam, feature_set.size(), FstCU());
  DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;

  EXPECT_EQ(result.findex, 0);
@@ -375,7 +384,7 @@ TEST(GpuHist, EvaluateSingleSplitMissing) {

 TEST(GpuHist, EvaluateSingleSplitEmpty) {
  TrainParam tparam = ZeroParam();
-  GPUHistEvaluator evaluator(tparam, 1, 0);
+  GPUHistEvaluator evaluator(tparam, 1, FstCU());
  DeviceSplitCandidate result =
      evaluator
          .EvaluateSingleSplit(
@@ -410,7 +419,7 @@ TEST(GpuHist, EvaluateSingleSplitFeatureSampling) {
                                          dh::ToSpan(feature_min_values),
                                          false};

-  GPUHistEvaluator evaluator(tparam, feature_min_values.size(), 0);
+  GPUHistEvaluator evaluator(tparam, feature_min_values.size(), FstCU());
  DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;

  EXPECT_EQ(result.findex, 1);
@@ -442,7 +451,7 @@ TEST(GpuHist, EvaluateSingleSplitBreakTies) {
                                          dh::ToSpan(feature_min_values),
                                          false};

-  GPUHistEvaluator evaluator(tparam, feature_min_values.size(), 0);
+  GPUHistEvaluator evaluator(tparam, feature_min_values.size(), FstCU());
  DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;

  EXPECT_EQ(result.findex, 0);
@@ -477,7 +486,8 @@ TEST(GpuHist, EvaluateSplits) {
                                          dh::ToSpan(feature_min_values),
                                          false};

-  GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_min_values.size()), 0};
+  GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_min_values.size()),
+                             FstCU()};
  dh::device_vector<EvaluateSplitInputs> inputs =
      std::vector<EvaluateSplitInputs>{input_left, input_right};
  evaluator.LaunchEvaluateSplits(input_left.feature_set.size(), dh::ToSpan(inputs), shared_inputs,
@@ -493,14 +503,15 @@ TEST(GpuHist, EvaluateSplits) {
 }

 TEST_F(TestPartitionBasedSplit, GpuHist) {
+  auto ctx = MakeCUDACtx(0);
  dh::device_vector<FeatureType> ft{std::vector<FeatureType>{FeatureType::kCategorical}};
-  GPUHistEvaluator evaluator{param_, static_cast<bst_feature_t>(info_.num_col_), 0};
+  GPUHistEvaluator evaluator{param_, static_cast<bst_feature_t>(info_.num_col_), ctx.Device()};

-  cuts_.cut_ptrs_.SetDevice(0);
-  cuts_.cut_values_.SetDevice(0);
-  cuts_.min_vals_.SetDevice(0);
+  cuts_.cut_ptrs_.SetDevice(ctx.Device());
+  cuts_.cut_values_.SetDevice(ctx.Device());
+  cuts_.min_vals_.SetDevice(ctx.Device());

-  evaluator.Reset(cuts_, dh::ToSpan(ft), info_.num_col_, param_, false, 0);
+  evaluator.Reset(cuts_, dh::ToSpan(ft), info_.num_col_, param_, false, ctx.Device());

  // Convert the sample histogram to fixed point
  auto quantiser = DummyRoundingFactor();
@@ -528,15 +539,16 @@ class MGPUHistTest : public BaseMGPUTest {};

 namespace {
 void VerifyColumnSplitEvaluateSingleSplit(bool is_categorical) {
+  auto ctx = MakeCUDACtx(GPUIDX);
  auto rank = collective::GetRank();
  auto quantiser = DummyRoundingFactor();
  auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 1.0});
  TrainParam tparam = ZeroParam();
  GPUTrainingParam param{tparam};

-  common::HistogramCuts cuts{rank == 0
-                                 ? MakeCutsForTest({1.0, 2.0}, {0, 2, 2}, {0.0, 0.0}, GPUIDX)
-                                 : MakeCutsForTest({11.0, 12.0}, {0, 0, 2}, {0.0, 0.0}, GPUIDX)};
+  common::HistogramCuts cuts{
+      rank == 0 ? MakeCutsForTest({1.0, 2.0}, {0, 2, 2}, {0.0, 0.0}, ctx.Device())
+                : MakeCutsForTest({11.0, 12.0}, {0, 0, 2}, {0.0, 0.0}, ctx.Device())};
  thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};

  // Setup gradients so that second feature gets higher gain
@@ -562,8 +574,8 @@ void VerifyColumnSplitEvaluateSingleSplit(bool is_categorical) {
                                          cuts.min_vals_.ConstDeviceSpan(),
                                          false};

-  GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), GPUIDX};
-  evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, true, GPUIDX);
+  GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), ctx.Device()};
+  evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, true, ctx.Device());
  DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;

  EXPECT_EQ(result.findex, 1) << "rank: " << rank;
@@ -583,5 +595,4 @@ TEST_F(MGPUHistTest, ColumnSplitEvaluateSingleSplit) {
 TEST_F(MGPUHistTest, ColumnSplitEvaluateSingleCategoricalSplit) {
  DoTest(VerifyColumnSplitEvaluateSingleSplit, true);
 }
-}  // namespace tree
-}  // namespace xgboost
+}  // namespace xgboost::tree
--- a/tests/cpp/tree/gpu_hist/test_gradient_based_sampler.cu
+++ b/tests/cpp/tree/gpu_hist/test_gradient_based_sampler.cu
@@ -30,9 +30,9 @@ void VerifySampling(size_t page_size,
  for (const auto& gp : gpair.ConstHostVector()) {
    sum_gpair += gp;
  }
-  gpair.SetDevice(0);
-
  Context ctx{MakeCUDACtx(0)};
+  gpair.SetDevice(ctx.Device());
+
  auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
  auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
  if (page_size != 0) {
@@ -87,9 +87,9 @@ TEST(GradientBasedSampler, NoSamplingExternalMemory) {
  std::unique_ptr<DMatrix> dmat(
      CreateSparsePageDMatrix(kRows, kCols, kRows / kPageSize, tmpdir.path + "/cache"));
  auto gpair = GenerateRandomGradients(kRows);
-  gpair.SetDevice(0);
-
  Context ctx{MakeCUDACtx(0)};
+  gpair.SetDevice(ctx.Device());
+
  auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
  auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
  EXPECT_NE(page->n_rows, kRows);
--- a/tests/cpp/tree/gpu_hist/test_histogram.cu
+++ b/tests/cpp/tree/gpu_hist/test_histogram.cu
@@ -12,9 +12,7 @@
 #include "../../categorical_helpers.h"
 #include "../../helpers.h"

-namespace xgboost {
-namespace tree {
-
+namespace xgboost::tree {
 void TestDeterministicHistogram(bool is_dense, int shm_size) {
  Context ctx = MakeCUDACtx(0);
  size_t constexpr kBins = 256, kCols = 120, kRows = 16384, kRounds = 16;
@@ -27,22 +25,22 @@ void TestDeterministicHistogram(bool is_dense, int shm_size) {
  for (auto const& batch : matrix->GetBatches<EllpackPage>(&ctx, batch_param)) {
    auto* page = batch.Impl();

-    tree::RowPartitioner row_partitioner(0, kRows);
+    tree::RowPartitioner row_partitioner(FstCU(), kRows);
    auto ridx = row_partitioner.GetRows(0);

    int num_bins = kBins * kCols;
    dh::device_vector<GradientPairInt64> histogram(num_bins);
    auto d_histogram = dh::ToSpan(histogram);
    auto gpair = GenerateRandomGradients(kRows, kLower, kUpper);
-    gpair.SetDevice(0);
+    gpair.SetDevice(FstCU());

    FeatureGroups feature_groups(page->Cuts(), page->is_dense, shm_size,
                                 sizeof(GradientPairInt64));

    auto quantiser = GradientQuantiser(gpair.DeviceSpan(), MetaInfo());
-    BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
-                           feature_groups.DeviceAccessor(0), gpair.DeviceSpan(), ridx, d_histogram,
-                           quantiser);
+    BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(FstCU()),
+                           feature_groups.DeviceAccessor(FstCU()), gpair.DeviceSpan(), ridx,
+                           d_histogram, quantiser);

    std::vector<GradientPairInt64> histogram_h(num_bins);
    dh::safe_cuda(cudaMemcpy(histogram_h.data(), d_histogram.data(),
@@ -54,8 +52,8 @@ void TestDeterministicHistogram(bool is_dense, int shm_size) {
      auto d_new_histogram = dh::ToSpan(new_histogram);

      auto quantiser = GradientQuantiser(gpair.DeviceSpan(), MetaInfo());
-      BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
-                             feature_groups.DeviceAccessor(0), gpair.DeviceSpan(), ridx,
+      BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(FstCU()),
+                             feature_groups.DeviceAccessor(FstCU()), gpair.DeviceSpan(), ridx,
                             d_new_histogram, quantiser);

      std::vector<GradientPairInt64> new_histogram_h(num_bins);
@@ -70,14 +68,14 @@ void TestDeterministicHistogram(bool is_dense, int shm_size) {

    {
      auto gpair = GenerateRandomGradients(kRows, kLower, kUpper);
-      gpair.SetDevice(0);
+      gpair.SetDevice(FstCU());

      // Use a single feature group to compute the baseline.
      FeatureGroups single_group(page->Cuts());

      dh::device_vector<GradientPairInt64> baseline(num_bins);
-      BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
-                             single_group.DeviceAccessor(0), gpair.DeviceSpan(), ridx,
+      BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(FstCU()),
+                             single_group.DeviceAccessor(FstCU()), gpair.DeviceSpan(), ridx,
                             dh::ToSpan(baseline), quantiser);

      std::vector<GradientPairInt64> baseline_h(num_bins);
@@ -126,11 +124,11 @@ void TestGPUHistogramCategorical(size_t num_categories) {
  auto cat_m = GetDMatrixFromData(x, kRows, 1);
  cat_m->Info().feature_types.HostVector().push_back(FeatureType::kCategorical);
  auto batch_param = BatchParam{kBins, tree::TrainParam::DftSparseThreshold()};
-  tree::RowPartitioner row_partitioner(0, kRows);
+  tree::RowPartitioner row_partitioner(ctx.Device(), kRows);
  auto ridx = row_partitioner.GetRows(0);
  dh::device_vector<GradientPairInt64> cat_hist(num_categories);
  auto gpair = GenerateRandomGradients(kRows, 0, 2);
-  gpair.SetDevice(0);
+  gpair.SetDevice(DeviceOrd::CUDA(0));
  auto quantiser = GradientQuantiser(gpair.DeviceSpan(), MetaInfo());
  /**
   * Generate hist with cat data.
@@ -138,8 +136,8 @@ void TestGPUHistogramCategorical(size_t num_categories) {
  for (auto const &batch : cat_m->GetBatches<EllpackPage>(&ctx, batch_param)) {
    auto* page = batch.Impl();
    FeatureGroups single_group(page->Cuts());
-    BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
-                           single_group.DeviceAccessor(0), gpair.DeviceSpan(), ridx,
+    BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(ctx.Device()),
+                           single_group.DeviceAccessor(ctx.Device()), gpair.DeviceSpan(), ridx,
                           dh::ToSpan(cat_hist), quantiser);
  }

@@ -152,8 +150,8 @@ void TestGPUHistogramCategorical(size_t num_categories) {
  for (auto const &batch : encode_m->GetBatches<EllpackPage>(&ctx, batch_param)) {
    auto* page = batch.Impl();
    FeatureGroups single_group(page->Cuts());
-    BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
-                           single_group.DeviceAccessor(0), gpair.DeviceSpan(), ridx,
+    BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(ctx.Device()),
+                           single_group.DeviceAccessor(ctx.Device()), gpair.DeviceSpan(), ridx,
                           dh::ToSpan(encode_hist), quantiser);
  }

@@ -241,5 +239,4 @@ void TestAtomicAdd() {
 TEST(Histogram, AtomicAddInt64) {
  TestAtomicAdd();
 }
-}  // namespace tree
-}  // namespace xgboost
+}  // namespace xgboost::tree
--- a/tests/cpp/tree/gpu_hist/test_row_partitioner.cu
+++ b/tests/cpp/tree/gpu_hist/test_row_partitioner.cu
@@ -16,12 +16,10 @@
 #include "xgboost/task.h"
 #include "xgboost/tree_model.h"

-namespace xgboost {
-namespace tree {
-
+namespace xgboost::tree {
 void TestUpdatePositionBatch() {
  const int kNumRows = 10;
-  RowPartitioner rp(0, kNumRows);
+  RowPartitioner rp(FstCU(), kNumRows);
  auto rows = rp.GetRowsHost(0);
  EXPECT_EQ(rows.size(), kNumRows);
  for (auto i = 0ull; i < kNumRows; i++) {
@@ -89,12 +87,11 @@ void TestSortPositionBatch(const std::vector<int>& ridx_in, const std::vector<Se
  }
 }

-TEST(GpuHist, SortPositionBatch) { 
-  TestSortPositionBatch({0, 1, 2, 3, 4, 5}, {{0, 3}, {3, 6}}); 
-  TestSortPositionBatch({0, 1, 2, 3, 4, 5}, {{0, 1}, {3, 6}}); 
+TEST(GpuHist, SortPositionBatch) {
+  TestSortPositionBatch({0, 1, 2, 3, 4, 5}, {{0, 3}, {3, 6}});
+  TestSortPositionBatch({0, 1, 2, 3, 4, 5}, {{0, 1}, {3, 6}});
  TestSortPositionBatch({0, 1, 2, 3, 4, 5}, {{0, 6}});
  TestSortPositionBatch({0, 1, 2, 3, 4, 5}, {{3, 6}, {0, 2}});
 }

-}  // namespace tree
-}  // namespace xgboost
+}  // namespace xgboost::tree
--- a/tests/cpp/tree/hist/test_evaluate_splits.cc
+++ b/tests/cpp/tree/hist/test_evaluate_splits.cc
@@ -115,7 +115,7 @@ TEST(HistMultiEvaluator, Evaluate) {
  HistMultiEvaluator evaluator{&ctx, p_fmat->Info(), &param, sampler};
  HistMakerTrainParam hist_param;
  std::vector<BoundedHistCollection> histogram(n_targets);
-  linalg::Vector<GradientPairPrecise> root_sum({2}, Context::kCpuId);
+  linalg::Vector<GradientPairPrecise> root_sum({2}, DeviceOrd::CPU());
  for (bst_target_t t{0}; t < n_targets; ++t) {
    auto &hist = histogram[t];
    hist.Reset(n_bins * n_features, hist_param.max_cached_hist_node);
--- a/tests/cpp/tree/test_evaluate_splits.h
+++ b/tests/cpp/tree/test_evaluate_splits.h
@@ -76,7 +76,7 @@ class TestPartitionBasedSplit : public ::testing::Test {
                                                     GradientPairPrecise parent_sum) {
      int32_t best_thresh = -1;
      float best_score{-std::numeric_limits<float>::infinity()};
-      TreeEvaluator evaluator{param_, static_cast<bst_feature_t>(n_feat), -1};
+      TreeEvaluator evaluator{param_, static_cast<bst_feature_t>(n_feat), DeviceOrd::CPU()};
      auto tree_evaluator = evaluator.GetEvaluator<TrainParam>();
      GradientPairPrecise left_sum;
      auto parent_gain = tree_evaluator.CalcGain(0, param_, GradStats{total_gpair_});
@@ -111,13 +111,13 @@ class TestPartitionBasedSplit : public ::testing::Test {
 };

 inline auto MakeCutsForTest(std::vector<float> values, std::vector<uint32_t> ptrs,
-                            std::vector<float> min_values, int32_t device) {
+                            std::vector<float> min_values, DeviceOrd device) {
  common::HistogramCuts cuts;
  cuts.cut_values_.HostVector() = values;
  cuts.cut_ptrs_.HostVector() = ptrs;
  cuts.min_vals_.HostVector() = min_values;

-  if (device >= 0) {
+  if (device.IsCUDA()) {
    cuts.cut_ptrs_.SetDevice(device);
    cuts.cut_values_.SetDevice(device);
    cuts.min_vals_.SetDevice(device);
@@ -136,7 +136,7 @@ class TestCategoricalSplitWithMissing : public testing::Test {
  TrainParam param_;

  void SetUp() override {
-    cuts_ = MakeCutsForTest({0.0, 1.0, 2.0, 3.0}, {0, 4}, {0.0}, -1);
+    cuts_ = MakeCutsForTest({0.0, 1.0, 2.0, 3.0}, {0, 4}, {0.0}, DeviceOrd::CPU());
    auto max_cat = *std::max_element(cuts_.cut_values_.HostVector().begin(),
                                     cuts_.cut_values_.HostVector().end());
    cuts_.SetCategorical(true, max_cat);
--- a/tests/cpp/tree/test_gpu_hist.cu
+++ b/tests/cpp/tree/test_gpu_hist.cu
@@ -29,7 +29,7 @@ TEST(GpuHist, DeviceHistogram) {
  constexpr int kNNodes = 4;
  constexpr size_t kStopGrowing = kNNodes * kNBins * 2u;
  DeviceHistogramStorage<kStopGrowing> histogram;
-  histogram.Init(0, kNBins);
+  histogram.Init(FstCU(), kNBins);
  for (int i = 0; i < kNNodes; ++i) {
    histogram.AllocateHistograms({i});
  }
@@ -102,12 +102,12 @@ void TestBuildHist(bool use_shared_memory_histograms) {
    bst_float hess = dist(&gen);
    gp = GradientPair(grad, hess);
  }
-  gpair.SetDevice(0);
+  gpair.SetDevice(DeviceOrd::CUDA(0));

  thrust::host_vector<common::CompressedByteT> h_gidx_buffer (page->gidx_buffer.HostVector());
-  maker.row_partitioner = std::make_unique<RowPartitioner>(0, kNRows);
+  maker.row_partitioner = std::make_unique<RowPartitioner>(FstCU(), kNRows);

-  maker.hist.Init(0, page->Cuts().TotalBins());
+  maker.hist.Init(FstCU(), page->Cuts().TotalBins());
  maker.hist.AllocateHistograms({0});

  maker.gpair = gpair.DeviceSpan();
@@ -116,8 +116,8 @@ void TestBuildHist(bool use_shared_memory_histograms) {

  maker.InitFeatureGroupsOnce();

-  BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
-                         maker.feature_groups->DeviceAccessor(0), gpair.DeviceSpan(),
+  BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(DeviceOrd::CUDA(0)),
+                         maker.feature_groups->DeviceAccessor(DeviceOrd::CUDA(0)), gpair.DeviceSpan(),
                         maker.row_partitioner->GetRows(0), maker.hist.GetNodeHistogram(0),
                         *maker.quantiser, !use_shared_memory_histograms);

@@ -198,7 +198,7 @@ void TestHistogramIndexImpl() {
  // histogram index
  const auto &maker = hist_maker.maker;
  auto grad = GenerateRandomGradients(kNRows);
-  grad.SetDevice(0);
+  grad.SetDevice(DeviceOrd::CUDA(0));
  maker->Reset(&grad, hist_maker_dmat.get(), kNCols);
  std::vector<common::CompressedByteT> h_gidx_buffer(maker->page->gidx_buffer.HostVector());

@@ -264,17 +264,17 @@ TEST(GpuHist, UniformSampling) {
  // Create an in-memory DMatrix.
  std::unique_ptr<DMatrix> dmat(CreateSparsePageDMatrixWithRC(kRows, kCols, 0, true));

-  linalg::Matrix<GradientPair> gpair({kRows}, Context{}.MakeCUDA().Ordinal());
+  linalg::Matrix<GradientPair> gpair({kRows}, Context{}.MakeCUDA().Device());
  gpair.Data()->Copy(GenerateRandomGradients(kRows));

  // Build a tree using the in-memory DMatrix.
  RegTree tree;
-  HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
+  HostDeviceVector<bst_float> preds(kRows, 0.0, DeviceOrd::CUDA(0));
  Context ctx(MakeCUDACtx(0));
  UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
  // Build another tree using sampling.
  RegTree tree_sampling;
-  HostDeviceVector<bst_float> preds_sampling(kRows, 0.0, 0);
+  HostDeviceVector<bst_float> preds_sampling(kRows, 0.0, DeviceOrd::CUDA(0));
  UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree_sampling, &preds_sampling, kSubsample, "uniform",
             kRows);

@@ -295,18 +295,18 @@ TEST(GpuHist, GradientBasedSampling) {
  // Create an in-memory DMatrix.
  std::unique_ptr<DMatrix> dmat(CreateSparsePageDMatrixWithRC(kRows, kCols, 0, true));

-  linalg::Matrix<GradientPair> gpair({kRows}, MakeCUDACtx(0).Ordinal());
+  linalg::Matrix<GradientPair> gpair({kRows}, MakeCUDACtx(0).Device());
  gpair.Data()->Copy(GenerateRandomGradients(kRows));

  // Build a tree using the in-memory DMatrix.
  RegTree tree;
-  HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
+  HostDeviceVector<bst_float> preds(kRows, 0.0, DeviceOrd::CUDA(0));
  Context ctx(MakeCUDACtx(0));
  UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);

  // Build another tree using sampling.
  RegTree tree_sampling;
-  HostDeviceVector<bst_float> preds_sampling(kRows, 0.0, 0);
+  HostDeviceVector<bst_float> preds_sampling(kRows, 0.0, DeviceOrd::CUDA(0));
  UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree_sampling, &preds_sampling, kSubsample,
             "gradient_based", kRows);

@@ -333,16 +333,16 @@ TEST(GpuHist, ExternalMemory) {
  std::unique_ptr<DMatrix> dmat(CreateSparsePageDMatrix(kRows, kCols, 1, tmpdir.path + "/cache"));

  Context ctx(MakeCUDACtx(0));
-  linalg::Matrix<GradientPair> gpair({kRows}, ctx.Ordinal());
+  linalg::Matrix<GradientPair> gpair({kRows}, ctx.Device());
  gpair.Data()->Copy(GenerateRandomGradients(kRows));

  // Build a tree using the in-memory DMatrix.
  RegTree tree;
-  HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
+  HostDeviceVector<bst_float> preds(kRows, 0.0, DeviceOrd::CUDA(0));
  UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
  // Build another tree using multiple ELLPACK pages.
  RegTree tree_ext;
-  HostDeviceVector<bst_float> preds_ext(kRows, 0.0, 0);
+  HostDeviceVector<bst_float> preds_ext(kRows, 0.0, DeviceOrd::CUDA(0));
  UpdateTree(&ctx, &gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext, 1.0, "uniform", kRows);

  // Make sure the predictions are the same.
@@ -371,20 +371,20 @@ TEST(GpuHist, ExternalMemoryWithSampling) {
      CreateSparsePageDMatrix(kRows, kCols, kRows / kPageSize, tmpdir.path + "/cache"));

  Context ctx(MakeCUDACtx(0));
-  linalg::Matrix<GradientPair> gpair({kRows}, ctx.Ordinal());
+  linalg::Matrix<GradientPair> gpair({kRows}, ctx.Device());
  gpair.Data()->Copy(GenerateRandomGradients(kRows));

  // Build a tree using the in-memory DMatrix.
  auto rng = common::GlobalRandom();

  RegTree tree;
-  HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
+  HostDeviceVector<bst_float> preds(kRows, 0.0, DeviceOrd::CUDA(0));
  UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, kSubsample, kSamplingMethod, kRows);

  // Build another tree using multiple ELLPACK pages.
  common::GlobalRandom() = rng;
  RegTree tree_ext;
-  HostDeviceVector<bst_float> preds_ext(kRows, 0.0, 0);
+  HostDeviceVector<bst_float> preds_ext(kRows, 0.0, DeviceOrd::CUDA(0));
  UpdateTree(&ctx, &gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext, kSubsample,
             kSamplingMethod, kRows);

@@ -436,7 +436,7 @@ RegTree GetHistTree(Context const* ctx, DMatrix* dmat) {
  TrainParam param;
  param.UpdateAllowUnknown(Args{});

-  linalg::Matrix<GradientPair> gpair({dmat->Info().num_row_}, ctx->Ordinal());
+  linalg::Matrix<GradientPair> gpair({dmat->Info().num_row_}, ctx->Device());
  gpair.Data()->Copy(GenerateRandomGradients(dmat->Info().num_row_));

  std::vector<HostDeviceVector<bst_node_t>> position(1);
@@ -486,7 +486,7 @@ RegTree GetApproxTree(Context const* ctx, DMatrix* dmat) {
  TrainParam param;
  param.UpdateAllowUnknown(Args{});

-  linalg::Matrix<GradientPair> gpair({dmat->Info().num_row_}, ctx->Ordinal());
+  linalg::Matrix<GradientPair> gpair({dmat->Info().num_row_}, ctx->Device());
  gpair.Data()->Copy(GenerateRandomGradients(dmat->Info().num_row_));

  std::vector<HostDeviceVector<bst_node_t>> position(1);
--- a/tests/cpp/tree/test_histmaker.cc
+++ b/tests/cpp/tree/test_histmaker.cc
@@ -28,7 +28,7 @@ TEST(GrowHistMaker, InteractionConstraint) {
  auto p_dmat = GenerateDMatrix(kRows, kCols);
  Context ctx;

-  linalg::Matrix<GradientPair> gpair({kRows}, ctx.Ordinal());
+  linalg::Matrix<GradientPair> gpair({kRows}, ctx.Device());
  gpair.Data()->Copy(GenerateRandomGradients(kRows));

  ObjInfo task{ObjInfo::kRegression};
@@ -74,7 +74,7 @@ void VerifyColumnSplit(int32_t rows, bst_feature_t cols, bool categorical,
                       RegTree const& expected_tree) {
  Context ctx;
  auto p_dmat = GenerateDMatrix(rows, cols, categorical);
-  linalg::Matrix<GradientPair> gpair({rows}, ctx.Ordinal());
+  linalg::Matrix<GradientPair> gpair({rows}, ctx.Device());
  gpair.Data()->Copy(GenerateRandomGradients(rows));


@@ -107,7 +107,7 @@ void TestColumnSplit(bool categorical) {
  {
    Context ctx;
    auto p_dmat = GenerateDMatrix(kRows, kCols, categorical);
-    linalg::Matrix<GradientPair> gpair({kRows}, ctx.Ordinal());
+    linalg::Matrix<GradientPair> gpair({kRows}, ctx.Device());
    gpair.Data()->Copy(GenerateRandomGradients(kRows));
    std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)};
    std::vector<HostDeviceVector<bst_node_t>> position(1);
--- a/tests/cpp/tree/test_multi_target_tree_model.cc
+++ b/tests/cpp/tree/test_multi_target_tree_model.cc
@@ -12,9 +12,9 @@ TEST(MultiTargetTree, JsonIO) {
  bst_feature_t n_features{4};
  RegTree tree{n_targets, n_features};
  ASSERT_TRUE(tree.IsMultiTarget());
-  linalg::Vector<float> base_weight{{1.0f, 2.0f, 3.0f}, {3ul}, Context::kCpuId};
-  linalg::Vector<float> left_weight{{2.0f, 3.0f, 4.0f}, {3ul}, Context::kCpuId};
-  linalg::Vector<float> right_weight{{3.0f, 4.0f, 5.0f}, {3ul}, Context::kCpuId};
+  linalg::Vector<float> base_weight{{1.0f, 2.0f, 3.0f}, {3ul}, DeviceOrd::CPU()};
+  linalg::Vector<float> left_weight{{2.0f, 3.0f, 4.0f}, {3ul}, DeviceOrd::CPU()};
+  linalg::Vector<float> right_weight{{3.0f, 4.0f, 5.0f}, {3ul}, DeviceOrd::CPU()};
  tree.ExpandNode(RegTree::kRoot, /*split_idx=*/1, 0.5f, true, base_weight.HostView(),
                  left_weight.HostView(), right_weight.HostView());
  ASSERT_EQ(tree.NumNodes(), 3);
--- a/tests/cpp/tree/test_tree_stat.cc
+++ b/tests/cpp/tree/test_tree_stat.cc
@@ -33,7 +33,7 @@ class UpdaterTreeStatTest : public ::testing::Test {
    ObjInfo task{ObjInfo::kRegression};
    param.Init(Args{});

-    Context ctx(updater == "grow_gpu_hist" ? MakeCUDACtx(0) : MakeCUDACtx(Context::kCpuId));
+    Context ctx(updater == "grow_gpu_hist" ? MakeCUDACtx(0) : MakeCUDACtx(DeviceOrd::CPUOrdinal()));
    auto up = std::unique_ptr<TreeUpdater>{TreeUpdater::Create(updater, &ctx, &task)};
    up->Configure(Args{});
    RegTree tree{1u, kCols};
@@ -78,7 +78,7 @@ class UpdaterEtaTest : public ::testing::Test {
  void RunTest(std::string updater) {
    ObjInfo task{ObjInfo::kClassification};

-    Context ctx(updater == "grow_gpu_hist" ? MakeCUDACtx(0) : MakeCUDACtx(Context::kCpuId));
+    Context ctx(updater == "grow_gpu_hist" ? MakeCUDACtx(0) : MakeCUDACtx(DeviceOrd::CPUOrdinal()));

    float eta = 0.4;
    auto up_0 = std::unique_ptr<TreeUpdater>{TreeUpdater::Create(updater, &ctx, &task)};