Support exporting cut values (#9356)

2023-07-08 15:32:41 +08:00 · 2023-07-08 15:32:41 +08:00 · 20c52f07d2
commit 20c52f07d2
parent c3124813e8
28 changed files with 722 additions and 101 deletions
--- a/include/xgboost/c_api.h
+++ b/include/xgboost/c_api.h
@ -810,7 +810,7 @@ XGB_DLL int XGDMatrixNumCol(DMatrixHandle handle,
 */
 XGB_DLL int XGDMatrixNumNonMissing(DMatrixHandle handle, bst_ulong *out);
-/*!
+/**
 * \brief Get the predictors from DMatrix as CSR matrix for testing.  If this is a
 *        quantized DMatrix, quantized values are returned instead.
 *
@ -819,8 +819,10 @@ XGB_DLL int XGDMatrixNumNonMissing(DMatrixHandle handle, bst_ulong *out);
 * XGBoost. This is to avoid allocating a huge memory buffer that can not be freed until
 * exiting the thread.
 *
 * @since 1.7.0
 *
 * \param handle the handle to the DMatrix
- * \param config Json configuration string. At the moment it should be an empty document,
+ * \param config JSON configuration string. At the moment it should be an empty document,
 *               preserved for future use.
 * \param out_indptr  indptr of output CSR matrix.
 * \param out_indices Column index of output CSR matrix.
@ -831,6 +833,24 @@ XGB_DLL int XGDMatrixNumNonMissing(DMatrixHandle handle, bst_ulong *out);
 XGB_DLL int XGDMatrixGetDataAsCSR(DMatrixHandle const handle, char const *config,
                                  bst_ulong *out_indptr, unsigned *out_indices, float *out_data);
 /**
 * @brief Export the quantile cuts used for training histogram-based models like `hist` and
 *        `approx`. Useful for model compression.
 *
 * @since 2.0.0
 *
 * @param handle the handle to the DMatrix
 * @param config JSON configuration string. At the moment it should be an empty document,
 *               preserved for future use.
 *
 * @param out_indptr indptr of output CSC matrix represented by a JSON encoded
 *                   __(cuda_)array_interface__.
 * @param out_data   Data value of CSC matrix represented by a JSON encoded
 *                   __(cuda_)array_interface__.
 */
 XGB_DLL int XGDMatrixGetQuantileCut(DMatrixHandle const handle, char const *config,
                                     char const **out_indptr, char const **out_data);
 /** @} */  // End of DMatrix
 /**
--- a/include/xgboost/data.h
+++ b/include/xgboost/data.h
@ -282,7 +282,7 @@ struct BatchParam {
  BatchParam(bst_bin_t max_bin, common::Span<float> hessian, bool regenerate)
      : max_bin{max_bin}, hess{hessian}, regen{regenerate} {}
-  bool ParamNotEqual(BatchParam const& other) const {
+  [[nodiscard]] bool ParamNotEqual(BatchParam const& other) const {
    // Check non-floating parameters.
    bool cond = max_bin != other.max_bin;
    // Check sparse thresh.
@ -293,11 +293,11 @@ struct BatchParam {
    return cond;
  }
-  bool Initialized() const { return max_bin != 0; }
+  [[nodiscard]] bool Initialized() const { return max_bin != 0; }
  /**
   * \brief Make a copy of self for DMatrix to describe how its existing index was generated.
   */
-  BatchParam MakeCache() const {
+  [[nodiscard]] BatchParam MakeCache() const {
    auto p = *this;
    // These parameters have nothing to do with how the gradient index was generated in the
    // first place.
@ -319,7 +319,7 @@ struct HostSparsePageView {
            static_cast<Inst::index_type>(size)};
  }
-  size_t Size() const { return offset.size() == 0 ? 0 : offset.size() - 1; }
+  [[nodiscard]] size_t Size() const { return offset.size() == 0 ? 0 : offset.size() - 1; }
 };
 /*!
@ -337,7 +337,7 @@ class SparsePage {
  /*! \brief an instance of sparse vector in the batch */
  using Inst = common::Span<Entry const>;
-  HostSparsePageView GetView() const {
+  [[nodiscard]] HostSparsePageView GetView() const {
    return {offset.ConstHostSpan(), data.ConstHostSpan()};
  }
@ -353,12 +353,12 @@ class SparsePage {
  virtual ~SparsePage() = default;
  /*! \return Number of instances in the page. */
-  inline size_t Size() const {
+  [[nodiscard]] size_t Size() const {
    return offset.Size() == 0 ? 0 : offset.Size() - 1;
  }
  /*! \return estimation of memory cost of this page */
-  inline size_t MemCostBytes() const {
+  [[nodiscard]] size_t MemCostBytes() const {
    return offset.Size() * sizeof(size_t) + data.Size() * sizeof(Entry);
  }
@ -376,7 +376,7 @@ class SparsePage {
    base_rowid = row_id;
  }
-  SparsePage GetTranspose(int num_columns, int32_t n_threads) const;
+  [[nodiscard]] SparsePage GetTranspose(int num_columns, int32_t n_threads) const;
  /**
   * \brief Sort the column index.
@ -385,7 +385,7 @@ class SparsePage {
  /**
   * \brief Check wether the column index is sorted.
   */
-  bool IsIndicesSorted(int32_t n_threads) const;
+  [[nodiscard]] bool IsIndicesSorted(int32_t n_threads) const;
  /**
   * \brief Reindex the column index with an offset.
   */
@ -440,49 +440,7 @@ class SortedCSCPage : public SparsePage {
  explicit SortedCSCPage(SparsePage page) : SparsePage(std::move(page)) {}
 };
-class EllpackPageImpl;
+class EllpackPage;
 /*!
 * \brief A page stored in ELLPACK format.
 *
 * This class uses the PImpl idiom (https://en.cppreference.com/w/cpp/language/pimpl) to avoid
 * including CUDA-specific implementation details in the header.
 */
 class EllpackPage {
 public:
  /*!
   * \brief Default constructor.
   *
   * This is used in the external memory case. An empty ELLPACK page is constructed with its content
   * set later by the reader.
   */
  EllpackPage();
  /*!
   * \brief Constructor from an existing DMatrix.
   *
   * This is used in the in-memory case. The ELLPACK page is constructed from an existing DMatrix
   * in CSR format.
   */
  explicit EllpackPage(Context const* ctx, DMatrix* dmat, const BatchParam& param);
  /*! \brief Destructor. */
  ~EllpackPage();
  EllpackPage(EllpackPage&& that);
  /*! \return Number of instances in the page. */
  size_t Size() const;
  /*! \brief Set the base row id for this page. */
  void SetBaseRowId(std::size_t row_id);
  const EllpackPageImpl* Impl() const { return impl_.get(); }
  EllpackPageImpl* Impl() { return impl_.get(); }
 private:
  std::unique_ptr<EllpackPageImpl> impl_;
 };
 class GHistIndexMatrix;
 template<typename T>
@ -492,7 +450,7 @@ class BatchIteratorImpl {
  virtual ~BatchIteratorImpl() = default;
  virtual const T& operator*() const = 0;
  virtual BatchIteratorImpl& operator++() = 0;
-  virtual bool AtEnd() const = 0;
+  [[nodiscard]] virtual bool AtEnd() const = 0;
  virtual std::shared_ptr<T const> Page() const = 0;
 };
@ -519,12 +477,12 @@ class BatchIterator {
    return !impl_->AtEnd();
  }
-  bool AtEnd() const {
+  [[nodiscard]] bool AtEnd() const {
    CHECK(impl_ != nullptr);
    return impl_->AtEnd();
  }
-  std::shared_ptr<T const> Page() const {
+  [[nodiscard]] std::shared_ptr<T const> Page() const {
    return impl_->Page();
  }
@ -563,15 +521,15 @@ class DMatrix {
    this->Info().SetInfo(ctx, key, StringView{interface_str});
  }
  /*! \brief meta information of the dataset */
-  virtual const MetaInfo& Info() const = 0;
+  [[nodiscard]] virtual const MetaInfo& Info() const = 0;
  /*! \brief Get thread local memory for returning data from DMatrix. */
-  XGBAPIThreadLocalEntry& GetThreadLocal() const;
+  [[nodiscard]] XGBAPIThreadLocalEntry& GetThreadLocal() const;
  /**
   * \brief Get the context object of this DMatrix.  The context is created during construction of
   *        DMatrix with user specified `nthread` parameter.
   */
-  virtual Context const* Ctx() const = 0;
+  [[nodiscard]] virtual Context const* Ctx() const = 0;
  /**
   * \brief Gets batches. Use range based for loop over BatchSet to access individual batches.
@ -583,16 +541,16 @@ class DMatrix {
  template <typename T>
  BatchSet<T> GetBatches(Context const* ctx, const BatchParam& param);
  template <typename T>
-  bool PageExists() const;
+  [[nodiscard]] bool PageExists() const;
  // the following are column meta data, should be able to answer them fast.
  /*! \return Whether the data columns single column block. */
-  virtual bool SingleColBlock() const = 0;
+  [[nodiscard]] virtual bool SingleColBlock() const = 0;
  /*! \brief virtual destructor */
  virtual ~DMatrix();
  /*! \brief Whether the matrix is dense. */
-  bool IsDense() const {
+  [[nodiscard]] bool IsDense() const {
    return Info().num_nonzero_ == Info().num_row_ * Info().num_col_;
  }
@ -695,9 +653,9 @@ class DMatrix {
                                                      BatchParam const& param) = 0;
  virtual BatchSet<ExtSparsePage> GetExtBatches(Context const* ctx, BatchParam const& param) = 0;
-  virtual bool EllpackExists() const = 0;
+  [[nodiscard]] virtual bool EllpackExists() const = 0;
-  virtual bool GHistIndexExists() const = 0;
+  [[nodiscard]] virtual bool GHistIndexExists() const = 0;
-  virtual bool SparsePageExists() const = 0;
+  [[nodiscard]] virtual bool SparsePageExists() const = 0;
 };
 template <>
--- a/python-package/xgboost/core.py
+++ b/python-package/xgboost/core.py
@ -3,6 +3,7 @@
 """Core XGBoost Library."""
 import copy
 import ctypes
 import importlib.util
 import json
 import os
 import re
@ -381,6 +382,54 @@ def c_array(
    return (ctype * len(values))(*values)
 def from_array_interface(interface: dict) -> NumpyOrCupy:
    """Convert array interface to numpy or cupy array"""
    class Array:  # pylint: disable=too-few-public-methods
        """Wrapper type for communicating with numpy and cupy."""
        _interface: Optional[dict] = None
        @property
        def __array_interface__(self) -> Optional[dict]:
            return self._interface
        @__array_interface__.setter
        def __array_interface__(self, interface: dict) -> None:
            self._interface = copy.copy(interface)
            # converts some fields to tuple as required by numpy
            self._interface["shape"] = tuple(self._interface["shape"])
            self._interface["data"] = tuple(self._interface["data"])
            if self._interface.get("strides", None) is not None:
                self._interface["strides"] = tuple(self._interface["strides"])
        @property
        def __cuda_array_interface__(self) -> Optional[dict]:
            return self.__array_interface__
        @__cuda_array_interface__.setter
        def __cuda_array_interface__(self, interface: dict) -> None:
            self.__array_interface__ = interface
    arr = Array()
    if "stream" in interface:
        # CUDA stream is presented, this is a __cuda_array_interface__.
        spec = importlib.util.find_spec("cupy")
        if spec is None:
            raise ImportError("`cupy` is required for handling CUDA buffer.")
        import cupy as cp  # pylint: disable=import-error
        arr.__cuda_array_interface__ = interface
        out = cp.array(arr, copy=True)
    else:
        arr.__array_interface__ = interface
        out = np.array(arr, copy=True)
    return out
 def _prediction_output(
    shape: CNumericPtr, dims: c_bst_ulong, predts: CFloatPtr, is_cuda: bool
 ) -> NumpyOrCupy:
@ -1060,6 +1109,32 @@ class DMatrix:  # pylint: disable=too-many-instance-attributes,too-many-public-m
        )
        return ret
    def get_quantile_cut(self) -> Tuple[np.ndarray, np.ndarray]:
        """Get quantile cuts for quantization."""
        n_features = self.num_col()
        c_sindptr = ctypes.c_char_p()
        c_sdata = ctypes.c_char_p()
        config = make_jcargs()
        _check_call(
            _LIB.XGDMatrixGetQuantileCut(
                self.handle, config, ctypes.byref(c_sindptr), ctypes.byref(c_sdata)
            )
        )
        assert c_sindptr.value is not None
        assert c_sdata.value is not None
        i_indptr = json.loads(c_sindptr.value)
        indptr = from_array_interface(i_indptr)
        assert indptr.size == n_features + 1
        assert indptr.dtype == np.uint64
        i_data = json.loads(c_sdata.value)
        data = from_array_interface(i_data)
        assert data.size == indptr[-1]
        assert data.dtype == np.float32
        return indptr, data
    def num_row(self) -> int:
        """Get the number of rows in the DMatrix."""
        ret = c_bst_ulong()
--- a/python-package/xgboost/testing/init.py
+++ b/python-package/xgboost/testing/init.py
@ -265,6 +265,14 @@ def make_batches(
    return X, y, w
 def make_regression(
    n_samples: int, n_features: int, use_cupy: bool
 ) -> Tuple[ArrayLike, ArrayLike, ArrayLike]:
    """Make a simple regression dataset."""
    X, y, w = make_batches(n_samples, n_features, 1, use_cupy)
    return X[0], y[0], w[0]
 def make_batches_sparse(
    n_samples_per_batch: int, n_features: int, n_batches: int, sparsity: float
 ) -> Tuple[List[sparse.csr_matrix], List[np.ndarray], List[np.ndarray]]:
--- a/python-package/xgboost/testing/updater.py
+++ b/python-package/xgboost/testing/updater.py
@ -1,7 +1,7 @@
 """Tests for updaters."""
 import json
 from functools import partial, update_wrapper
-from typing import Dict
+from typing import Any, Dict
 import numpy as np
@ -159,3 +159,100 @@ def check_quantile_loss(tree_method: str, weighted: bool) -> None:
    for i in range(alpha.shape[0]):
        np.testing.assert_allclose(predts[:, i], predt_multi[:, i])
 def check_cut(
    n_entries: int, indptr: np.ndarray, data: np.ndarray, dtypes: Any
 ) -> None:
    """Check the cut values."""
    from pandas.api.types import is_categorical_dtype
    assert data.shape[0] == indptr[-1]
    assert data.shape[0] == n_entries
    assert indptr.dtype == np.uint64
    for i in range(1, indptr.size):
        beg = int(indptr[i - 1])
        end = int(indptr[i])
        for j in range(beg + 1, end):
            assert data[j] > data[j - 1]
            if is_categorical_dtype(dtypes[i - 1]):
                assert data[j] == data[j - 1] + 1
 def check_get_quantile_cut_device(tree_method: str, use_cupy: bool) -> None:
    """Check with optional cupy."""
    from pandas.api.types import is_categorical_dtype
    n_samples = 1024
    n_features = 14
    max_bin = 16
    dtypes = [np.float32] * n_features
    # numerical
    X, y, w = tm.make_regression(n_samples, n_features, use_cupy=use_cupy)
    # - qdm
    Xyw: xgb.DMatrix = xgb.QuantileDMatrix(X, y, weight=w, max_bin=max_bin)
    indptr, data = Xyw.get_quantile_cut()
    check_cut((max_bin + 1) * n_features, indptr, data, dtypes)
    # - dm
    Xyw = xgb.DMatrix(X, y, weight=w)
    xgb.train({"tree_method": tree_method, "max_bin": max_bin}, Xyw)
    indptr, data = Xyw.get_quantile_cut()
    check_cut((max_bin + 1) * n_features, indptr, data, dtypes)
    # - ext mem
    n_batches = 3
    n_samples_per_batch = 256
    it = tm.IteratorForTest(
        *tm.make_batches(n_samples_per_batch, n_features, n_batches, use_cupy),
        cache="cache",
    )
    Xy: xgb.DMatrix = xgb.DMatrix(it)
    xgb.train({"tree_method": tree_method, "max_bin": max_bin}, Xyw)
    indptr, data = Xyw.get_quantile_cut()
    check_cut((max_bin + 1) * n_features, indptr, data, dtypes)
    # categorical
    n_categories = 32
    X, y = tm.make_categorical(n_samples, n_features, n_categories, False, sparsity=0.8)
    if use_cupy:
        import cudf  # pylint: disable=import-error
        import cupy as cp  # pylint: disable=import-error
        X = cudf.from_pandas(X)
        y = cp.array(y)
    # - qdm
    Xy = xgb.QuantileDMatrix(X, y, max_bin=max_bin, enable_categorical=True)
    indptr, data = Xy.get_quantile_cut()
    check_cut(n_categories * n_features, indptr, data, X.dtypes)
    # - dm
    Xy = xgb.DMatrix(X, y, enable_categorical=True)
    xgb.train({"tree_method": tree_method, "max_bin": max_bin}, Xy)
    indptr, data = Xy.get_quantile_cut()
    check_cut(n_categories * n_features, indptr, data, X.dtypes)
    # mixed
    X, y = tm.make_categorical(
        n_samples, n_features, n_categories, False, sparsity=0.8, cat_ratio=0.5
    )
    n_cat_features = len([0 for dtype in X.dtypes if is_categorical_dtype(dtype)])
    n_num_features = n_features - n_cat_features
    n_entries = n_categories * n_cat_features + (max_bin + 1) * n_num_features
    # - qdm
    Xy = xgb.QuantileDMatrix(X, y, max_bin=max_bin, enable_categorical=True)
    indptr, data = Xy.get_quantile_cut()
    check_cut(n_entries, indptr, data, X.dtypes)
    # - dm
    Xy = xgb.DMatrix(X, y, enable_categorical=True)
    xgb.train({"tree_method": tree_method, "max_bin": max_bin}, Xy)
    indptr, data = Xy.get_quantile_cut()
    check_cut(n_entries, indptr, data, X.dtypes)
 def check_get_quantile_cut(tree_method: str) -> None:
    """Check the quantile cut getter."""
    use_cupy = tree_method == "gpu_hist"
    check_get_quantile_cut_device(tree_method, False)
    if use_cupy:
        check_get_quantile_cut_device(tree_method, True)
--- a/src/c_api/c_api.cc
+++ b/src/c_api/c_api.cc
@ -3,7 +3,7 @@
 */
 #include "xgboost/c_api.h"
-#include <algorithm>                         // for copy
+#include <algorithm>                         // for copy, transform
 #include <cinttypes>                         // for strtoimax
 #include <cmath>                             // for nan
 #include <cstring>                           // for strcmp
@ -20,9 +20,11 @@
 #include "../collective/communicator-inl.h"  // for Allreduce, Broadcast, Finalize, GetProcessor...
 #include "../common/api_entry.h"             // for XGBAPIThreadLocalEntry
 #include "../common/charconv.h"              // for from_chars, to_chars, NumericLimits, from_ch...
 #include "../common/hist_util.h"             // for HistogramCuts
 #include "../common/io.h"                    // for FileExtension, LoadSequentialFile, MemoryBuf...
 #include "../common/threading_utils.h"       // for OmpGetNumThreads, ParallelFor
 #include "../data/adapter.h"                 // for ArrayAdapter, DenseAdapter, RecordBatchesIte...
 #include "../data/ellpack_page.h"            // for EllpackPage
 #include "../data/proxy_dmatrix.h"           // for DMatrixProxy
 #include "../data/simple_dmatrix.h"          // for SimpleDMatrix
 #include "c_api_error.h"                     // for xgboost_CHECK_C_ARG_PTR, API_END, API_BEGIN
@ -785,6 +787,104 @@ XGB_DLL int XGDMatrixGetDataAsCSR(DMatrixHandle const handle, char const *config
  API_END();
 }
 namespace {
 template <typename Page>
 void GetCutImpl(Context const *ctx, std::shared_ptr<DMatrix> p_m,
                std::vector<std::uint64_t> *p_indptr, std::vector<float> *p_data) {
  auto &indptr = *p_indptr;
  auto &data = *p_data;
  for (auto const &page : p_m->GetBatches<Page>(ctx, {})) {
    auto const &cut = page.Cuts();
    auto const &ptrs = cut.Ptrs();
    indptr.resize(ptrs.size());
    auto const &vals = cut.Values();
    auto const &mins = cut.MinValues();
    bst_feature_t n_features = p_m->Info().num_col_;
    auto ft = p_m->Info().feature_types.ConstHostSpan();
    std::size_t n_categories = std::count_if(ft.cbegin(), ft.cend(),
                                             [](auto t) { return t == FeatureType::kCategorical; });
    data.resize(vals.size() + n_features - n_categories);  // |vals| + |mins|
    std::size_t i{0}, n_numeric{0};
    for (bst_feature_t fidx = 0; fidx < n_features; ++fidx) {
      CHECK_LT(i, data.size());
      bool is_numeric = !common::IsCat(ft, fidx);
      if (is_numeric) {
        data[i] = mins[fidx];
        i++;
      }
      auto beg = ptrs[fidx];
      auto end = ptrs[fidx + 1];
      CHECK_LE(end, data.size());
      std::copy(vals.cbegin() + beg, vals.cbegin() + end, data.begin() + i);
      i += (end - beg);
      // shift by min values.
      indptr[fidx] = ptrs[fidx] + n_numeric;
      if (is_numeric) {
        n_numeric++;
      }
    }
    CHECK_EQ(n_numeric, n_features - n_categories);
    indptr.back() = data.size();
    CHECK_EQ(indptr.back(), vals.size() + mins.size() - n_categories);
    break;
  }
 }
 }  // namespace
 XGB_DLL int XGDMatrixGetQuantileCut(DMatrixHandle const handle, char const *config,
                                    char const **out_indptr, char const **out_data) {
  API_BEGIN();
  CHECK_HANDLE();
  auto p_m = CastDMatrixHandle(handle);
  xgboost_CHECK_C_ARG_PTR(config);
  xgboost_CHECK_C_ARG_PTR(out_indptr);
  xgboost_CHECK_C_ARG_PTR(out_data);
  auto jconfig = Json::Load(StringView{config});
  if (!p_m->PageExists<GHistIndexMatrix>() && !p_m->PageExists<EllpackPage>()) {
    LOG(FATAL) << "The quantile cut hasn't been generated yet. Unless this is a `QuantileDMatrix`, "
                  "quantile cut is generated during training.";
  }
  // Get return buffer
  auto &data = p_m->GetThreadLocal().ret_vec_float;
  auto &indptr = p_m->GetThreadLocal().ret_vec_u64;
  if (p_m->PageExists<GHistIndexMatrix>()) {
    auto ctx = p_m->Ctx()->IsCPU() ? *p_m->Ctx() : p_m->Ctx()->MakeCPU();
    GetCutImpl<GHistIndexMatrix>(&ctx, p_m, &indptr, &data);
  } else {
    auto ctx = p_m->Ctx()->IsCUDA() ? *p_m->Ctx() : p_m->Ctx()->MakeCUDA(0);
    GetCutImpl<EllpackPage>(&ctx, p_m, &indptr, &data);
  }
  // Create a CPU context
  Context ctx;
  // Get return buffer
  auto &ret_vec_str = p_m->GetThreadLocal().ret_vec_str;
  ret_vec_str.clear();
  ret_vec_str.emplace_back(linalg::ArrayInterfaceStr(
      linalg::MakeTensorView(&ctx, common::Span{indptr.data(), indptr.size()}, indptr.size())));
  ret_vec_str.emplace_back(linalg::ArrayInterfaceStr(
      linalg::MakeTensorView(&ctx, common::Span{data.data(), data.size()}, data.size())));
  auto &charp_vecs = p_m->GetThreadLocal().ret_vec_charp;
  charp_vecs.resize(ret_vec_str.size());
  std::transform(ret_vec_str.cbegin(), ret_vec_str.cend(), charp_vecs.begin(),
                 [](auto const &str) { return str.c_str(); });
  *out_indptr = charp_vecs[0];
  *out_data = charp_vecs[1];
  API_END();
 }
 // xgboost implementation
 XGB_DLL int XGBoosterCreate(const DMatrixHandle dmats[],
                            xgboost::bst_ulong len,
--- a/src/common/api_entry.h
+++ b/src/common/api_entry.h
@ -24,6 +24,8 @@ struct XGBAPIThreadLocalEntry {
  std::vector<const char *> ret_vec_charp;
  /*! \brief returning float vector. */
  std::vector<float> ret_vec_float;
  /*! \brief returning uint vector. */
  std::vector<std::uint64_t> ret_vec_u64;
  /*! \brief temp variable of gradient pairs. */
  std::vector<GradientPair> tmp_gpair;
  /*! \brief Temp variable for returning prediction result. */
--- a/src/data/array_interface.h
+++ b/src/data/array_interface.h
@ -455,7 +455,7 @@ class ArrayInterface {
  explicit ArrayInterface(std::string const &str) : ArrayInterface{StringView{str}} {}
-  explicit ArrayInterface(StringView str) : ArrayInterface<D>{Json::Load(str)} {}
+  explicit ArrayInterface(StringView str) : ArrayInterface{Json::Load(str)} {}
  void AssignType(StringView typestr) {
    using T = ArrayInterfaceHandler::Type;
--- a/src/data/ellpack_page.cc
+++ b/src/data/ellpack_page.cc
@ -3,12 +3,20 @@
 */
 #ifndef XGBOOST_USE_CUDA
 #include "ellpack_page.h"
 #include <xgboost/data.h>
 // dummy implementation of EllpackPage in case CUDA is not used
 namespace xgboost {
-class EllpackPageImpl {};
+class EllpackPageImpl {
  common::HistogramCuts cuts_;
 public:
  [[nodiscard]] common::HistogramCuts& Cuts() { return cuts_; }
  [[nodiscard]] common::HistogramCuts const& Cuts() const { return cuts_; }
 };
 EllpackPage::EllpackPage() = default;
@ -32,6 +40,17 @@ size_t EllpackPage::Size() const {
  return 0;
 }
 [[nodiscard]] common::HistogramCuts& EllpackPage::Cuts() {
  LOG(FATAL) << "Internal Error: XGBoost is not compiled with CUDA but "
                "EllpackPage is required";
  return impl_->Cuts();
 }
 [[nodiscard]] common::HistogramCuts const& EllpackPage::Cuts() const {
  LOG(FATAL) << "Internal Error: XGBoost is not compiled with CUDA but "
                "EllpackPage is required";
  return impl_->Cuts();
 }
 }  // namespace xgboost
 #endif  // XGBOOST_USE_CUDA
--- a/src/data/ellpack_page.cu
+++ b/src/data/ellpack_page.cu
@ -4,6 +4,10 @@
 #include <thrust/iterator/discard_iterator.h>
 #include <thrust/iterator/transform_output_iterator.h>
 #include <algorithm>  // for copy
 #include <utility>    // for move
 #include <vector>     // for vector
 #include "../common/categorical.h"
 #include "../common/cuda_context.cuh"
 #include "../common/hist_util.cuh"
@ -11,6 +15,7 @@
 #include "../common/transform_iterator.h"  // MakeIndexTransformIter
 #include "./ellpack_page.cuh"
 #include "device_adapter.cuh"  // for HasInfInData
 #include "ellpack_page.h"
 #include "gradient_index.h"
 #include "xgboost/data.h"
@ -29,6 +34,16 @@ size_t EllpackPage::Size() const { return impl_->Size(); }
 void EllpackPage::SetBaseRowId(std::size_t row_id) { impl_->SetBaseRowId(row_id); }
 [[nodiscard]] common::HistogramCuts& EllpackPage::Cuts() {
  CHECK(impl_);
  return impl_->Cuts();
 }
 [[nodiscard]] common::HistogramCuts const& EllpackPage::Cuts() const {
  CHECK(impl_);
  return impl_->Cuts();
 }
 // Bin each input data entry, store the bin indices in compressed form.
 __global__ void CompressBinEllpackKernel(
    common::CompressedBufferWriter wr,
--- a/src/data/ellpack_page.cuh
+++ b/src/data/ellpack_page.cuh
@ -1,17 +1,18 @@
-/*!
+/**
- * Copyright 2019 by XGBoost Contributors
+ * Copyright 2019-2023, XGBoost Contributors
 */
-#ifndef XGBOOST_DATA_ELLPACK_PAGE_H_
+#ifndef XGBOOST_DATA_ELLPACK_PAGE_CUH_
-#define XGBOOST_DATA_ELLPACK_PAGE_H_
+#define XGBOOST_DATA_ELLPACK_PAGE_CUH_
 #include <thrust/binary_search.h>
 #include <xgboost/data.h>
 #include "../common/categorical.h"
 #include "../common/compressed_iterator.h"
 #include "../common/device_helpers.cuh"
 #include "../common/hist_util.h"
-#include "../common/categorical.h"
+#include "ellpack_page.h"
 #include <thrust/binary_search.h>
 namespace xgboost {
 /** \brief Struct for accessing and manipulating an ELLPACK matrix on the
@ -194,8 +195,8 @@ class EllpackPageImpl {
    base_rowid = row_id;
  }
-  common::HistogramCuts& Cuts() { return cuts_; }
+  [[nodiscard]] common::HistogramCuts& Cuts() { return cuts_; }
-  common::HistogramCuts const& Cuts() const { return cuts_; }
+  [[nodiscard]] common::HistogramCuts const& Cuts() const { return cuts_; }
  /*! \return Estimation of memory cost of this page. */
  static size_t MemCostBytes(size_t num_rows, size_t row_stride, const common::HistogramCuts&cuts) ;
@ -256,4 +257,4 @@ inline size_t GetRowStride(DMatrix* dmat) {
 }
 }  // namespace xgboost
-#endif  // XGBOOST_DATA_ELLPACK_PAGE_H_
+#endif  // XGBOOST_DATA_ELLPACK_PAGE_CUH_
--- a/src/data/ellpack_page.h
+++ b/src/data/ellpack_page.h
@ -0,0 +1,59 @@
 /**
 * Copyright 2017-2023 by XGBoost Contributors
 */
 #ifndef XGBOOST_DATA_ELLPACK_PAGE_H_
 #define XGBOOST_DATA_ELLPACK_PAGE_H_
 #include <memory>  // for unique_ptr
 #include "../common/hist_util.h"  // for HistogramCuts
 #include "xgboost/context.h"      // for Context
 #include "xgboost/data.h"         // for DMatrix, BatchParam
 namespace xgboost {
 class EllpackPageImpl;
 /**
 * @brief A page stored in ELLPACK format.
 *
 * This class uses the PImpl idiom (https://en.cppreference.com/w/cpp/language/pimpl) to avoid
 * including CUDA-specific implementation details in the header.
 */
 class EllpackPage {
 public:
  /**
   * @brief Default constructor.
   *
   * This is used in the external memory case. An empty ELLPACK page is constructed with its content
   * set later by the reader.
   */
  EllpackPage();
  /**
   * @brief Constructor from an existing DMatrix.
   *
   * This is used in the in-memory case. The ELLPACK page is constructed from an existing DMatrix
   * in CSR format.
   */
  explicit EllpackPage(Context const* ctx, DMatrix* dmat, const BatchParam& param);
  /*! \brief Destructor. */
  ~EllpackPage();
  EllpackPage(EllpackPage&& that);
  /*! \return Number of instances in the page. */
  [[nodiscard]] size_t Size() const;
  /*! \brief Set the base row id for this page. */
  void SetBaseRowId(std::size_t row_id);
  [[nodiscard]] const EllpackPageImpl* Impl() const { return impl_.get(); }
  EllpackPageImpl* Impl() { return impl_.get(); }
  [[nodiscard]] common::HistogramCuts& Cuts();
  [[nodiscard]] common::HistogramCuts const& Cuts() const;
 private:
  std::unique_ptr<EllpackPageImpl> impl_;
 };
 }  // namespace xgboost
 #endif  // XGBOOST_DATA_ELLPACK_PAGE_H_
--- a/src/data/ellpack_page_source.cu
+++ b/src/data/ellpack_page_source.cu
@ -5,10 +5,10 @@
 #include <utility>
 #include "ellpack_page.cuh"
 #include "ellpack_page.h"  // for EllpackPage
 #include "ellpack_page_source.h"
-namespace xgboost {
+namespace xgboost::data {
 namespace data {
 void EllpackPageSource::Fetch() {
  dh::safe_cuda(cudaSetDevice(device_));
  if (!this->ReadCache()) {
@ -27,5 +27,4 @@ void EllpackPageSource::Fetch() {
    this->WriteCache();
  }
 }
-}  // namespace data
+}  // namespace xgboost::data
 }  // namespace xgboost
--- a/src/data/ellpack_page_source.h
+++ b/src/data/ellpack_page_source.h
@ -6,17 +6,17 @@
 #define XGBOOST_DATA_ELLPACK_PAGE_SOURCE_H_
 #include <xgboost/data.h>
 #include <memory>
 #include <string>
 #include <utility>
 #include "../common/common.h"
 #include "../common/hist_util.h"
 #include "ellpack_page.h"  // for EllpackPage
 #include "sparse_page_source.h"
-namespace xgboost {
+namespace xgboost::data {
 namespace data {
 class EllpackPageSource : public PageSourceIncMixIn<EllpackPage> {
  bool is_dense_;
  size_t row_stride_;
@ -53,7 +53,6 @@ inline void EllpackPageSource::Fetch() {
  common::AssertGPUSupport();
 }
 #endif  // !defined(XGBOOST_USE_CUDA)
-}  // namespace data
+}  // namespace xgboost::data
 }  // namespace xgboost
 #endif  // XGBOOST_DATA_ELLPACK_PAGE_SOURCE_H_
--- a/src/data/gradient_index.h
+++ b/src/data/gradient_index.h
@ -245,6 +245,9 @@ class GHistIndexMatrix {
                                std::vector<float> const& values, std::vector<float> const& mins,
                                bst_row_t ridx, bst_feature_t fidx, bool is_cat) const;
  [[nodiscard]] common::HistogramCuts& Cuts() { return cut; }
  [[nodiscard]] common::HistogramCuts const& Cuts() const { return cut; }
 private:
  std::unique_ptr<common::ColumnMatrix> columns_;
  std::vector<size_t> hit_count_tloc_;
--- a/src/data/simple_dmatrix.cc
+++ b/src/data/simple_dmatrix.cc
@ -16,7 +16,8 @@
 #include "../common/threading_utils.h"
 #include "./simple_batch_iterator.h"
 #include "adapter.h"
-#include "batch_utils.h"  // for CheckEmpty, RegenGHist
+#include "batch_utils.h"   // for CheckEmpty, RegenGHist
 #include "ellpack_page.h"  // for EllpackPage
 #include "gradient_index.h"
 #include "xgboost/c_api.h"
 #include "xgboost/data.h"
--- a/src/data/sparse_page_dmatrix.cc
+++ b/src/data/sparse_page_dmatrix.cc
@ -165,7 +165,10 @@ BatchSet<SortedCSCPage> SparsePageDMatrix::GetSortedColumnBatches(Context const
 BatchSet<GHistIndexMatrix> SparsePageDMatrix::GetGradientIndex(Context const *ctx,
                                                               const BatchParam &param) {
-  CHECK_GE(param.max_bin, 2);
+  if (param.Initialized()) {
    CHECK_GE(param.max_bin, 2);
  }
  detail::CheckEmpty(batch_param_, param);
  auto id = MakeCache(this, ".gradient_index.page", cache_prefix_, &cache_info_);
  this->InitializeSparsePage(ctx);
  if (!cache_info_.at(id)->written || detail::RegenGHist(batch_param_, param)) {
--- a/src/data/sparse_page_dmatrix.cu
+++ b/src/data/sparse_page_dmatrix.cu
@ -1,6 +1,8 @@
 /**
 * Copyright 2021-2023 by XGBoost contributors
 */
 #include <memory>
 #include "../common/hist_util.cuh"
 #include "batch_utils.h"  // for CheckEmpty, RegenGHist
 #include "ellpack_page.cuh"
@ -11,7 +13,9 @@ namespace xgboost::data {
 BatchSet<EllpackPage> SparsePageDMatrix::GetEllpackBatches(Context const* ctx,
                                                           const BatchParam& param) {
  CHECK(ctx->IsCUDA());
-  CHECK_GE(param.max_bin, 2);
+  if (param.Initialized()) {
    CHECK_GE(param.max_bin, 2);
  }
  detail::CheckEmpty(batch_param_, param);
  auto id = MakeCache(this, ".ellpack.page", cache_prefix_, &cache_info_);
  size_t row_stride = 0;
@ -21,8 +25,8 @@ BatchSet<EllpackPage> SparsePageDMatrix::GetEllpackBatches(Context const* ctx,
    cache_info_.erase(id);
    MakeCache(this, ".ellpack.page", cache_prefix_, &cache_info_);
    std::unique_ptr<common::HistogramCuts> cuts;
-    cuts.reset(
+    cuts = std::make_unique<common::HistogramCuts>(
-        new common::HistogramCuts{common::DeviceSketch(ctx->gpu_id, this, param.max_bin, 0)});
+        common::DeviceSketch(ctx->gpu_id, this, param.max_bin, 0));
    this->InitializeSparsePage(ctx);  // reset after use.
    row_stride = GetRowStride(this);
--- a/src/tree/updater_gpu_hist.cu
+++ b/src/tree/updater_gpu_hist.cu
@ -21,6 +21,7 @@
 #include "../common/io.h"
 #include "../common/timer.h"
 #include "../data/ellpack_page.cuh"
 #include "../data/ellpack_page.h"
 #include "constraints.cuh"
 #include "driver.h"
 #include "gpu_hist/evaluate_splits.cuh"
--- a/tests/cpp/c_api/test_c_api.cc
+++ b/tests/cpp/c_api/test_c_api.cc
@ -8,6 +8,7 @@
 #include <xgboost/learner.h>
 #include <xgboost/version_config.h>
 #include <array>    // for array
 #include <cstddef>  // std::size_t
 #include <limits>   // std::numeric_limits
 #include <string>   // std::string
@ -15,6 +16,11 @@
 #include "../../../src/c_api/c_api_error.h"
 #include "../../../src/common/io.h"
 #include "../../../src/data/adapter.h"              // for ArrayAdapter
 #include "../../../src/data/array_interface.h"      // for ArrayInterface
 #include "../../../src/data/gradient_index.h"       // for GHistIndexMatrix
 #include "../../../src/data/iterative_dmatrix.h"    // for IterativeDMatrix
 #include "../../../src/data/sparse_page_dmatrix.h"  // for SparsePageDMatrix
 #include "../helpers.h"
 TEST(CAPI, XGDMatrixCreateFromMatDT) {
@ -137,9 +143,9 @@ TEST(CAPI, ConfigIO) {
  BoosterHandle handle = learner.get();
  learner->UpdateOneIter(0, p_dmat);
-  char const* out[1];
+  std::array<char const* , 1> out;
  bst_ulong len {0};
-  XGBoosterSaveJsonConfig(handle, &len, out);
+  XGBoosterSaveJsonConfig(handle, &len, out.data());
  std::string config_str_0 { out[0] };
  auto config_0 = Json::Load({config_str_0.c_str(), config_str_0.size()});
@ -147,7 +153,7 @@ TEST(CAPI, ConfigIO) {
  bst_ulong len_1 {0};
  std::string config_str_1 { out[0] };
-  XGBoosterSaveJsonConfig(handle, &len_1, out);
+  XGBoosterSaveJsonConfig(handle, &len_1, out.data());
  auto config_1 = Json::Load({config_str_1.c_str(), config_str_1.size()});
  ASSERT_EQ(config_0, config_1);
@ -266,9 +272,9 @@ TEST(CAPI, DMatrixSetFeatureName) {
    ASSERT_EQ(std::to_string(i), c_out_features[i]);
  }
-  char const* feat_types [] {"i", "q"};
+  std::array<char const *, 2> feat_types{"i", "q"};
  static_assert(sizeof(feat_types) / sizeof(feat_types[0]) == kCols);
-  XGDMatrixSetStrFeatureInfo(handle, "feature_type", feat_types, kCols);
+  XGDMatrixSetStrFeatureInfo(handle, "feature_type", feat_types.data(), kCols);
  char const **c_out_types;
  XGDMatrixGetStrFeatureInfo(handle, u8"feature_type", &out_len,
                             &c_out_types);
@ -405,4 +411,210 @@ TEST(CAPI, JArgs) {
    ASSERT_THROW({ RequiredArg<String>(args, "null", __func__); }, dmlc::Error);
  }
 }
 namespace {
 void MakeLabelForTest(std::shared_ptr<DMatrix> Xy, DMatrixHandle cxy) {
  auto n_samples = Xy->Info().num_row_;
  std::vector<float> y(n_samples);
  for (std::size_t i = 0; i < y.size(); ++i) {
    y[i] = static_cast<float>(i);
  }
  Xy->Info().labels.Reshape(n_samples);
  Xy->Info().labels.Data()->HostVector() = y;
  auto y_int = GetArrayInterface(Xy->Info().labels.Data(), n_samples, 1);
  std::string s_y_int;
  Json::Dump(y_int, &s_y_int);
  XGDMatrixSetInfoFromInterface(cxy, "label", s_y_int.c_str());
 }
 auto MakeSimpleDMatrixForTest(bst_row_t n_samples, bst_feature_t n_features, Json dconfig) {
  HostDeviceVector<float> storage;
  auto arr_int = RandomDataGenerator{n_samples, n_features, 0.5f}.GenerateArrayInterface(&storage);
  data::ArrayAdapter adapter{StringView{arr_int}};
  std::shared_ptr<DMatrix> Xy{
      DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), Context{}.Threads())};
  DMatrixHandle p_fmat;
  std::string s_dconfig;
  Json::Dump(dconfig, &s_dconfig);
  CHECK_EQ(XGDMatrixCreateFromDense(arr_int.c_str(), s_dconfig.c_str(), &p_fmat), 0);
  MakeLabelForTest(Xy, p_fmat);
  return std::pair{p_fmat, Xy};
 }
 auto MakeQDMForTest(Context const *ctx, bst_row_t n_samples, bst_feature_t n_features,
                    Json dconfig) {
  bst_bin_t n_bins{16};
  dconfig["max_bin"] = Integer{n_bins};
  std::size_t n_batches{4};
  std::unique_ptr<ArrayIterForTest> iter_0;
  if (ctx->IsCUDA()) {
    iter_0 = std::make_unique<CudaArrayIterForTest>(0.0f, n_samples, n_features, n_batches);
  } else {
    iter_0 = std::make_unique<NumpyArrayIterForTest>(0.0f, n_samples, n_features, n_batches);
  }
  std::string s_dconfig;
  Json::Dump(dconfig, &s_dconfig);
  DMatrixHandle p_fmat;
  CHECK_EQ(XGQuantileDMatrixCreateFromCallback(static_cast<DataIterHandle>(iter_0.get()),
                                               iter_0->Proxy(), nullptr, Reset, Next,
                                               s_dconfig.c_str(), &p_fmat),
           0);
  std::unique_ptr<ArrayIterForTest> iter_1;
  if (ctx->IsCUDA()) {
    iter_1 = std::make_unique<CudaArrayIterForTest>(0.0f, n_samples, n_features, n_batches);
  } else {
    iter_1 = std::make_unique<NumpyArrayIterForTest>(0.0f, n_samples, n_features, n_batches);
  }
  auto Xy =
      std::make_shared<data::IterativeDMatrix>(iter_1.get(), iter_1->Proxy(), nullptr, Reset, Next,
                                               std::numeric_limits<float>::quiet_NaN(), 0, n_bins);
  return std::pair{p_fmat, Xy};
 }
 auto MakeExtMemForTest(bst_row_t n_samples, bst_feature_t n_features, Json dconfig) {
  std::size_t n_batches{4};
  NumpyArrayIterForTest iter_0{0.0f, n_samples, n_features, n_batches};
  std::string s_dconfig;
  dconfig["cache_prefix"] = String{"cache"};
  Json::Dump(dconfig, &s_dconfig);
  DMatrixHandle p_fmat;
  CHECK_EQ(XGDMatrixCreateFromCallback(static_cast<DataIterHandle>(&iter_0), iter_0.Proxy(), Reset,
                                       Next, s_dconfig.c_str(), &p_fmat),
           0);
  NumpyArrayIterForTest iter_1{0.0f, n_samples, n_features, n_batches};
  auto Xy = std::make_shared<data::SparsePageDMatrix>(
      &iter_1, iter_1.Proxy(), Reset, Next, std::numeric_limits<float>::quiet_NaN(), 0, "");
  MakeLabelForTest(Xy, p_fmat);
  return std::pair{p_fmat, Xy};
 }
 template <typename Page>
 void CheckResult(Context const *ctx, bst_feature_t n_features, std::shared_ptr<DMatrix> Xy,
                 float const *out_data, std::uint64_t const *out_indptr) {
  for (auto const &page : Xy->GetBatches<Page>(ctx, BatchParam{16, 0.2})) {
    auto const &cut = page.Cuts();
    auto const &ptrs = cut.Ptrs();
    auto const &vals = cut.Values();
    auto const &mins = cut.MinValues();
    for (bst_feature_t f = 0; f < Xy->Info().num_col_; ++f) {
      ASSERT_EQ(ptrs[f] + f, out_indptr[f]);
      ASSERT_EQ(mins[f], out_data[out_indptr[f]]);
      auto beg = out_indptr[f];
      auto end = out_indptr[f + 1];
      auto val_beg = ptrs[f];
      for (std::uint64_t i = beg + 1, j = val_beg; i < end; ++i, ++j) {
        ASSERT_EQ(vals[j], out_data[i]);
      }
    }
    ASSERT_EQ(ptrs[n_features] + n_features, out_indptr[n_features]);
  }
 }
 void TestXGDMatrixGetQuantileCut(Context const *ctx) {
  bst_row_t n_samples{1024};
  bst_feature_t n_features{16};
  Json dconfig{Object{}};
  dconfig["ntread"] = Integer{Context{}.Threads()};
  dconfig["missing"] = Number{std::numeric_limits<float>::quiet_NaN()};
  auto check_result = [n_features, &ctx](std::shared_ptr<DMatrix> Xy, StringView s_out_data,
                                         StringView s_out_indptr) {
    auto i_out_data = ArrayInterface<1, false>{s_out_data};
    ASSERT_EQ(i_out_data.type, ArrayInterfaceHandler::kF4);
    auto out_data = static_cast<float const *>(i_out_data.data);
    ASSERT_TRUE(out_data);
    auto i_out_indptr = ArrayInterface<1, false>{s_out_indptr};
    ASSERT_EQ(i_out_indptr.type, ArrayInterfaceHandler::kU8);
    auto out_indptr = static_cast<std::uint64_t const *>(i_out_indptr.data);
    ASSERT_TRUE(out_data);
    if (ctx->IsCPU()) {
      CheckResult<GHistIndexMatrix>(ctx, n_features, Xy, out_data, out_indptr);
    } else {
      CheckResult<EllpackPage>(ctx, n_features, Xy, out_data, out_indptr);
    }
  };
  Json config{Null{}};
  std::string s_config;
  Json::Dump(config, &s_config);
  char const *out_indptr;
  char const *out_data;
  {
    // SimpleDMatrix
    auto [p_fmat, Xy] = MakeSimpleDMatrixForTest(n_samples, n_features, dconfig);
    // assert fail, we don't have the quantile yet.
    ASSERT_EQ(XGDMatrixGetQuantileCut(p_fmat, s_config.c_str(), &out_indptr, &out_data), -1);
    std::array<DMatrixHandle, 1> mats{p_fmat};
    BoosterHandle booster;
    ASSERT_EQ(XGBoosterCreate(mats.data(), 1, &booster), 0);
    ASSERT_EQ(XGBoosterSetParam(booster, "max_bin", "16"), 0);
    if (ctx->IsCUDA()) {
      ASSERT_EQ(XGBoosterSetParam(booster, "tree_method", "gpu_hist"), 0);
    }
    ASSERT_EQ(XGBoosterUpdateOneIter(booster, 0, p_fmat), 0);
    ASSERT_EQ(XGDMatrixGetQuantileCut(p_fmat, s_config.c_str(), &out_indptr, &out_data), 0);
    check_result(Xy, out_data, out_indptr);
    XGDMatrixFree(p_fmat);
    XGBoosterFree(booster);
  }
  {
    // IterativeDMatrix
    auto [p_fmat, Xy] = MakeQDMForTest(ctx, n_samples, n_features, dconfig);
    ASSERT_EQ(XGDMatrixGetQuantileCut(p_fmat, s_config.c_str(), &out_indptr, &out_data), 0);
    check_result(Xy, out_data, out_indptr);
    XGDMatrixFree(p_fmat);
  }
  {
    // SparsePageDMatrix
    auto [p_fmat, Xy] = MakeExtMemForTest(n_samples, n_features, dconfig);
    // assert fail, we don't have the quantile yet.
    ASSERT_EQ(XGDMatrixGetQuantileCut(p_fmat, s_config.c_str(), &out_indptr, &out_data), -1);
    std::array<DMatrixHandle, 1> mats{p_fmat};
    BoosterHandle booster;
    ASSERT_EQ(XGBoosterCreate(mats.data(), 1, &booster), 0);
    ASSERT_EQ(XGBoosterSetParam(booster, "max_bin", "16"), 0);
    if (ctx->IsCUDA()) {
      ASSERT_EQ(XGBoosterSetParam(booster, "tree_method", "gpu_hist"), 0);
    }
    ASSERT_EQ(XGBoosterUpdateOneIter(booster, 0, p_fmat), 0);
    ASSERT_EQ(XGDMatrixGetQuantileCut(p_fmat, s_config.c_str(), &out_indptr, &out_data), 0);
    XGDMatrixFree(p_fmat);
    XGBoosterFree(booster);
  }
 }
 }  // namespace
 TEST(CAPI, XGDMatrixGetQuantileCut) {
  Context ctx;
  TestXGDMatrixGetQuantileCut(&ctx);
 }
 #if defined(XGBOOST_USE_CUDA)
 TEST(CAPI, GPUXGDMatrixGetQuantileCut) {
  auto ctx = MakeCUDACtx(0);
  TestXGDMatrixGetQuantileCut(&ctx);
 }
 #endif  // defined(XGBOOST_USE_CUDA)
 }  // namespace xgboost
--- a/tests/cpp/data/test_ellpack_page.cu
+++ b/tests/cpp/data/test_ellpack_page.cu
@ -8,6 +8,7 @@
 #include "../../../src/common/categorical.h"
 #include "../../../src/common/hist_util.h"
 #include "../../../src/data/ellpack_page.cuh"
 #include "../../../src/data/ellpack_page.h"
 #include "../../../src/tree/param.h"  // TrainParam
 #include "../helpers.h"
 #include "../histogram_helpers.h"
--- a/tests/cpp/data/test_iterative_dmatrix.cu
+++ b/tests/cpp/data/test_iterative_dmatrix.cu
@ -5,6 +5,7 @@
 #include "../../../src/data/device_adapter.cuh"
 #include "../../../src/data/ellpack_page.cuh"
 #include "../../../src/data/ellpack_page.h"
 #include "../../../src/data/iterative_dmatrix.h"
 #include "../../../src/tree/param.h"  // TrainParam
 #include "../helpers.h"
--- a/tests/cpp/data/test_sparse_page_dmatrix.cu
+++ b/tests/cpp/data/test_sparse_page_dmatrix.cu
@ -5,6 +5,7 @@
 #include "../../../src/common/compressed_iterator.h"
 #include "../../../src/data/ellpack_page.cuh"
 #include "../../../src/data/ellpack_page.h"
 #include "../../../src/data/sparse_page_dmatrix.h"
 #include "../../../src/tree/param.h"  // TrainParam
 #include "../filesystem.h"            // dmlc::TemporaryDirectory
--- a/tests/cpp/helpers.cc
+++ b/tests/cpp/helpers.cc
@ -472,6 +472,18 @@ std::shared_ptr<DMatrix> RandomDataGenerator::GenerateQuantileDMatrix(bool with_
  return m;
 }
 #if !defined(XGBOOST_USE_CUDA)
 CudaArrayIterForTest::CudaArrayIterForTest(float sparsity, size_t rows, size_t cols, size_t batches)
    : ArrayIterForTest{sparsity, rows, cols, batches} {
  common::AssertGPUSupport();
 }
 int CudaArrayIterForTest::Next() {
  common::AssertGPUSupport();
  return 0;
 }
 #endif  // !defined(XGBOOST_USE_CUDA)
 NumpyArrayIterForTest::NumpyArrayIterForTest(float sparsity, size_t rows, size_t cols,
                                             size_t batches)
    : ArrayIterForTest{sparsity, rows, cols, batches} {
@ -650,7 +662,7 @@ std::unique_ptr<GradientBooster> CreateTrainedGBM(std::string name, Args kwargs,
 ArrayIterForTest::ArrayIterForTest(float sparsity, size_t rows, size_t cols, size_t batches)
    : rows_{rows}, cols_{cols}, n_batches_{batches} {
  XGProxyDMatrixCreate(&proxy_);
-  rng_.reset(new RandomDataGenerator{rows_, cols_, sparsity});
+  rng_ = std::make_unique<RandomDataGenerator>(rows_, cols_, sparsity);
  std::tie(batches_, interface_) = rng_->GenerateArrayInterfaceBatch(&data_, n_batches_);
 }
--- a/tests/cpp/tree/test_gpu_hist.cu
+++ b/tests/cpp/tree/test_gpu_hist.cu
@ -11,6 +11,8 @@
 #include <vector>
 #include "../../../src/common/common.h"
 #include "../../../src/data/ellpack_page.cuh"  // for EllpackPageImpl
 #include "../../../src/data/ellpack_page.h"    // for EllpackPage
 #include "../../../src/data/sparse_page_source.h"
 #include "../../../src/tree/constraints.cuh"
 #include "../../../src/tree/param.h"  // for TrainParam
--- a/tests/python-gpu/test_from_cupy.py
+++ b/tests/python-gpu/test_from_cupy.py
@ -1,3 +1,4 @@
 import json
 import sys
 import numpy as np
@ -10,6 +11,16 @@ from test_dmatrix import set_base_margin_info
 from xgboost import testing as tm
 cupy = pytest.importorskip("cupy")
 def test_array_interface() -> None:
    arr = cupy.array([[1, 2, 3, 4], [1, 2, 3, 4]])
    i_arr = arr.__cuda_array_interface__
    i_arr = json.loads(json.dumps(i_arr))
    ret = xgb.core.from_array_interface(i_arr)
    np.testing.assert_equal(cupy.asnumpy(arr), cupy.asnumpy(ret))
 def dmatrix_from_cupy(input_type, DMatrixT, missing=np.NAN):
    '''Test constructing DMatrix from cupy'''
--- a/tests/python-gpu/test_gpu_updaters.py
+++ b/tests/python-gpu/test_gpu_updaters.py
@ -8,7 +8,11 @@ from hypothesis import assume, given, note, settings, strategies
 import xgboost as xgb
 from xgboost import testing as tm
 from xgboost.testing.params import cat_parameter_strategy, hist_parameter_strategy
-from xgboost.testing.updater import check_init_estimation, check_quantile_loss
+from xgboost.testing.updater import (
    check_get_quantile_cut,
    check_init_estimation,
    check_quantile_loss,
 )
 sys.path.append("tests/python")
 import test_updaters as test_up
@ -264,3 +268,7 @@ class TestGPUUpdaters:
            },
            num_boost_round=150,
        )
    @pytest.mark.skipif(**tm.no_cudf())
    def test_get_quantile_cut(self) -> None:
        check_get_quantile_cut("gpu_hist")
--- a/tests/python/test_updaters.py
+++ b/tests/python/test_updaters.py
@ -14,7 +14,11 @@ from xgboost.testing.params import (
    hist_multi_parameter_strategy,
    hist_parameter_strategy,
 )
-from xgboost.testing.updater import check_init_estimation, check_quantile_loss
+from xgboost.testing.updater import (
    check_get_quantile_cut,
    check_init_estimation,
    check_quantile_loss,
 )
 def train_result(param, dmat, num_rounds):
@ -537,3 +541,8 @@ class TestTreeMethod:
    @pytest.mark.parametrize("weighted", [True, False])
    def test_quantile_loss(self, weighted: bool) -> None:
        check_quantile_loss("hist", weighted)
    @pytest.mark.skipif(**tm.no_pandas())
    @pytest.mark.parametrize("tree_method", ["hist"])
    def test_get_quantile_cut(self, tree_method: str) -> None:
        check_get_quantile_cut(tree_method)