merge latest, Jan 12 2024

This commit is contained in:
Hui Liu
2024-01-12 09:57:11 -08:00
251 changed files with 9023 additions and 5012 deletions

View File

@@ -364,49 +364,57 @@ XGB_DLL int XGProxyDMatrixCreate(DMatrixHandle *out) {
API_END();
}
XGB_DLL int
XGProxyDMatrixSetDataCudaArrayInterface(DMatrixHandle handle,
char const *c_interface_str) {
XGB_DLL int XGProxyDMatrixSetDataCudaArrayInterface(DMatrixHandle handle,
char const *c_interface_str) {
API_BEGIN();
CHECK_HANDLE();
xgboost_CHECK_C_ARG_PTR(c_interface_str);
auto p_m = static_cast<std::shared_ptr<xgboost::DMatrix> *>(handle);
CHECK(p_m);
auto m = static_cast<xgboost::data::DMatrixProxy*>(p_m->get());
auto m = static_cast<xgboost::data::DMatrixProxy *>(p_m->get());
CHECK(m) << "Current DMatrix type does not support set data.";
m->SetCUDAArray(c_interface_str);
API_END();
}
XGB_DLL int XGProxyDMatrixSetDataCudaColumnar(DMatrixHandle handle,
char const *c_interface_str) {
XGB_DLL int XGProxyDMatrixSetDataCudaColumnar(DMatrixHandle handle, char const *c_interface_str) {
API_BEGIN();
CHECK_HANDLE();
xgboost_CHECK_C_ARG_PTR(c_interface_str);
auto p_m = static_cast<std::shared_ptr<xgboost::DMatrix> *>(handle);
CHECK(p_m);
auto m = static_cast<xgboost::data::DMatrixProxy*>(p_m->get());
auto m = static_cast<xgboost::data::DMatrixProxy *>(p_m->get());
CHECK(m) << "Current DMatrix type does not support set data.";
m->SetCUDAArray(c_interface_str);
API_END();
}
XGB_DLL int XGProxyDMatrixSetDataDense(DMatrixHandle handle,
char const *c_interface_str) {
XGB_DLL int XGProxyDMatrixSetDataColumnar(DMatrixHandle handle, char const *c_interface_str) {
API_BEGIN();
CHECK_HANDLE();
xgboost_CHECK_C_ARG_PTR(c_interface_str);
auto p_m = static_cast<std::shared_ptr<xgboost::DMatrix> *>(handle);
CHECK(p_m);
auto m = static_cast<xgboost::data::DMatrixProxy*>(p_m->get());
auto m = static_cast<xgboost::data::DMatrixProxy *>(p_m->get());
CHECK(m) << "Current DMatrix type does not support set data.";
m->SetColumnarData(c_interface_str);
API_END();
}
XGB_DLL int XGProxyDMatrixSetDataDense(DMatrixHandle handle, char const *c_interface_str) {
API_BEGIN();
CHECK_HANDLE();
xgboost_CHECK_C_ARG_PTR(c_interface_str);
auto p_m = static_cast<std::shared_ptr<xgboost::DMatrix> *>(handle);
CHECK(p_m);
auto m = static_cast<xgboost::data::DMatrixProxy *>(p_m->get());
CHECK(m) << "Current DMatrix type does not support set data.";
m->SetArrayData(c_interface_str);
API_END();
}
XGB_DLL int XGProxyDMatrixSetDataCSR(DMatrixHandle handle, char const *indptr,
char const *indices, char const *data,
xgboost::bst_ulong ncol) {
XGB_DLL int XGProxyDMatrixSetDataCSR(DMatrixHandle handle, char const *indptr, char const *indices,
char const *data, xgboost::bst_ulong ncol) {
API_BEGIN();
CHECK_HANDLE();
xgboost_CHECK_C_ARG_PTR(indptr);
@@ -414,7 +422,7 @@ XGB_DLL int XGProxyDMatrixSetDataCSR(DMatrixHandle handle, char const *indptr,
xgboost_CHECK_C_ARG_PTR(data);
auto p_m = static_cast<std::shared_ptr<xgboost::DMatrix> *>(handle);
CHECK(p_m);
auto m = static_cast<xgboost::data::DMatrixProxy*>(p_m->get());
auto m = static_cast<xgboost::data::DMatrixProxy *>(p_m->get());
CHECK(m) << "Current DMatrix type does not support set data.";
m->SetCSRData(indptr, indices, data, ncol, true);
API_END();
@@ -432,6 +440,25 @@ XGB_DLL int XGDMatrixCreateFromCSREx(const size_t *indptr, const unsigned *indic
API_END();
}
// C API: build a DMatrix from columnar (arrow-style) data described by an
// array-interface JSON string. `c_json_config` carries optional keys
// "missing", "nthread" (default 0) and "data_split_mode" (default 0).
// On success `*out` owns a new shared_ptr<DMatrix>; returns 0, non-zero on error.
XGB_DLL int XGDMatrixCreateFromColumnar(char const *data, char const *c_json_config,
DMatrixHandle *out) {
API_BEGIN();
// Validate required pointer arguments before any dereference.
xgboost_CHECK_C_ARG_PTR(c_json_config);
xgboost_CHECK_C_ARG_PTR(data);
auto config = Json::Load(c_json_config);
float missing = GetMissing(config);
auto n_threads = OptionalArg<Integer, std::int64_t>(config, "nthread", 0);
auto data_split_mode =
static_cast<DataSplitMode>(OptionalArg<Integer, int64_t>(config, "data_split_mode", 0));
// The adapter is a short-lived view; DMatrix::Create copies the data it needs.
data::ColumnarAdapter adapter{data};
// Empty cache prefix: in-memory (SimpleDMatrix) construction, no external cache.
*out = new std::shared_ptr<DMatrix>(
DMatrix::Create(&adapter, missing, n_threads, "", data_split_mode));
API_END();
}
XGB_DLL int XGDMatrixCreateFromCSR(char const *indptr, char const *indices, char const *data,
xgboost::bst_ulong ncol, char const *c_json_config,
DMatrixHandle *out) {
@@ -1199,6 +1226,27 @@ XGB_DLL int XGBoosterPredictFromDense(BoosterHandle handle, char const *array_in
API_END();
}
// C API: in-place prediction from columnar data. `m` may be null, in which
// case a fresh DMatrixProxy is created; otherwise it must wrap a DMatrixProxy
// (checked via dynamic_cast below). Output shape/dim/result buffers are owned
// by the learner and remain valid until the next call on this handle —
// TODO confirm exact lifetime against InplacePredictImpl.
XGB_DLL int XGBoosterPredictFromColumnar(BoosterHandle handle, char const *array_interface,
char const *c_json_config, DMatrixHandle m,
xgboost::bst_ulong const **out_shape,
xgboost::bst_ulong *out_dim, const float **out_result) {
API_BEGIN();
CHECK_HANDLE();
std::shared_ptr<DMatrix> p_m{nullptr};
if (!m) {
// No user-supplied proxy: create a transient one for this prediction.
p_m.reset(new data::DMatrixProxy);
} else {
p_m = *static_cast<std::shared_ptr<DMatrix> *>(m);
}
// Inplace predict only works through the proxy DMatrix type.
auto proxy = dynamic_cast<data::DMatrixProxy *>(p_m.get());
CHECK(proxy) << "Invalid input type for inplace predict.";
xgboost_CHECK_C_ARG_PTR(array_interface);
proxy->SetColumnarData(array_interface);
auto *learner = static_cast<xgboost::Learner *>(handle);
InplacePredictImpl(p_m, c_json_config, learner, out_shape, out_dim, out_result);
API_END();
}
XGB_DLL int XGBoosterPredictFromCSR(BoosterHandle handle, char const *indptr, char const *indices,
char const *data, xgboost::bst_ulong cols,
char const *c_json_config, DMatrixHandle m,
@@ -1268,10 +1316,8 @@ XGB_DLL int XGBoosterLoadModel(BoosterHandle handle, const char* fname) {
namespace {
void WarnOldModel() {
if (XGBOOST_VER_MAJOR >= 2) {
LOG(WARNING) << "Saving into deprecated binary model format, please consider using `json` or "
"`ubj`. Model format will default to JSON in XGBoost 2.2 if not specified.";
}
LOG(WARNING) << "Saving into deprecated binary model format, please consider using `json` or "
"`ubj`. Model format is default to UBJSON in XGBoost 2.1 if not specified.";
}
} // anonymous namespace
@@ -1294,14 +1340,14 @@ XGB_DLL int XGBoosterSaveModel(BoosterHandle handle, const char *fname) {
save_json(std::ios::out);
} else if (common::FileExtension(fname) == "ubj") {
save_json(std::ios::binary);
} else if (XGBOOST_VER_MAJOR == 2 && XGBOOST_VER_MINOR >= 2) {
LOG(WARNING) << "Saving model to JSON as default. You can use file extension `json`, `ubj` or "
"`deprecated` to choose between formats.";
save_json(std::ios::out);
} else {
} else if (common::FileExtension(fname) == "deprecated") {
WarnOldModel();
auto *bst = static_cast<Learner *>(handle);
bst->SaveModel(fo.get());
} else {
LOG(WARNING) << "Saving model in the UBJSON format as default. You can use file extension:"
" `json`, `ubj` or `deprecated` to choose between formats.";
save_json(std::ios::binary);
}
API_END();
}

View File

@@ -73,8 +73,20 @@ inline std::vector<std::string> Split(const std::string& s, char delim) {
return ret;
}
/**
 * @brief Add escapes for a UTF-8 string, appending the result into an
 *        existing buffer supplied by the caller.
 */
void EscapeU8(std::string const &string, std::string *p_buffer);
/**
 * @brief Convenience overload: escape a UTF-8 string and return the escaped
 *        text in a newly created buffer.
 */
inline std::string EscapeU8(std::string const &str) {
std::string buffer;
EscapeU8(str, &buffer);
return buffer;
}
template <typename T>
XGBOOST_DEVICE T Max(T a, T b) {
return a < b ? b : a;

View File

@@ -1099,6 +1099,8 @@ inline void CUDAEvent::Record(CUDAStreamView stream) { // NOLINT
dh::safe_cuda(cudaEventRecord(event_, cudaStream_t{stream}));
}
// Changing this affects the prediction return value, where we need to pass the pointer to
// third-party libraries like cuPy
inline CUDAStreamView DefaultStream() {
#ifdef CUDA_API_PER_THREAD_DEFAULT_STREAM
return CUDAStreamView{cudaStreamPerThread};

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2023 by XGBoost contributors
* Copyright 2023-2024, XGBoost contributors
*
* \brief Common error message for various checks.
*/
@@ -99,5 +99,9 @@ constexpr StringView InvalidCUDAOrdinal() {
void MismatchedDevices(Context const* booster, Context const* data);
inline auto NoFederated() { return "XGBoost is not compiled with federated learning support."; }
// Build the error message shown when a component (e.g. a booster) is used
// with categorical features it does not support.
inline auto NoCategorical(std::string name) {
return name + " doesn't support categorical features.";
}
} // namespace xgboost::error
#endif // XGBOOST_COMMON_ERROR_MSG_H_

View File

@@ -1,32 +1,48 @@
/*!
* Copyright 2021-2022 by XGBoost Contributors
/**
* Copyright 2021-2023, XGBoost Contributors
*/
#ifndef XGBOOST_COMMON_LINALG_OP_CUH_
#define XGBOOST_COMMON_LINALG_OP_CUH_
#include "device_helpers.cuh"
#include <cstdint> // for int32_t
#include <cstdlib> // for size_t
#include <tuple> // for apply
#include "device_helpers.cuh" // for LaunchN
#include "linalg_op.h"
#include "xgboost/context.h"
#include "xgboost/linalg.h"
#include "xgboost/context.h" // for Context
#include "xgboost/linalg.h" // for TensorView
namespace xgboost {
namespace linalg {
template <typename T, int32_t D, typename Fn>
void ElementWiseKernelDevice(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_t s = nullptr)
{
dh::safe_cuda(cudaSetDevice(t.Device().ordinal));
static_assert(std::is_void<std::result_of_t<Fn(size_t, T&)>>::value,
"For function with return, use transform instead.");
if (t.Contiguous()) {
auto ptr = t.Values().data();
dh::LaunchN(t.Size(), s, [=] __device__(size_t i) mutable { fn(i, ptr[i]); });
} else {
dh::LaunchN(t.Size(), s, [=] __device__(size_t i) mutable {
T& v = detail::Apply(t, linalg::UnravelIndex(i, t.Shape()));
fn(i, v);
namespace cuda_impl {
// Use template specialization to dispatch, Windows + CUDA 11.8 doesn't support extended
// lambda inside constexpr if
template <typename T, std::int32_t D>
struct ElementWiseImpl {
template <typename Fn>
void operator()(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_t s) {
static_assert(D > 1);
dh::LaunchN(t.Size(), s, [=] __device__(std::size_t i) mutable {
std::apply(fn, linalg::UnravelIndex(i, t.Shape()));
});
}
};
template <typename T>
struct ElementWiseImpl<T, 1> {
template <typename Fn>
void operator()(linalg::TensorView<T, 1> t, Fn&& fn, cudaStream_t s) {
dh::LaunchN(t.Size(), s, [=] __device__(std::size_t i) { fn(i); });
}
};
template <typename T, std::int32_t D, typename Fn>
void ElementWiseKernel(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_t s = nullptr) {
dh::safe_cuda(cudaSetDevice(t.Device().ordinal));
cuda_impl::ElementWiseImpl<T, D>{}(t, fn, s);
}
} // namespace cuda_impl
template <typename T, int32_t D, typename Fn>
void ElementWiseTransformDevice(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_t s = nullptr)
@@ -44,7 +60,8 @@ void ElementWiseTransformDevice(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_
template <typename T, int32_t D, typename Fn>
void ElementWiseKernel(Context const* ctx, linalg::TensorView<T, D> t, Fn&& fn) {
ctx->IsCUDA() ? ElementWiseKernelDevice(t, fn) : ElementWiseKernelHost(t, ctx->Threads(), fn);
ctx->IsCUDA() ? cuda_impl::ElementWiseKernel(t, fn)
: ElementWiseKernelHost(t, ctx->Threads(), fn);
}
} // namespace linalg
} // namespace xgboost

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2021-2022 by XGBoost Contributors
/**
* Copyright 2021-2023, XGBoost Contributors
*/
#ifndef XGBOOST_COMMON_LINALG_OP_H_
#define XGBOOST_COMMON_LINALG_OP_H_
@@ -27,17 +27,23 @@ void ElementWiseTransformHost(linalg::TensorView<T, D> t, int32_t n_threads, Fn&
}
}
template <typename T, int32_t D, typename Fn>
void ElementWiseKernelHost(linalg::TensorView<T, D> t, int32_t n_threads, Fn&& fn) {
static_assert(std::is_void<std::result_of_t<Fn(size_t, T&)>>::value,
"For function with return, use transform instead.");
if (t.Contiguous()) {
auto ptr = t.Values().data();
common::ParallelFor(t.Size(), n_threads, [&](size_t i) { fn(i, ptr[i]); });
template <typename T, std::int32_t D, typename Fn>
void ElementWiseKernelHost(linalg::TensorView<T, D> t, std::int32_t n_threads, Fn &&fn) {
if constexpr (D == 1) {
common::ParallelFor(t.Size(), n_threads, [&](std::size_t i) { fn(i); });
} else if (D == 2 && t.CContiguous() && t.Shape(0) > t.Shape(1) * 64) {
// Heuristic. Tall, c-contiguous matrix,
auto n_rows = t.Shape(0);
auto n_columns = t.Shape(1);
common::ParallelFor(n_rows, n_threads, [&](std::size_t i) {
for (std::size_t j = 0; j < n_columns; ++j) {
fn(i, j);
}
});
} else {
common::ParallelFor(t.Size(), n_threads, [&](size_t i) {
auto& v = detail::Apply(t, linalg::UnravelIndex(i, t.Shape()));
fn(i, v);
common::ParallelFor(t.Size(), n_threads, [&](std::size_t i) {
auto idx = linalg::UnravelIndex(i, t.Shape());
std::apply(fn, idx);
});
}
}

View File

@@ -97,6 +97,7 @@ void HostSketchContainer::PushAdapterBatch(Batch const &batch, size_t base_rowid
// the nnz from info is not reliable as sketching might be the first place to go through
// the data.
auto is_dense = info.num_nonzero_ == info.num_col_ * info.num_row_;
CHECK(!this->columns_size_.empty());
this->PushRowPageImpl(batch, base_rowid, weights, info.num_nonzero_, info.num_col_, is_dense,
is_valid);
}
@@ -110,6 +111,7 @@ INSTANTIATE(CSRArrayAdapterBatch)
INSTANTIATE(CSCAdapterBatch)
INSTANTIATE(DataTableAdapterBatch)
INSTANTIATE(SparsePageAdapterBatch)
INSTANTIATE(ColumnarAdapterBatch)
namespace {
/**

View File

@@ -25,9 +25,7 @@
#include "xgboost/span.h"
#include "xgboost/string_view.h"
namespace xgboost {
namespace data {
namespace xgboost::data {
/** External data formats should implement an adapter as below. The
* adapter provides a uniform access to data outside xgboost, allowing
* construction of DMatrix objects from a range of sources without duplicating
@@ -279,9 +277,9 @@ class ArrayAdapterBatch : public detail::NoMetaInfo {
return Line{array_interface_, idx};
}
size_t NumRows() const { return array_interface_.Shape(0); }
size_t NumCols() const { return array_interface_.Shape(1); }
size_t Size() const { return this->NumRows(); }
[[nodiscard]] std::size_t NumRows() const { return array_interface_.Shape(0); }
[[nodiscard]] std::size_t NumCols() const { return array_interface_.Shape(1); }
[[nodiscard]] std::size_t Size() const { return this->NumRows(); }
explicit ArrayAdapterBatch(ArrayInterface<2> array_interface)
: array_interface_{std::move(array_interface)} {}
@@ -326,11 +324,11 @@ class CSRArrayAdapterBatch : public detail::NoMetaInfo {
: indices_{std::move(indices)}, values_{std::move(values)}, ridx_{ridx},
offset_{offset} {}
COOTuple GetElement(std::size_t idx) const {
[[nodiscard]] COOTuple GetElement(std::size_t idx) const {
return {ridx_, TypedIndex<std::size_t, 1>{indices_}(offset_ + idx), values_(offset_ + idx)};
}
size_t Size() const {
[[nodiscard]] std::size_t Size() const {
return values_.Shape(0);
}
};
@@ -539,9 +537,11 @@ class CSCArrayAdapter : public detail::SingleBatchDataIter<CSCArrayAdapterBatch>
batch_{CSCArrayAdapterBatch{indptr_, indices_, values_}} {}
// JVM package sends 0 as unknown
size_t NumRows() const { return num_rows_ == 0 ? kAdapterUnknownSize : num_rows_; }
size_t NumColumns() const { return indptr_.n - 1; }
const CSCArrayAdapterBatch& Value() const override { return batch_; }
[[nodiscard]] std::size_t NumRows() const {
return num_rows_ == 0 ? kAdapterUnknownSize : num_rows_;
}
[[nodiscard]] std::size_t NumColumns() const { return indptr_.n - 1; }
[[nodiscard]] const CSCArrayAdapterBatch& Value() const override { return batch_; }
};
class DataTableAdapterBatch : public detail::NoMetaInfo {
@@ -634,15 +634,15 @@ class DataTableAdapterBatch : public detail::NoMetaInfo {
public:
Line(std::size_t ridx, void const* const* const data, std::vector<DTType> const& ft)
: row_idx_{ridx}, data_{data}, feature_types_{ft} {}
std::size_t Size() const { return feature_types_.size(); }
COOTuple GetElement(std::size_t idx) const {
[[nodiscard]] std::size_t Size() const { return feature_types_.size(); }
[[nodiscard]] COOTuple GetElement(std::size_t idx) const {
return COOTuple{row_idx_, idx, DTGetValue(data_[idx], feature_types_[idx], row_idx_)};
}
};
public:
size_t Size() const { return num_rows_; }
const Line GetLine(std::size_t ridx) const { return {ridx, data_, feature_types_}; }
[[nodiscard]] size_t Size() const { return num_rows_; }
[[nodiscard]] const Line GetLine(std::size_t ridx) const { return {ridx, data_, feature_types_}; }
static constexpr bool kIsRowMajor = true;
private:
@@ -659,9 +659,9 @@ class DataTableAdapter : public detail::SingleBatchDataIter<DataTableAdapterBatc
: batch_(data, feature_stypes, num_rows, num_features),
num_rows_(num_rows),
num_columns_(num_features) {}
const DataTableAdapterBatch& Value() const override { return batch_; }
std::size_t NumRows() const { return num_rows_; }
std::size_t NumColumns() const { return num_columns_; }
[[nodiscard]] const DataTableAdapterBatch& Value() const override { return batch_; }
[[nodiscard]] std::size_t NumRows() const { return num_rows_; }
[[nodiscard]] std::size_t NumColumns() const { return num_columns_; }
private:
DataTableAdapterBatch batch_;
@@ -669,6 +669,74 @@ class DataTableAdapter : public detail::SingleBatchDataIter<DataTableAdapterBatc
std::size_t num_columns_;
};
// Row-major adapter batch over columnar data: one 1-D ArrayInterface per
// column, viewed through a non-owning span (the owner is ColumnarAdapter).
class ColumnarAdapterBatch : public detail::NoMetaInfo {
common::Span<ArrayInterface<1, false>> columns_;
// View of a single row; elements are fetched column-by-column via ridx_.
class Line {
common::Span<ArrayInterface<1, false>> const& columns_;
std::size_t ridx_;
public:
explicit Line(common::Span<ArrayInterface<1, false>> const& columns, std::size_t ridx)
: columns_{columns}, ridx_{ridx} {}
// Number of elements in this row == number of columns.
// NOTE(review): the empty() check is redundant — an empty span's size() is 0.
[[nodiscard]] std::size_t Size() const { return columns_.empty() ? 0 : columns_.size(); }
[[nodiscard]] COOTuple GetElement(std::size_t idx) const {
return {ridx_, idx, columns_[idx](ridx_)};
}
};
public:
ColumnarAdapterBatch() = default;
explicit ColumnarAdapterBatch(common::Span<ArrayInterface<1, false>> columns)
: columns_{columns} {}
[[nodiscard]] Line GetLine(std::size_t ridx) const { return Line{columns_, ridx}; }
// Size/NumRows: row count taken from the first column (all columns are
// required to have equal length — enforced by ColumnarAdapter's ctor).
[[nodiscard]] std::size_t Size() const {
return columns_.empty() ? 0 : columns_.front().Shape(0);
}
[[nodiscard]] std::size_t NumCols() const { return columns_.empty() ? 0 : columns_.size(); }
[[nodiscard]] std::size_t NumRows() const { return this->Size(); }
static constexpr bool kIsRowMajor = true;
};
// Adapter over columnar data: parses a JSON array of per-column
// __array_interface__ objects and exposes a single ColumnarAdapterBatch.
class ColumnarAdapter : public detail::SingleBatchDataIter<ColumnarAdapterBatch> {
std::vector<ArrayInterface<1, false>> columns_;  // owns the column views
ColumnarAdapterBatch batch_;                     // non-owning batch over columns_
public:
// `columns`: JSON text — an array where each element is one column's
// array-interface object. CHECK-fails on malformed input.
explicit ColumnarAdapter(StringView columns) {
auto jarray = Json::Load(columns);
CHECK(IsA<Array>(jarray));
auto const& array = get<Array const>(jarray);
// NOTE(review): `auto col` copies each Json element; `auto const& col`
// would avoid the per-column copy.
for (auto col : array) {
columns_.emplace_back(get<Object const>(col));
}
// All columns must have the same length; vacuously true when empty.
bool consistent =
columns_.empty() ||
std::all_of(columns_.cbegin(), columns_.cend(), [&](ArrayInterface<1, false> const& array) {
return array.Shape(0) == columns_[0].Shape(0);
});
CHECK(consistent) << "Size of columns should be the same.";
batch_ = ColumnarAdapterBatch{columns_};
}
[[nodiscard]] ColumnarAdapterBatch const& Value() const override { return batch_; }
// Row count from the first column (0 when there are no columns).
[[nodiscard]] std::size_t NumRows() const {
if (!columns_.empty()) {
return columns_.front().shape[0];
}
return 0;
}
[[nodiscard]] std::size_t NumColumns() const {
if (!columns_.empty()) {
return columns_.size();
}
return 0;
}
};
class FileAdapterBatch {
public:
class Line {
@@ -851,6 +919,5 @@ class SparsePageAdapterBatch {
Line GetLine(size_t ridx) const { return Line{page_[ridx].data(), page_[ridx].size(), ridx}; }
size_t Size() const { return page_.Size(); }
};
}; // namespace data
} // namespace xgboost
} // namespace xgboost::data
#endif // XGBOOST_DATA_ADAPTER_H_

View File

@@ -0,0 +1,13 @@
/**
* Copyright 2019-2024, XGBoost Contributors
*/
#include "array_interface.h"
#include "../common/common.h" // for AssertGPUSupport
namespace xgboost {
#if !defined(XGBOOST_USE_CUDA)
// CPU-only build stubs: syncing a CUDA stream is invalid without GPU support,
// and no pointer can be a device pointer.
void ArrayInterfaceHandler::SyncCudaStream(int64_t) { common::AssertGPUSupport(); }
bool ArrayInterfaceHandler::IsCudaPtr(void const *) { return false; }
#endif  // !defined(XGBOOST_USE_CUDA)
} // namespace xgboost

View File

@@ -377,11 +377,6 @@ struct ToDType<int64_t> {
static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kI8;
};
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
inline void ArrayInterfaceHandler::SyncCudaStream(int64_t) { common::AssertGPUSupport(); }
inline bool ArrayInterfaceHandler::IsCudaPtr(void const *) { return false; }
#endif // !defined(XGBOOST_USE_CUDA)
/**
* \brief A type erased view over __array_interface__ protocol defined by numpy
*

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2015-2023 by XGBoost Contributors
* Copyright 2015-2024, XGBoost Contributors
* \file data.cc
*/
#include "xgboost/data.h"
@@ -260,9 +260,14 @@ void MetaInfo::SaveBinary(dmlc::Stream *fo) const {
CHECK_EQ(field_cnt, kNumField) << "Wrong number of fields";
}
void LoadFeatureType(std::vector<std::string>const& type_names, std::vector<FeatureType>* types) {
/**
* @brief Load feature type info from names, returns whether there's categorical features.
*/
[[nodiscard]] bool LoadFeatureType(std::vector<std::string> const& type_names,
std::vector<FeatureType>* types) {
types->clear();
for (auto const &elem : type_names) {
bool has_cat{false};
for (auto const& elem : type_names) {
if (elem == "int") {
types->emplace_back(FeatureType::kNumerical);
} else if (elem == "float") {
@@ -273,10 +278,12 @@ void LoadFeatureType(std::vector<std::string>const& type_names, std::vector<Feat
types->emplace_back(FeatureType::kNumerical);
} else if (elem == "c") {
types->emplace_back(FeatureType::kCategorical);
has_cat = true;
} else {
LOG(FATAL) << "All feature_types must be one of {int, float, i, q, c}.";
}
}
return has_cat;
}
const std::vector<size_t>& MetaInfo::LabelAbsSort(Context const* ctx) const {
@@ -340,7 +347,8 @@ void MetaInfo::LoadBinary(dmlc::Stream *fi) {
LoadVectorField(fi, u8"feature_names", DataType::kStr, &feature_names);
LoadVectorField(fi, u8"feature_types", DataType::kStr, &feature_type_names);
LoadVectorField(fi, u8"feature_weights", DataType::kFloat32, &feature_weights);
LoadFeatureType(feature_type_names, &feature_types.HostVector());
this->has_categorical_ = LoadFeatureType(feature_type_names, &feature_types.HostVector());
}
template <typename T>
@@ -639,6 +647,7 @@ void MetaInfo::SetFeatureInfo(const char* key, const char **info, const bst_ulon
CHECK_EQ(size, this->num_col_) << "Length of " << key << " must be equal to number of columns.";
CHECK(info);
}
if (!std::strcmp(key, "feature_type")) {
feature_type_names.clear();
for (size_t i = 0; i < size; ++i) {
@@ -651,7 +660,7 @@ void MetaInfo::SetFeatureInfo(const char* key, const char **info, const bst_ulon
<< "Length of " << key << " must be equal to number of columns.";
}
auto& h_feature_types = feature_types.HostVector();
LoadFeatureType(feature_type_names, &h_feature_types);
this->has_categorical_ = LoadFeatureType(feature_type_names, &h_feature_types);
} else if (!std::strcmp(key, "feature_name")) {
if (IsColumnSplit()) {
std::vector<std::string> local_feature_names{};
@@ -674,9 +683,8 @@ void MetaInfo::SetFeatureInfo(const char* key, const char **info, const bst_ulon
}
}
void MetaInfo::GetFeatureInfo(const char *field,
std::vector<std::string> *out_str_vecs) const {
auto &str_vecs = *out_str_vecs;
void MetaInfo::GetFeatureInfo(const char* field, std::vector<std::string>* out_str_vecs) const {
auto& str_vecs = *out_str_vecs;
if (!std::strcmp(field, "feature_type")) {
str_vecs.resize(feature_type_names.size());
std::copy(feature_type_names.cbegin(), feature_type_names.cend(), str_vecs.begin());
@@ -689,6 +697,9 @@ void MetaInfo::GetFeatureInfo(const char *field,
}
void MetaInfo::Extend(MetaInfo const& that, bool accumulate_rows, bool check_column) {
/**
* shape
*/
if (accumulate_rows) {
this->num_row_ += that.num_row_;
}
@@ -702,6 +713,9 @@ void MetaInfo::Extend(MetaInfo const& that, bool accumulate_rows, bool check_col
}
this->num_col_ = that.num_col_;
/**
* info with n_samples
*/
linalg::Stack(&this->labels, that.labels);
this->weights_.SetDevice(that.weights_.Device());
@@ -715,6 +729,9 @@ void MetaInfo::Extend(MetaInfo const& that, bool accumulate_rows, bool check_col
linalg::Stack(&this->base_margin_, that.base_margin_);
/**
* group
*/
if (this->group_ptr_.size() == 0) {
this->group_ptr_ = that.group_ptr_;
} else {
@@ -727,17 +744,25 @@ void MetaInfo::Extend(MetaInfo const& that, bool accumulate_rows, bool check_col
group_ptr.end());
}
/**
* info with n_features
*/
if (!that.feature_names.empty()) {
this->feature_names = that.feature_names;
}
if (!that.feature_type_names.empty()) {
this->feature_type_names = that.feature_type_names;
auto &h_feature_types = feature_types.HostVector();
LoadFeatureType(this->feature_type_names, &h_feature_types);
auto& h_feature_types = feature_types.HostVector();
this->has_categorical_ = LoadFeatureType(this->feature_type_names, &h_feature_types);
} else if (!that.feature_types.Empty()) {
// FIXME(jiamingy): https://github.com/dmlc/xgboost/pull/9171/files#r1440188612
this->feature_types.Resize(that.feature_types.Size());
this->feature_types.Copy(that.feature_types);
auto const& ft = this->feature_types.ConstHostVector();
this->has_categorical_ = std::any_of(ft.cbegin(), ft.cend(), common::IsCatOp{});
}
if (!that.feature_weights.Empty()) {
this->feature_weights.Resize(that.feature_weights.Size());
this->feature_weights.SetDevice(that.feature_weights.Device());
@@ -947,38 +972,24 @@ DMatrix* DMatrix::Create(AdapterT* adapter, float missing, int nthread, const st
return new data::SimpleDMatrix(adapter, missing, nthread, data_split_mode);
}
template DMatrix* DMatrix::Create<data::DenseAdapter>(data::DenseAdapter* adapter, float missing,
std::int32_t nthread,
const std::string& cache_prefix,
DataSplitMode data_split_mode);
template DMatrix* DMatrix::Create<data::ArrayAdapter>(data::ArrayAdapter* adapter, float missing,
std::int32_t nthread,
const std::string& cache_prefix,
DataSplitMode data_split_mode);
template DMatrix* DMatrix::Create<data::CSRAdapter>(data::CSRAdapter* adapter, float missing,
std::int32_t nthread,
const std::string& cache_prefix,
DataSplitMode data_split_mode);
template DMatrix* DMatrix::Create<data::CSCAdapter>(data::CSCAdapter* adapter, float missing,
std::int32_t nthread,
const std::string& cache_prefix,
DataSplitMode data_split_mode);
template DMatrix* DMatrix::Create<data::DataTableAdapter>(data::DataTableAdapter* adapter,
float missing, std::int32_t nthread,
const std::string& cache_prefix,
DataSplitMode data_split_mode);
template DMatrix* DMatrix::Create<data::FileAdapter>(data::FileAdapter* adapter, float missing,
std::int32_t nthread,
const std::string& cache_prefix,
DataSplitMode data_split_mode);
template DMatrix* DMatrix::Create<data::CSRArrayAdapter>(data::CSRArrayAdapter* adapter,
float missing, std::int32_t nthread,
const std::string& cache_prefix,
DataSplitMode data_split_mode);
template DMatrix* DMatrix::Create<data::CSCArrayAdapter>(data::CSCArrayAdapter* adapter,
float missing, std::int32_t nthread,
const std::string& cache_prefix,
DataSplitMode data_split_mode);
// Instantiate the factory function for various adapters
#define INSTANTIATION_CREATE(_AdapterT) \
template DMatrix* DMatrix::Create<data::_AdapterT>( \
data::_AdapterT * adapter, float missing, std::int32_t nthread, \
const std::string& cache_prefix, DataSplitMode data_split_mode);
INSTANTIATION_CREATE(DenseAdapter)
INSTANTIATION_CREATE(ArrayAdapter)
INSTANTIATION_CREATE(CSRAdapter)
INSTANTIATION_CREATE(CSCAdapter)
INSTANTIATION_CREATE(DataTableAdapter)
INSTANTIATION_CREATE(FileAdapter)
INSTANTIATION_CREATE(CSRArrayAdapter)
INSTANTIATION_CREATE(CSCArrayAdapter)
INSTANTIATION_CREATE(ColumnarAdapter)
#undef INSTANTIATION_CREATE
template DMatrix* DMatrix::Create(
data::IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>* adapter,
float missing, int nthread, const std::string& cache_prefix, DataSplitMode data_split_mode);
@@ -1156,7 +1167,6 @@ uint64_t SparsePage::Push(const AdapterBatchT& batch, float missing, int nthread
builder.InitStorage();
// Second pass over batch, placing elements in correct position
auto is_valid = data::IsValidFunctor{missing};
#pragma omp parallel num_threads(nthread)
{
@@ -1253,9 +1263,10 @@ template uint64_t SparsePage::Push(const data::CSCAdapterBatch& batch, float mis
template uint64_t SparsePage::Push(const data::DataTableAdapterBatch& batch, float missing,
int nthread);
template uint64_t SparsePage::Push(const data::FileAdapterBatch& batch, float missing, int nthread);
template uint64_t SparsePage::Push(const data::ColumnarAdapterBatch& batch, float missing,
std::int32_t nthread);
namespace data {
// List of files that will be force linked in static links.
DMLC_REGISTRY_LINK_TAG(sparse_page_raw_format);
DMLC_REGISTRY_LINK_TAG(gradient_index_format);

View File

@@ -120,7 +120,7 @@ void GHistIndexMatrix::PushAdapterBatchColumns(Context const *ctx, Batch const &
INSTANTIATION_PUSH(data::CSRArrayAdapterBatch)
INSTANTIATION_PUSH(data::ArrayAdapterBatch)
INSTANTIATION_PUSH(data::SparsePageAdapterBatch)
INSTANTIATION_PUSH(data::ColumnarAdapterBatch)
#undef INSTANTIATION_PUSH
void GHistIndexMatrix::ResizeIndex(const size_t n_index, const bool isDense) {

View File

@@ -93,7 +93,7 @@ class IterativeDMatrix : public DMatrix {
return nullptr;
}
BatchSet<SparsePage> GetRowBatches() override {
LOG(FATAL) << "Not implemented.";
LOG(FATAL) << "Not implemented for `QuantileDMatrix`.";
return BatchSet<SparsePage>(BatchIterator<SparsePage>(nullptr));
}
BatchSet<CSCPage> GetColumnBatches(Context const *) override {

View File

@@ -5,7 +5,22 @@
#include "proxy_dmatrix.h"
#include <memory> // for shared_ptr
#include "xgboost/context.h" // for Context
#include "xgboost/data.h" // for DMatrix
#include "xgboost/logging.h"
#include "xgboost/string_view.h" // for StringView
namespace xgboost::data {
// Point this proxy at columnar host data described by an array-interface
// JSON string; updates the cached row/column counts and forces a CPU context
// (columnar input here is host-resident).
void DMatrixProxy::SetColumnarData(StringView interface_str) {
std::shared_ptr<ColumnarAdapter> adapter{new ColumnarAdapter{interface_str}};
this->batch_ = adapter;
this->Info().num_col_ = adapter->NumColumns();
this->Info().num_row_ = adapter->NumRows();
this->ctx_.Init(Args{{"device", "cpu"}});
}
void DMatrixProxy::SetArrayData(StringView interface_str) {
std::shared_ptr<ArrayAdapter> adapter{new ArrayAdapter{interface_str}};
this->batch_ = adapter;

View File

@@ -62,6 +62,8 @@ class DMatrixProxy : public DMatrix {
#endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
}
void SetColumnarData(StringView interface_str);
void SetArrayData(StringView interface_str);
void SetCSRData(char const* c_indptr, char const* c_indices, char const* c_values,
bst_feature_t n_features, bool on_host);
@@ -151,6 +153,17 @@ decltype(auto) HostAdapterDispatch(DMatrixProxy const* proxy, Fn fn, bool* type_
if (type_error) {
*type_error = false;
}
} else if (proxy->Adapter().type() == typeid(std::shared_ptr<ColumnarAdapter>)) {
if constexpr (get_value) {
auto value = std::any_cast<std::shared_ptr<ColumnarAdapter>>(proxy->Adapter())->Value();
return fn(value);
} else {
auto value = std::any_cast<std::shared_ptr<ColumnarAdapter>>(proxy->Adapter());
return fn(value);
}
if (type_error) {
*type_error = false;
}
} else {
if (type_error) {
*type_error = true;

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2014~2023 by XGBoost Contributors
* Copyright 2014~2023, XGBoost Contributors
* \file simple_dmatrix.cc
* \brief the input data structure for gradient boosting
* \author Tianqi Chen
@@ -356,6 +356,8 @@ template SimpleDMatrix::SimpleDMatrix(DataTableAdapter* adapter, float missing,
DataSplitMode data_split_mode);
template SimpleDMatrix::SimpleDMatrix(FileAdapter* adapter, float missing, int nthread,
DataSplitMode data_split_mode);
template SimpleDMatrix::SimpleDMatrix(ColumnarAdapter* adapter, float missing, int nthread,
DataSplitMode data_split_mode);
template SimpleDMatrix::SimpleDMatrix(
IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>* adapter,
float missing, int nthread, DataSplitMode data_split_mode);

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2014-2023, XGBoost Contributors
* Copyright 2014-2024, XGBoost Contributors
* \file gblinear.cc
* \brief Implementation of Linear booster, with L1/L2 regularization: Elastic Net
* the update rule is parallel coordinate descent (shotgun)
@@ -8,25 +8,24 @@
#include <dmlc/omp.h>
#include <dmlc/parameter.h>
#include <vector>
#include <string>
#include <sstream>
#include <algorithm>
#include <numeric>
#include <sstream>
#include <string>
#include <vector>
#include "../common/common.h"
#include "../common/error_msg.h" // NoCategorical, DeprecatedFunc
#include "../common/threading_utils.h"
#include "../common/timer.h"
#include "gblinear_model.h"
#include "xgboost/gbm.h"
#include "xgboost/json.h"
#include "xgboost/predictor.h"
#include "xgboost/linear_updater.h"
#include "xgboost/logging.h"
#include "xgboost/learner.h"
#include "xgboost/linalg.h"
#include "gblinear_model.h"
#include "../common/timer.h"
#include "../common/common.h"
#include "../common/threading_utils.h"
#include "../common/error_msg.h"
#include "xgboost/linear_updater.h"
#include "xgboost/logging.h"
#include "xgboost/predictor.h"
namespace xgboost::gbm {
DMLC_REGISTRY_FILE_TAG(gblinear);
@@ -145,6 +144,7 @@ class GBLinear : public GradientBooster {
ObjFunction const*) override {
monitor_.Start("DoBoost");
CHECK(!p_fmat->Info().HasCategorical()) << error::NoCategorical("`gblinear`");
model_.LazyInitModel();
this->LazySumWeights(p_fmat);

View File

@@ -535,8 +535,7 @@ class LearnerConfiguration : public Learner {
tparam_.booster = get<String>(gradient_booster["name"]);
if (!gbm_) {
gbm_.reset(GradientBooster::Create(tparam_.booster,
&ctx_, &learner_model_param_));
gbm_.reset(GradientBooster::Create(tparam_.booster, &ctx_, &learner_model_param_));
}
gbm_->LoadConfig(gradient_booster);
@@ -1095,6 +1094,11 @@ class LearnerIO : public LearnerConfiguration {
std::vector<std::pair<std::string, std::string> > extra_attr;
mparam.contain_extra_attrs = 1;
if (!this->feature_names_.empty() || !this->feature_types_.empty()) {
LOG(WARNING) << "feature names and feature types are being disregarded, use JSON/UBJSON "
"format instead.";
}
{
// Similar to JSON model IO, we save the objective.
Json j_obj { Object() };

View File

@@ -4,71 +4,85 @@
* \brief Provides an implementation of the hinge loss function
* \author Henry Gouk
*/
#include "xgboost/objective.h"
#include "xgboost/json.h"
#include "xgboost/span.h"
#include "xgboost/host_device_vector.h"
#include <algorithm> // for max
#include <cstddef> // for size_t
#include <cstdint> // for int32_t
#include "../common/math.h"
#include "../common/transform.h"
#include "../common/common.h"
#include "../common/common.h" // for Range
#if defined(XGBOOST_USE_CUDA)
#include "../common/linalg_op.cuh"
#endif
#include "../common/linalg_op.h"
#include "../common/optional_weight.h" // for OptionalWeights
#include "../common/transform.h" // for Transform
#include "init_estimation.h" // for FitIntercept
#include "xgboost/data.h" // for MetaInfo
#include "xgboost/host_device_vector.h" // HostDeviceVector
#include "xgboost/json.h" // for Json
#include "xgboost/linalg.h" // for UnravelIndex
#include "xgboost/span.h" // for Span
namespace xgboost::obj {
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
DMLC_REGISTRY_FILE_TAG(hinge_obj_gpu);
#endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
class HingeObj : public ObjFunction {
class HingeObj : public FitIntercept {
public:
HingeObj() = default;
void Configure(Args const&) override {}
void Configure(Args const &) override {}
ObjInfo Task() const override { return ObjInfo::kRegression; }
void GetGradient(const HostDeviceVector<bst_float> &preds, const MetaInfo &info,
std::int32_t /*iter*/, linalg::Matrix<GradientPair> *out_gpair) override {
CHECK_NE(info.labels.Size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.Size(), info.labels.Size())
<< "labels are not correctly provided"
<< "preds.size=" << preds.Size()
<< ", label.size=" << info.labels.Size();
const size_t ndata = preds.Size();
const bool is_null_weight = info.weights_.Size() == 0;
if (!is_null_weight) {
CHECK_EQ(info.weights_.Size(), ndata)
<< "Number of weights should be equal to number of data points.";
}
CHECK_EQ(info.labels.Shape(1), 1) << "Multi-target for `binary:hinge` is not yet supported.";
out_gpair->Reshape(ndata, 1);
common::Transform<>::Init(
[=] XGBOOST_DEVICE(size_t _idx,
common::Span<GradientPair> _out_gpair,
common::Span<const bst_float> _preds,
common::Span<const bst_float> _labels,
common::Span<const bst_float> _weights) {
bst_float p = _preds[_idx];
bst_float w = is_null_weight ? 1.0f : _weights[_idx];
bst_float y = _labels[_idx] * 2.0 - 1.0;
bst_float g, h;
if (p * y < 1.0) {
g = -y * w;
h = w;
} else {
g = 0.0;
h = std::numeric_limits<bst_float>::min();
}
_out_gpair[_idx] = GradientPair(g, h);
},
common::Range{0, static_cast<int64_t>(ndata)}, this->ctx_->Threads(),
ctx_->Device()).Eval(
out_gpair->Data(), &preds, info.labels.Data(), &info.weights_);
[[nodiscard]] bst_target_t Targets(MetaInfo const &info) const override {
// Multi-target regression.
return std::max(static_cast<std::size_t>(1), info.labels.Shape(1));
}
void PredTransform(HostDeviceVector<bst_float> *io_preds) const override {
void GetGradient(HostDeviceVector<float> const &preds, MetaInfo const &info,
std::int32_t /*iter*/, linalg::Matrix<GradientPair> *out_gpair) override {
CheckInitInputs(info);
CHECK_EQ(info.labels.Size(), preds.Size()) << "Invalid shape of labels.";
if (!info.weights_.Empty()) {
CHECK_EQ(info.weights_.Size(), info.num_row_)
<< "Number of weights should be equal to number of data points.";
}
bst_target_t n_targets = this->Targets(info);
out_gpair->Reshape(info.num_row_, n_targets);
auto gpair = out_gpair->View(ctx_->Device());
preds.SetDevice(ctx_->Device());
auto predt = linalg::MakeTensorView(ctx_, &preds, info.num_row_, n_targets);
auto labels = info.labels.View(ctx_->Device());
info.weights_.SetDevice(ctx_->Device());
common::OptionalWeights weight{ctx_->IsCUDA() ? info.weights_.ConstDeviceSpan()
: info.weights_.ConstHostSpan()};
linalg::ElementWiseKernel(this->ctx_, labels,
[=] XGBOOST_DEVICE(std::size_t i, std::size_t j) mutable {
auto w = weight[i];
auto p = predt(i, j);
auto y = labels(i, j) * 2.0 - 1.0;
float g, h;
if (p * y < 1.0) {
g = -y * w;
h = w;
} else {
g = 0.0;
h = std::numeric_limits<float>::min();
}
gpair(i, j) = GradientPair{g, h};
});
}
void PredTransform(HostDeviceVector<float> *io_preds) const override {
common::Transform<>::Init(
[] XGBOOST_DEVICE(size_t _idx, common::Span<bst_float> _preds) {
[] XGBOOST_DEVICE(std::size_t _idx, common::Span<float> _preds) {
_preds[_idx] = _preds[_idx] > 0.0 ? 1.0 : 0.0;
},
common::Range{0, static_cast<int64_t>(io_preds->Size()), 1}, this->ctx_->Threads(),
@@ -76,12 +90,10 @@ class HingeObj : public ObjFunction {
.Eval(io_preds);
}
[[nodiscard]] const char* DefaultEvalMetric() const override {
return "error";
}
[[nodiscard]] const char *DefaultEvalMetric() const override { return "error"; }
void SaveConfig(Json* p_out) const override {
auto& out = *p_out;
void SaveConfig(Json *p_out) const override {
auto &out = *p_out;
out["name"] = String("binary:hinge");
}
void LoadConfig(Json const &) override {}
@@ -89,7 +101,7 @@ class HingeObj : public ObjFunction {
// register the objective functions
XGBOOST_REGISTER_OBJECTIVE(HingeObj, "binary:hinge")
.describe("Hinge loss. Expects labels to be in [0,1f]")
.set_body([]() { return new HingeObj(); });
.describe("Hinge loss. Expects labels to be in [0,1f]")
.set_body([]() { return new HingeObj(); });
} // namespace xgboost::obj

View File

@@ -21,6 +21,8 @@
#include "../common/math.h"
#include "../common/transform.h"
#include "multiclass_param.h"
namespace xgboost {
namespace obj {
@@ -28,15 +30,6 @@ namespace obj {
DMLC_REGISTRY_FILE_TAG(multiclass_obj_gpu);
#endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
struct SoftmaxMultiClassParam : public XGBoostParameter<SoftmaxMultiClassParam> {
int num_class;
// declare parameters
DMLC_DECLARE_PARAMETER(SoftmaxMultiClassParam) {
DMLC_DECLARE_FIELD(num_class).set_lower_bound(1)
.describe("Number of output class in the multi-class classification.");
}
};
class SoftmaxMultiClassObj : public ObjFunction {
public:
explicit SoftmaxMultiClassObj(bool output_prob)

View File

@@ -0,0 +1,25 @@
/*!
* Copyright 2015-2023 by Contributors
* \file multiclass_param.h
* \brief Definition of multi-class classification parameters.
*/
#ifndef XGBOOST_OBJECTIVE_MULTICLASS_PARAM_H_
#define XGBOOST_OBJECTIVE_MULTICLASS_PARAM_H_
#include "xgboost/parameter.h"
namespace xgboost {
namespace obj {
struct SoftmaxMultiClassParam : public XGBoostParameter<SoftmaxMultiClassParam> {
int num_class;
// declare parameters
DMLC_DECLARE_PARAMETER(SoftmaxMultiClassParam) {
DMLC_DECLARE_FIELD(num_class).set_lower_bound(1)
.describe("Number of output class in the multi-class classification.");
}
};
} // namespace obj
} // namespace xgboost
#endif // XGBOOST_OBJECTIVE_MULTICLASS_PARAM_H_

View File

@@ -18,7 +18,11 @@ DMLC_REGISTRY_ENABLE(::xgboost::ObjFunctionReg);
namespace xgboost {
// implement factory functions
ObjFunction* ObjFunction::Create(const std::string& name, Context const* ctx) {
auto *e = ::dmlc::Registry< ::xgboost::ObjFunctionReg>::Get()->Find(name);
std::string obj_name = name;
if (ctx->IsSycl()) {
obj_name = GetSyclImplementationName(obj_name);
}
auto *e = ::dmlc::Registry< ::xgboost::ObjFunctionReg>::Get()->Find(obj_name);
if (e == nullptr) {
std::stringstream ss;
for (const auto& entry : ::dmlc::Registry< ::xgboost::ObjFunctionReg>::List()) {
@@ -32,6 +36,22 @@ ObjFunction* ObjFunction::Create(const std::string& name, Context const* ctx) {
return pobj;
}
/* If the objective function has sycl-specific implementation,
* returns the specific implementation name.
* Otherwise return the orginal name without modifications.
*/
std::string ObjFunction::GetSyclImplementationName(const std::string& name) {
const std::string sycl_postfix = "_sycl";
auto *e = ::dmlc::Registry< ::xgboost::ObjFunctionReg>::Get()->Find(name + sycl_postfix);
if (e != nullptr) {
// Function has specific sycl implementation
return name + sycl_postfix;
} else {
// Function hasn't specific sycl implementation
return name;
}
}
void ObjFunction::InitEstimation(MetaInfo const&, linalg::Tensor<float, 1>* base_score) const {
CHECK(base_score);
base_score->Reshape(1);

View File

@@ -75,28 +75,25 @@ class QuantileRegression : public ObjFunction {
: info.weights_.ConstHostSpan()};
preds.SetDevice(ctx_->Device());
auto predt = linalg::MakeVec(&preds);
auto n_samples = info.num_row_;
auto predt = linalg::MakeTensorView(ctx_, &preds, info.num_row_, n_targets);
alpha_.SetDevice(ctx_->Device());
auto alpha = ctx_->IsCUDA() ? alpha_.ConstDeviceSpan() : alpha_.ConstHostSpan();
linalg::ElementWiseKernel(
ctx_, gpair, [=] XGBOOST_DEVICE(std::size_t i, GradientPair const&) mutable {
auto [sample_id, quantile_id, target_id] =
linalg::UnravelIndex(i, n_samples, alpha.size(), n_targets / alpha.size());
assert(target_id == 0);
auto d = predt(i) - labels(sample_id, target_id);
auto h = weight[sample_id];
if (d >= 0) {
auto g = (1.0f - alpha[quantile_id]) * weight[sample_id];
gpair(sample_id, quantile_id) = GradientPair{g, h};
} else {
auto g = (-alpha[quantile_id] * weight[sample_id]);
gpair(sample_id, quantile_id) = GradientPair{g, h};
}
});
linalg::ElementWiseKernel(ctx_, gpair,
[=] XGBOOST_DEVICE(std::size_t i, std::size_t j) mutable {
// j is the quantile index
// 0 is the target index
auto d = predt(i, j) - labels(i, 0);
auto h = weight[i];
if (d >= 0) {
auto g = (1.0f - alpha[j]) * weight[i];
gpair(i, j) = GradientPair{g, h};
} else {
auto g = (-alpha[j] * weight[i]);
gpair(i, j) = GradientPair{g, h};
}
});
}
void InitEstimation(MetaInfo const& info, linalg::Vector<float>* base_score) const override {

View File

@@ -35,6 +35,8 @@
#include "xgboost/span.h"
#include "xgboost/tree_model.h" // RegTree
#include "regression_param.h"
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
#include "../common/cuda_context.cuh" // for CUDAContext
#include "../common/device_helpers.cuh"
@@ -53,14 +55,7 @@ void CheckRegInputs(MetaInfo const& info, HostDeviceVector<bst_float> const& pre
DMLC_REGISTRY_FILE_TAG(regression_obj_gpu);
#endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
struct RegLossParam : public XGBoostParameter<RegLossParam> {
float scale_pos_weight;
// declare parameters
DMLC_DECLARE_PARAMETER(RegLossParam) {
DMLC_DECLARE_FIELD(scale_pos_weight).set_default(1.0f).set_lower_bound(0.0f)
.describe("Scale the weight of positive examples by this factor");
}
};
template<typename Loss>
class RegLossObj : public FitIntercept {
@@ -255,24 +250,24 @@ class PseudoHuberRegression : public FitIntercept {
auto gpair = out_gpair->View(ctx_->Device());
preds.SetDevice(ctx_->Device());
auto predt = linalg::MakeVec(&preds);
auto predt = linalg::MakeTensorView(ctx_, &preds, info.num_row_, this->Targets(info));
info.weights_.SetDevice(ctx_->Device());
common::OptionalWeights weight{ctx_->IsCUDA() ? info.weights_.ConstDeviceSpan()
: info.weights_.ConstHostSpan()};
linalg::ElementWiseKernel(ctx_, labels, [=] XGBOOST_DEVICE(size_t i, float const y) mutable {
auto sample_id = std::get<0>(linalg::UnravelIndex(i, labels.Shape()));
const float z = predt(i) - y;
const float scale_sqrt = std::sqrt(1 + common::Sqr(z) / common::Sqr(slope));
float grad = z / scale_sqrt;
linalg::ElementWiseKernel(
ctx_, labels, [=] XGBOOST_DEVICE(std::size_t i, std::size_t j) mutable {
float z = predt(i, j) - labels(i, j);
float scale_sqrt = std::sqrt(1 + common::Sqr(z) / common::Sqr(slope));
float grad = z / scale_sqrt;
auto scale = common::Sqr(slope) + common::Sqr(z);
float hess = common::Sqr(slope) / (scale * scale_sqrt);
auto scale = common::Sqr(slope) + common::Sqr(z);
float hess = common::Sqr(slope) / (scale * scale_sqrt);
auto w = weight[sample_id];
gpair(i) = {grad * w, hess * w};
});
auto w = weight[i];
gpair(i) = {grad * w, hess * w};
});
}
[[nodiscard]] const char* DefaultEvalMetric() const override { return "mphe"; }
@@ -635,20 +630,21 @@ class MeanAbsoluteError : public ObjFunction {
auto gpair = out_gpair->View(ctx_->Device());
preds.SetDevice(ctx_->Device());
auto predt = linalg::MakeVec(&preds);
auto predt = linalg::MakeTensorView(ctx_, &preds, info.num_row_, this->Targets(info));
info.weights_.SetDevice(ctx_->Device());
common::OptionalWeights weight{ctx_->IsCUDA() ? info.weights_.ConstDeviceSpan()
: info.weights_.ConstHostSpan()};
linalg::ElementWiseKernel(ctx_, labels, [=] XGBOOST_DEVICE(std::size_t i, float y) mutable {
auto sign = [](auto x) {
return (x > static_cast<decltype(x)>(0)) - (x < static_cast<decltype(x)>(0));
};
auto [sample_id, target_id] = linalg::UnravelIndex(i, labels.Shape());
auto grad = sign(predt(i) - y) * weight[sample_id];
auto hess = weight[sample_id];
gpair(sample_id, target_id) = GradientPair{grad, hess};
});
linalg::ElementWiseKernel(
ctx_, labels, [=] XGBOOST_DEVICE(std::size_t i, std::size_t j) mutable {
auto sign = [](auto x) {
return (x > static_cast<decltype(x)>(0)) - (x < static_cast<decltype(x)>(0));
};
auto y = labels(i, j);
auto hess = weight[i];
auto grad = sign(predt(i, j) - y) * hess;
gpair(i, j) = GradientPair{grad, hess};
});
}
void InitEstimation(MetaInfo const& info, linalg::Tensor<float, 1>* base_margin) const override {

View File

@@ -0,0 +1,25 @@
/*!
* Copyright 2015-2023 by Contributors
* \file multiclass_param.h
* \brief Definition of single-value regression and classification parameters.
*/
#ifndef XGBOOST_OBJECTIVE_REGRESSION_PARAM_H_
#define XGBOOST_OBJECTIVE_REGRESSION_PARAM_H_
#include "xgboost/parameter.h"
namespace xgboost {
namespace obj {
struct RegLossParam : public XGBoostParameter<RegLossParam> {
float scale_pos_weight;
// declare parameters
DMLC_DECLARE_PARAMETER(RegLossParam) {
DMLC_DECLARE_FIELD(scale_pos_weight).set_default(1.0f).set_lower_bound(0.0f)
.describe("Scale the weight of positive examples by this factor");
}
};
} // namespace obj
} // namespace xgboost
#endif // XGBOOST_OBJECTIVE_REGRESSION_PARAM_H_

View File

@@ -761,6 +761,9 @@ class CPUPredictor : public Predictor {
} else if (x.type() == typeid(std::shared_ptr<data::CSRArrayAdapter>)) {
this->DispatchedInplacePredict<data::CSRArrayAdapter, 1>(x, p_m, model, missing, out_preds,
tree_begin, tree_end);
} else if (x.type() == typeid(std::shared_ptr<data::ColumnarAdapter>)) {
this->DispatchedInplacePredict<data::ColumnarAdapter, kBlockOfRowsSize>(
x, p_m, model, missing, out_preds, tree_begin, tree_end);
} else {
return false;
}

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2015-2023 by Contributors
* Copyright 2015-2023, XGBoost Contributors
* \file tree_model.cc
* \brief model structure for tree
*/
@@ -15,9 +15,9 @@
#include <type_traits>
#include "../common/categorical.h"
#include "../common/common.h"
#include "../common/common.h" // for EscapeU8
#include "../predictor/predict_fn.h"
#include "io_utils.h" // GetElem
#include "io_utils.h" // for GetElem
#include "param.h"
#include "xgboost/base.h"
#include "xgboost/data.h"
@@ -207,8 +207,9 @@ TreeGenerator* TreeGenerator::Create(std::string const& attrs, FeatureMap const&
__make_ ## TreeGenReg ## _ ## UniqueId ## __ = \
::dmlc::Registry< ::xgboost::TreeGenReg>::Get()->__REGISTER__(Name)
std::vector<bst_cat_t> GetSplitCategories(RegTree const &tree, int32_t nidx) {
auto const &csr = tree.GetCategoriesMatrix();
namespace {
std::vector<bst_cat_t> GetSplitCategories(RegTree const& tree, int32_t nidx) {
auto const& csr = tree.GetCategoriesMatrix();
auto seg = csr.node_ptr[nidx];
auto split = common::KCatBitField{csr.categories.subspan(seg.beg, seg.size)};
@@ -221,7 +222,7 @@ std::vector<bst_cat_t> GetSplitCategories(RegTree const &tree, int32_t nidx) {
return cats;
}
std::string PrintCatsAsSet(std::vector<bst_cat_t> const &cats) {
std::string PrintCatsAsSet(std::vector<bst_cat_t> const& cats) {
std::stringstream ss;
ss << "{";
for (size_t i = 0; i < cats.size(); ++i) {
@@ -234,6 +235,15 @@ std::string PrintCatsAsSet(std::vector<bst_cat_t> const &cats) {
return ss.str();
}
std::string GetFeatureName(FeatureMap const& fmap, bst_feature_t split_index) {
CHECK_LE(fmap.Size(), std::numeric_limits<decltype(split_index)>::max());
auto fname = split_index < static_cast<decltype(split_index)>(fmap.Size())
? fmap.Name(split_index)
: ('f' + std::to_string(split_index));
return common::EscapeU8(fname);
}
} // anonymous namespace
class TextGenerator : public TreeGenerator {
using SuperT = TreeGenerator;
@@ -263,7 +273,7 @@ class TextGenerator : public TreeGenerator {
std::string result = SuperT::Match(
kIndicatorTemplate,
{{"{nid}", std::to_string(nid)},
{"{fname}", fmap_.Name(split_index)},
{"{fname}", GetFeatureName(fmap_, split_index)},
{"{yes}", std::to_string(nyes)},
{"{no}", std::to_string(tree[nid].DefaultChild())}});
return result;
@@ -277,8 +287,7 @@ class TextGenerator : public TreeGenerator {
template_str,
{{"{tabs}", SuperT::Tabs(depth)},
{"{nid}", std::to_string(nid)},
{"{fname}", split_index < fmap_.Size() ? fmap_.Name(split_index) :
std::to_string(split_index)},
{"{fname}", GetFeatureName(fmap_, split_index)},
{"{cond}", cond},
{"{left}", std::to_string(tree[nid].LeftChild())},
{"{right}", std::to_string(tree[nid].RightChild())},
@@ -308,7 +317,7 @@ class TextGenerator : public TreeGenerator {
std::string PlainNode(RegTree const& tree, int32_t nid, uint32_t depth) const override {
auto cond = tree[nid].SplitCond();
static std::string const kNodeTemplate =
"{tabs}{nid}:[f{fname}<{cond}] yes={left},no={right},missing={missing}";
"{tabs}{nid}:[{fname}<{cond}] yes={left},no={right},missing={missing}";
return SplitNodeImpl(tree, nid, kNodeTemplate, SuperT::ToStr(cond), depth);
}
@@ -376,7 +385,7 @@ class JsonGenerator : public TreeGenerator {
return result;
}
std::string LeafNode(RegTree const& tree, int32_t nid, uint32_t) const override {
std::string LeafNode(RegTree const& tree, bst_node_t nid, uint32_t) const override {
static std::string const kLeafTemplate =
R"L({ "nodeid": {nid}, "leaf": {leaf} {stat}})L";
static std::string const kStatTemplate =
@@ -392,26 +401,22 @@ class JsonGenerator : public TreeGenerator {
return result;
}
std::string Indicator(RegTree const& tree, int32_t nid, uint32_t depth) const override {
std::string Indicator(RegTree const& tree, bst_node_t nid, uint32_t depth) const override {
int32_t nyes = tree[nid].DefaultLeft() ?
tree[nid].RightChild() : tree[nid].LeftChild();
static std::string const kIndicatorTemplate =
R"ID( "nodeid": {nid}, "depth": {depth}, "split": "{fname}", "yes": {yes}, "no": {no})ID";
auto split_index = tree[nid].SplitIndex();
auto fname = fmap_.Name(split_index);
std::string qfname; // quoted
common::EscapeU8(fname, &qfname);
auto result = SuperT::Match(
kIndicatorTemplate,
{{"{nid}", std::to_string(nid)},
{"{depth}", std::to_string(depth)},
{"{fname}", qfname},
{"{yes}", std::to_string(nyes)},
{"{no}", std::to_string(tree[nid].DefaultChild())}});
auto result =
SuperT::Match(kIndicatorTemplate, {{"{nid}", std::to_string(nid)},
{"{depth}", std::to_string(depth)},
{"{fname}", GetFeatureName(fmap_, split_index)},
{"{yes}", std::to_string(nyes)},
{"{no}", std::to_string(tree[nid].DefaultChild())}});
return result;
}
std::string Categorical(RegTree const& tree, int32_t nid, uint32_t depth) const override {
std::string Categorical(RegTree const& tree, bst_node_t nid, uint32_t depth) const override {
auto cats = GetSplitCategories(tree, nid);
static std::string const kCategoryTemplate =
R"I( "nodeid": {nid}, "depth": {depth}, "split": "{fname}", )I"
@@ -429,22 +434,17 @@ class JsonGenerator : public TreeGenerator {
return results;
}
std::string SplitNodeImpl(RegTree const &tree, int32_t nid,
std::string const &template_str, std::string cond,
uint32_t depth) const {
std::string SplitNodeImpl(RegTree const& tree, bst_node_t nid, std::string const& template_str,
std::string cond, uint32_t depth) const {
auto split_index = tree[nid].SplitIndex();
auto fname = split_index < fmap_.Size() ? fmap_.Name(split_index) : std::to_string(split_index);
std::string qfname; // quoted
common::EscapeU8(fname, &qfname);
std::string const result = SuperT::Match(
template_str,
{{"{nid}", std::to_string(nid)},
{"{depth}", std::to_string(depth)},
{"{fname}", qfname},
{"{cond}", cond},
{"{left}", std::to_string(tree[nid].LeftChild())},
{"{right}", std::to_string(tree[nid].RightChild())},
{"{missing}", std::to_string(tree[nid].DefaultChild())}});
std::string const result =
SuperT::Match(template_str, {{"{nid}", std::to_string(nid)},
{"{depth}", std::to_string(depth)},
{"{fname}", GetFeatureName(fmap_, split_index)},
{"{cond}", cond},
{"{left}", std::to_string(tree[nid].LeftChild())},
{"{right}", std::to_string(tree[nid].RightChild())},
{"{missing}", std::to_string(tree[nid].DefaultChild())}});
return result;
}
@@ -605,9 +605,8 @@ class GraphvizGenerator : public TreeGenerator {
auto const& extra = kwargs["graph_attrs"];
static std::string const kGraphTemplate = " graph [ {key}=\"{value}\" ]\n";
for (auto const& kv : extra) {
param_.graph_attrs += SuperT::Match(kGraphTemplate,
{{"{key}", kv.first},
{"{value}", kv.second}});
param_.graph_attrs +=
SuperT::Match(kGraphTemplate, {{"{key}", kv.first}, {"{value}", kv.second}});
}
kwargs.erase("graph_attrs");
@@ -646,20 +645,18 @@ class GraphvizGenerator : public TreeGenerator {
// Only indicator is different, so we combine all different node types into this
// function.
std::string PlainNode(RegTree const& tree, int32_t nid, uint32_t) const override {
auto split = tree[nid].SplitIndex();
auto split_index = tree[nid].SplitIndex();
auto cond = tree[nid].SplitCond();
static std::string const kNodeTemplate =
" {nid} [ label=\"{fname}{<}{cond}\" {params}]\n";
static std::string const kNodeTemplate = " {nid} [ label=\"{fname}{<}{cond}\" {params}]\n";
// Indicator only has fname.
bool has_less = (split >= fmap_.Size()) || fmap_.TypeOf(split) != FeatureMap::kIndicator;
std::string result = SuperT::Match(kNodeTemplate, {
{"{nid}", std::to_string(nid)},
{"{fname}", split < fmap_.Size() ? fmap_.Name(split) :
'f' + std::to_string(split)},
{"{<}", has_less ? "<" : ""},
{"{cond}", has_less ? SuperT::ToStr(cond) : ""},
{"{params}", param_.condition_node_params}});
bool has_less =
(split_index >= fmap_.Size()) || fmap_.TypeOf(split_index) != FeatureMap::kIndicator;
std::string result =
SuperT::Match(kNodeTemplate, {{"{nid}", std::to_string(nid)},
{"{fname}", GetFeatureName(fmap_, split_index)},
{"{<}", has_less ? "<" : ""},
{"{cond}", has_less ? SuperT::ToStr(cond) : ""},
{"{params}", param_.condition_node_params}});
result += BuildEdge<false>(tree, nid, tree[nid].LeftChild(), true);
result += BuildEdge<false>(tree, nid, tree[nid].RightChild(), false);
@@ -672,14 +669,13 @@ class GraphvizGenerator : public TreeGenerator {
" {nid} [ label=\"{fname}:{cond}\" {params}]\n";
auto cats = GetSplitCategories(tree, nid);
auto cats_str = PrintCatsAsSet(cats);
auto split = tree[nid].SplitIndex();
std::string result = SuperT::Match(
kLabelTemplate,
{{"{nid}", std::to_string(nid)},
{"{fname}", split < fmap_.Size() ? fmap_.Name(split)
: 'f' + std::to_string(split)},
{"{cond}", cats_str},
{"{params}", param_.condition_node_params}});
auto split_index = tree[nid].SplitIndex();
std::string result =
SuperT::Match(kLabelTemplate, {{"{nid}", std::to_string(nid)},
{"{fname}", GetFeatureName(fmap_, split_index)},
{"{cond}", cats_str},
{"{params}", param_.condition_node_params}});
result += BuildEdge<true>(tree, nid, tree[nid].LeftChild(), true);
result += BuildEdge<true>(tree, nid, tree[nid].RightChild(), false);

View File

@@ -1,21 +1,22 @@
/**
* Copyright 2014-2023 by XGBoost Contributors
* Copyright 2014-2024, XGBoost Contributors
* \file updater_colmaker.cc
* \brief use columnwise update to construct a tree
* \author Tianqi Chen
*/
#include <vector>
#include <cmath>
#include <algorithm>
#include <cmath>
#include <vector>
#include "../common/error_msg.h" // for NoCategorical
#include "../common/random.h"
#include "constraints.h"
#include "param.h"
#include "split_evaluator.h"
#include "xgboost/json.h"
#include "xgboost/logging.h"
#include "xgboost/parameter.h"
#include "xgboost/tree_updater.h"
#include "xgboost/logging.h"
#include "xgboost/json.h"
#include "param.h"
#include "constraints.h"
#include "../common/random.h"
#include "split_evaluator.h"
namespace xgboost::tree {
@@ -102,6 +103,9 @@ class ColMaker: public TreeUpdater {
LOG(FATAL) << "Updater `grow_colmaker` or `exact` tree method doesn't "
"support external memory training.";
}
if (dmat->Info().HasCategorical()) {
LOG(FATAL) << error::NoCategorical("Updater `grow_colmaker` or `exact` tree method");
}
this->LazyGetColumnDensity(dmat);
// rescale learning rate according to size of trees
interaction_constraints_.Configure(*param, dmat->Info().num_row_);

View File

@@ -545,12 +545,12 @@ class QuantileHistMaker : public TreeUpdater {
}
bool UpdatePredictionCache(const DMatrix *data, linalg::MatrixView<float> out_preds) override {
if (p_impl_) {
return p_impl_->UpdatePredictionCache(data, out_preds);
} else if (p_mtimpl_) {
if (out_preds.Shape(1) > 1) {
CHECK(p_mtimpl_);
return p_mtimpl_->UpdatePredictionCache(data, out_preds);
} else {
return false;
CHECK(p_impl_);
return p_impl_->UpdatePredictionCache(data, out_preds);
}
}