merge latest, Jan 12 2024

This commit is contained in:
Hui Liu
2024-01-12 09:57:11 -08:00
251 changed files with 9023 additions and 5012 deletions

View File

@@ -159,6 +159,16 @@ XGB_DLL int XGDMatrixCreateFromURI(char const *config, DMatrixHandle *out);
XGB_DLL int XGDMatrixCreateFromCSREx(const size_t *indptr, const unsigned *indices,
const float *data, size_t nindptr, size_t nelem,
size_t num_col, DMatrixHandle *out);
/**
 * @brief Create a DMatrix from columnar data. (table)
 *
 * @param data A JSON array of __array_interface__ for each column; see
 *             @ref XGBoosterPredictFromColumnar for details.
 * @param config See @ref XGDMatrixCreateFromDense for details.
 * @param out The created dmatrix.
 *
 * @return 0 when success, -1 when failure happens
 */
XGB_DLL int XGDMatrixCreateFromColumnar(char const *data, char const *config, DMatrixHandle *out);
/**
* @example c-api-demo.c
@@ -514,6 +524,16 @@ XGB_DLL int
XGProxyDMatrixSetDataCudaArrayInterface(DMatrixHandle handle,
const char *c_interface_str);
/**
 * @brief Set columnar (table) data on a DMatrix proxy.
 *
 * @param handle A DMatrix proxy created by @ref XGProxyDMatrixCreate
 * @param c_interface_str A JSON array of __array_interface__ for each column;
 *                        see @ref XGBoosterPredictFromColumnar for details.
 *
 * @return 0 when success, -1 when failure happens
 */
XGB_DLL int XGProxyDMatrixSetDataColumnar(DMatrixHandle handle, char const *c_interface_str);
/*!
* \brief Set data on a DMatrix proxy.
*
@@ -1113,6 +1133,31 @@ XGB_DLL int XGBoosterPredictFromDense(BoosterHandle handle, char const *values,
* @example inference.c
*/
/**
 * @brief Inplace prediction from CPU columnar data. (Table)
 *
 * @note If the booster is configured to run on a CUDA device, XGBoost falls back to run
 *       prediction with DMatrix with a performance warning.
 *
 * @param handle Booster handle.
 * @param array_interface A JSON array of __array_interface__ for each column.
 * @param c_json_config See @ref XGBoosterPredictFromDMatrix for more info.
 *          Additional fields for inplace prediction are:
 *     - "missing": float
 * @param m An optional (NULL if not available) proxy DMatrix instance
 *          storing meta info.
 *
 * @param out_shape See @ref XGBoosterPredictFromDMatrix for more info.
 * @param out_dim See @ref XGBoosterPredictFromDMatrix for more info.
 * @param out_result See @ref XGBoosterPredictFromDMatrix for more info.
 *
 * @return 0 when success, -1 when failure happens
 */
XGB_DLL int XGBoosterPredictFromColumnar(BoosterHandle handle, char const *array_interface,
                                         char const *c_json_config, DMatrixHandle m,
                                         bst_ulong const **out_shape, bst_ulong *out_dim,
                                         const float **out_result);
/**
* \brief Inplace prediction from CPU CSR matrix.
*

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2015-2023 by XGBoost Contributors
* Copyright 2015-2024, XGBoost Contributors
* \file data.h
* \brief The input data structure of xgboost.
* \author Tianqi Chen
@@ -158,15 +158,15 @@ class MetaInfo {
void SetFeatureInfo(const char *key, const char **info, const bst_ulong size);
void GetFeatureInfo(const char *field, std::vector<std::string>* out_str_vecs) const;
/*
* \brief Extend with other MetaInfo.
/**
* @brief Extend with other MetaInfo.
*
* \param that The other MetaInfo object.
* @param that The other MetaInfo object.
*
* \param accumulate_rows Whether rows need to be accumulated in this function. If
* @param accumulate_rows Whether rows need to be accumulated in this function. If
* client code knows number of rows in advance, set this
* parameter to false.
* \param check_column Whether the extend method should check the consistency of
* @param check_column Whether the extend method should check the consistency of
* columns.
*/
void Extend(MetaInfo const& that, bool accumulate_rows, bool check_column);
@@ -203,6 +203,10 @@ class MetaInfo {
* learning where labels are only available on worker 0.
*/
bool ShouldHaveLabels() const;
/**
 * @brief Flag for whether the DMatrix has categorical features.
 *
 * @return The cached has_categorical_ flag (false by default).
 */
bool HasCategorical() const { return has_categorical_; }
private:
void SetInfoFromHost(Context const& ctx, StringView key, Json arr);
@@ -210,6 +214,7 @@ class MetaInfo {
/*! \brief argsort of labels */
mutable std::vector<size_t> label_order_cache_;
bool has_categorical_{false};
};
/*! \brief Element from a sparse vector */

View File

@@ -683,7 +683,7 @@ using MatrixView = TensorView<T, 2>;
*
* `stream` is optionally included when data is on CUDA device.
*/
template <typename T, int32_t D>
template <typename T, std::int32_t D>
Json ArrayInterface(TensorView<T const, D> const &t) {
Json array_interface{Object{}};
array_interface["data"] = std::vector<Json>(2);
@@ -691,7 +691,7 @@ Json ArrayInterface(TensorView<T const, D> const &t) {
array_interface["data"][1] = Boolean{true};
if (t.Device().IsCUDA()) {
// Change this once we have different CUDA stream.
array_interface["stream"] = Null{};
array_interface["stream"] = Integer{2};
}
std::vector<Json> shape(t.Shape().size());
std::vector<Json> stride(t.Stride().size());

View File

@@ -129,6 +129,12 @@ class ObjFunction : public Configurable {
* \param name Name of the objective.
*/
static ObjFunction* Create(const std::string& name, Context const* ctx);
/*!
 * \brief Return sycl specific implementation name if possible.
 * \param name Name of the objective.
 * \return The SYCL-specific objective name when one exists; otherwise
 *         presumably the input name unchanged — NOTE(review): definition is
 *         not visible here, confirm against the implementation.
 */
static std::string GetSyclImplementationName(const std::string& name);
};
/*!

View File

@@ -53,7 +53,7 @@ namespace parameter { \
template <> \
class FieldEntry<EnumClass> : public FieldEntry<int> { \
public: \
FieldEntry<EnumClass>() { \
FieldEntry() { \
static_assert( \
std::is_same<int, typename std::underlying_type<EnumClass>::type>::value, \
"enum class must be backed by int"); \