merge latest, Jan 12 2024

This commit is contained in:
Hui Liu
2024-01-12 09:57:11 -08:00
251 changed files with 9023 additions and 5012 deletions

View File

@@ -159,6 +159,16 @@ XGB_DLL int XGDMatrixCreateFromURI(char const *config, DMatrixHandle *out);
XGB_DLL int XGDMatrixCreateFromCSREx(const size_t *indptr, const unsigned *indices,
const float *data, size_t nindptr, size_t nelem,
size_t num_col, DMatrixHandle *out);
/**
 * @brief Create a DMatrix from columnar data. (table)
 *
 * @param data A JSON array of __array_interface__ for each column; see
 *             @ref XGBoosterPredictFromColumnar for details.
 * @param config See @ref XGDMatrixCreateFromDense for details.
 * @param out The created dmatrix.
 *
 * @return 0 when success, -1 when failure happens
 */
XGB_DLL int XGDMatrixCreateFromColumnar(char const *data, char const *config, DMatrixHandle *out);
/**
* @example c-api-demo.c
@@ -514,6 +524,16 @@ XGB_DLL int
XGProxyDMatrixSetDataCudaArrayInterface(DMatrixHandle handle,
const char *c_interface_str);
/**
 * @brief Set columnar (table) data on a DMatrix proxy.
 *
 * @param handle A DMatrix proxy created by @ref XGProxyDMatrixCreate
 * @param c_interface_str A JSON array of __array_interface__ for each column;
 *                        see @ref XGBoosterPredictFromColumnar for details.
 *
 * @return 0 when success, -1 when failure happens
 */
XGB_DLL int XGProxyDMatrixSetDataColumnar(DMatrixHandle handle, char const *c_interface_str);
/*!
* \brief Set data on a DMatrix proxy.
*
@@ -1113,6 +1133,31 @@ XGB_DLL int XGBoosterPredictFromDense(BoosterHandle handle, char const *values,
* @example inference.c
*/
/**
 * @brief Inplace prediction from CPU columnar data. (Table)
 *
 * @note If the booster is configured to run on a CUDA device, XGBoost falls back to run
 *       prediction with DMatrix with a performance warning.
 *
 * @param handle Booster handle.
 * @param array_interface A JSON array of __array_interface__ for each column.
 * @param c_json_config See @ref XGBoosterPredictFromDMatrix for more info.
 *          Additional fields for inplace prediction are:
 *     - "missing": float
 * @param m An optional (NULL if not available) proxy DMatrix instance
 *          storing meta info.
 *
 * @param out_shape See @ref XGBoosterPredictFromDMatrix for more info.
 * @param out_dim See @ref XGBoosterPredictFromDMatrix for more info.
 * @param out_result See @ref XGBoosterPredictFromDMatrix for more info.
 *
 * @return 0 when success, -1 when failure happens
 */
XGB_DLL int XGBoosterPredictFromColumnar(BoosterHandle handle, char const *array_interface,
                                         char const *c_json_config, DMatrixHandle m,
                                         bst_ulong const **out_shape, bst_ulong *out_dim,
                                         const float **out_result);
/**
* \brief Inplace prediction from CPU CSR matrix.
*

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2015-2023 by XGBoost Contributors
* Copyright 2015-2024, XGBoost Contributors
* \file data.h
* \brief The input data structure of xgboost.
* \author Tianqi Chen
@@ -158,15 +158,15 @@ class MetaInfo {
void SetFeatureInfo(const char *key, const char **info, const bst_ulong size);
void GetFeatureInfo(const char *field, std::vector<std::string>* out_str_vecs) const;
/*
* \brief Extend with other MetaInfo.
/**
* @brief Extend with other MetaInfo.
*
* \param that The other MetaInfo object.
* @param that The other MetaInfo object.
*
* \param accumulate_rows Whether rows need to be accumulated in this function. If
* @param accumulate_rows Whether rows need to be accumulated in this function. If
* client code knows number of rows in advance, set this
* parameter to false.
* \param check_column Whether the extend method should check the consistency of
* @param check_column Whether the extend method should check the consistency of
* columns.
*/
void Extend(MetaInfo const& that, bool accumulate_rows, bool check_column);
@@ -203,6 +203,10 @@ class MetaInfo {
* learning where labels are only available on worker 0.
*/
bool ShouldHaveLabels() const;
/**
 * @brief Flag for whether the DMatrix has categorical features.
 *
 * @return The cached has_categorical_ flag (false by default).
 */
bool HasCategorical() const { return has_categorical_; }
private:
void SetInfoFromHost(Context const& ctx, StringView key, Json arr);
@@ -210,6 +214,7 @@ class MetaInfo {
/*! \brief argsort of labels */
mutable std::vector<size_t> label_order_cache_;
bool has_categorical_{false};
};
/*! \brief Element from a sparse vector */

View File

@@ -683,7 +683,7 @@ using MatrixView = TensorView<T, 2>;
*
* `stream` is optionally included when data is on CUDA device.
*/
template <typename T, int32_t D>
template <typename T, std::int32_t D>
Json ArrayInterface(TensorView<T const, D> const &t) {
Json array_interface{Object{}};
array_interface["data"] = std::vector<Json>(2);
@@ -691,7 +691,7 @@ Json ArrayInterface(TensorView<T const, D> const &t) {
array_interface["data"][1] = Boolean{true};
if (t.Device().IsCUDA()) {
// Change this once we have different CUDA stream.
array_interface["stream"] = Null{};
array_interface["stream"] = Integer{2};
}
std::vector<Json> shape(t.Shape().size());
std::vector<Json> stride(t.Stride().size());

View File

@@ -129,6 +129,12 @@ class ObjFunction : public Configurable {
* \param name Name of the objective.
*/
static ObjFunction* Create(const std::string& name, Context const* ctx);
/*!
 * \brief Return sycl specific implementation name if possible.
 * \param name Name of the objective.
 * \return The SYCL-specific objective name when one exists; otherwise
 *         presumably the input name unchanged — NOTE(review): definition is
 *         not visible here, confirm against the implementation.
 */
static std::string GetSyclImplementationName(const std::string& name);
};
/*!

View File

@@ -53,7 +53,7 @@ namespace parameter { \
template <> \
class FieldEntry<EnumClass> : public FieldEntry<int> { \
public: \
FieldEntry<EnumClass>() { \
FieldEntry() { \
static_assert( \
std::is_same<int, typename std::underlying_type<EnumClass>::type>::value, \
"enum class must be backed by int"); \