Cleanup set info. (#10139)

- Use the array interface internally.
- Deprecate `XGDMatrixSetDenseInfo`.
- Deprecate `XGDMatrixSetUIntInfo`.
- Move the handling of `DataType` into the deprecated C function.

---------

Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>
This commit is contained in:
Jiaming Yuan 2024-03-26 23:26:24 +08:00 committed by GitHub
parent 6a7c6a8ae6
commit 230010d9a0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
37 changed files with 246 additions and 268 deletions

View File

@ -110,7 +110,7 @@ jobs:
name: Test R package on Debian name: Test R package on Debian
runs-on: ubuntu-latest runs-on: ubuntu-latest
container: container:
image: rhub/debian-gcc-devel image: rhub/debian-gcc-release
steps: steps:
- name: Install system dependencies - name: Install system dependencies
@ -130,12 +130,12 @@ jobs:
- name: Install dependencies - name: Install dependencies
shell: bash -l {0} shell: bash -l {0}
run: | run: |
/tmp/R-devel/bin/Rscript -e "source('./R-package/tests/helper_scripts/install_deps.R')" Rscript -e "source('./R-package/tests/helper_scripts/install_deps.R')"
- name: Test R - name: Test R
shell: bash -l {0} shell: bash -l {0}
run: | run: |
python3 tests/ci_build/test_r_package.py --r=/tmp/R-devel/bin/R --build-tool=autotools --task=check python3 tests/ci_build/test_r_package.py --r=/usr/bin/R --build-tool=autotools --task=check
- uses: dorny/paths-filter@v2 - uses: dorny/paths-filter@v2
id: changes id: changes
@ -147,4 +147,4 @@ jobs:
- name: Run document check - name: Run document check
if: steps.changes.outputs.r_package == 'true' if: steps.changes.outputs.r_package == 'true'
run: | run: |
python3 tests/ci_build/test_r_package.py --r=/tmp/R-devel/bin/R --task=doc python3 tests/ci_build/test_r_package.py --r=/usr/bin/R --task=doc

View File

@ -1,5 +1,5 @@
/** /**
* Copyright 2015~2023 by XGBoost Contributors * Copyright 2015-2024, XGBoost Contributors
* \file c_api.h * \file c_api.h
* \author Tianqi Chen * \author Tianqi Chen
* \brief C API of XGBoost, used for interfacing to other languages. * \brief C API of XGBoost, used for interfacing to other languages.
@ -639,21 +639,14 @@ XGB_DLL int XGDMatrixSetInfoFromInterface(DMatrixHandle handle,
* \param len length of array * \param len length of array
* \return 0 when success, -1 when failure happens * \return 0 when success, -1 when failure happens
*/ */
XGB_DLL int XGDMatrixSetFloatInfo(DMatrixHandle handle, XGB_DLL int XGDMatrixSetFloatInfo(DMatrixHandle handle, const char *field, const float *array,
const char *field,
const float *array,
bst_ulong len); bst_ulong len);
/*! /**
* \brief set uint32 vector to a content in info * @deprecated since 2.1.0
* \param handle a instance of data matrix *
* \param field field name * Use @ref XGDMatrixSetInfoFromInterface instead.
* \param array pointer to unsigned int vector
* \param len length of array
* \return 0 when success, -1 when failure happens
*/ */
XGB_DLL int XGDMatrixSetUIntInfo(DMatrixHandle handle, XGB_DLL int XGDMatrixSetUIntInfo(DMatrixHandle handle, const char *field, const unsigned *array,
const char *field,
const unsigned *array,
bst_ulong len); bst_ulong len);
/*! /*!
@ -725,42 +718,13 @@ XGB_DLL int XGDMatrixGetStrFeatureInfo(DMatrixHandle handle, const char *field,
bst_ulong *size, bst_ulong *size,
const char ***out_features); const char ***out_features);
/*! /**
* \brief Set meta info from dense matrix. Valid field names are: * @deprecated since 2.1.0
* *
* - label * Use @ref XGDMatrixSetInfoFromInterface instead.
* - weight
* - base_margin
* - group
* - label_lower_bound
* - label_upper_bound
* - feature_weights
*
* \param handle An instance of data matrix
* \param field Field name
* \param data Pointer to consecutive memory storing data.
* \param size Size of the data, this is relative to size of type. (Meaning NOT number
* of bytes.)
* \param type Indicator of data type. This is defined in xgboost::DataType enum class.
* - float = 1
* - double = 2
* - uint32_t = 3
* - uint64_t = 4
* \return 0 when success, -1 when failure happens
*/ */
XGB_DLL int XGDMatrixSetDenseInfo(DMatrixHandle handle, const char *field, XGB_DLL int XGDMatrixSetDenseInfo(DMatrixHandle handle, const char *field, void const *data,
void const *data, bst_ulong size, int type); bst_ulong size, int type);
/*!
* \brief (deprecated) Use XGDMatrixSetUIntInfo instead. Set group of the training matrix
* \param handle a instance of data matrix
* \param group pointer to group size
* \param len length of array
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixSetGroup(DMatrixHandle handle,
const unsigned *group,
bst_ulong len);
/*! /*!
* \brief get float info vector from matrix. * \brief get float info vector from matrix.

View File

@ -19,7 +19,6 @@
#include <algorithm> #include <algorithm>
#include <limits> #include <limits>
#include <memory> #include <memory>
#include <numeric>
#include <string> #include <string>
#include <utility> #include <utility>
#include <vector> #include <vector>
@ -137,14 +136,6 @@ class MetaInfo {
* \param fo The output stream. * \param fo The output stream.
*/ */
void SaveBinary(dmlc::Stream* fo) const; void SaveBinary(dmlc::Stream* fo) const;
/*!
* \brief Set information in the meta info.
* \param key The key of the information.
* \param dptr The data pointer of the source array.
* \param dtype The type of the source data.
* \param num Number of elements in the source array.
*/
void SetInfo(Context const& ctx, const char* key, const void* dptr, DataType dtype, size_t num);
/*! /*!
* \brief Set information in the meta info with array interface. * \brief Set information in the meta info with array interface.
* \param key The key of the information. * \param key The key of the information.
@ -517,10 +508,6 @@ class DMatrix {
DMatrix() = default; DMatrix() = default;
/*! \brief meta information of the dataset */ /*! \brief meta information of the dataset */
virtual MetaInfo& Info() = 0; virtual MetaInfo& Info() = 0;
/*!
 * \brief Set meta information from a raw data pointer by forwarding to
 *        MetaInfo::SetInfo together with the context returned by Ctx().
 * \param key   The key of the information.
 * \param dptr  The data pointer of the source array.
 * \param dtype The type of the source data.
 * \param num   Number of elements in the source array.
 */
virtual void SetInfo(const char* key, const void* dptr, DataType dtype, size_t num) {
// The context is fetched before Info() is called; keep this order.
auto const& ctx = *this->Ctx();
this->Info().SetInfo(ctx, key, dptr, dtype, num);
}
virtual void SetInfo(const char* key, std::string const& interface_str) { virtual void SetInfo(const char* key, std::string const& interface_str) {
auto const& ctx = *this->Ctx(); auto const& ctx = *this->Ctx();
this->Info().SetInfo(ctx, key, StringView{interface_str}); this->Info().SetInfo(ctx, key, StringView{interface_str});

View File

@ -190,13 +190,14 @@ constexpr auto ArrToTuple(T (&arr)[N]) {
// uint division optimization inspired by the CIndexer in cupy. Division operation is // uint division optimization inspired by the CIndexer in cupy. Division operation is
// slow on both CPU and GPU, especially 64 bit integer. So here we first try to avoid 64 // slow on both CPU and GPU, especially 64 bit integer. So here we first try to avoid 64
// bit when the index is smaller, then try to avoid division when it's exp of 2. // bit when the index is smaller, then try to avoid division when it's exp of 2.
template <typename I, int32_t D> template <typename I, std::int32_t D>
LINALG_HD auto UnravelImpl(I idx, common::Span<size_t const, D> shape) { LINALG_HD auto UnravelImpl(I idx, common::Span<size_t const, D> shape) {
size_t index[D]{0}; std::size_t index[D]{0};
static_assert(std::is_signed<decltype(D)>::value, static_assert(std::is_signed<decltype(D)>::value,
"Don't change the type without changing the for loop."); "Don't change the type without changing the for loop.");
auto const sptr = shape.data();
for (int32_t dim = D; --dim > 0;) { for (int32_t dim = D; --dim > 0;) {
auto s = static_cast<std::remove_const_t<std::remove_reference_t<I>>>(shape[dim]); auto s = static_cast<std::remove_const_t<std::remove_reference_t<I>>>(sptr[dim]);
if (s & (s - 1)) { if (s & (s - 1)) {
auto t = idx / s; auto t = idx / s;
index[dim] = idx - t * s; index[dim] = idx - t * s;
@ -745,6 +746,14 @@ auto ArrayInterfaceStr(TensorView<T, D> const &t) {
return str; return str;
} }
/**
 * \brief Build the array-interface string for a 1-dimensional buffer.
 *
 * A tensor view of length \p len is created over \p vec and handed to
 * linalg::ArrayInterfaceStr.
 *
 * \param vec Pointer to the first element of the buffer.
 * \param len Number of elements (not bytes) in the buffer.
 *
 * \return Whatever linalg::ArrayInterfaceStr produces for that view.
 */
template <typename T>
auto Make1dInterface(T const *vec, std::size_t len) {
  // MakeTensorView needs a context; a default-constructed one is used here.
  Context ctx;
  auto view = linalg::MakeTensorView(&ctx, common::Span{vec, len}, len);
  return linalg::ArrayInterfaceStr(view);
}
/** /**
* \brief A tensor storage. To use it for other functionality like slicing one needs to * \brief A tensor storage. To use it for other functionality like slicing one needs to
* obtain a view first. This way we can use it on both host and device. * obtain a view first. This way we can use it on both host and device.

View File

@ -30,9 +30,8 @@
#define XGBOOST_SPAN_H_ #define XGBOOST_SPAN_H_
#include <xgboost/base.h> #include <xgboost/base.h>
#include <xgboost/logging.h>
#include <cinttypes> // size_t #include <cstddef> // size_t
#include <cstdio> #include <cstdio>
#include <iterator> #include <iterator>
#include <limits> // numeric_limits #include <limits> // numeric_limits
@ -73,8 +72,7 @@
#endif // defined(_MSC_VER) && _MSC_VER < 1910 #endif // defined(_MSC_VER) && _MSC_VER < 1910
namespace xgboost { namespace xgboost::common {
namespace common {
#if defined(__CUDA_ARCH__) #if defined(__CUDA_ARCH__)
// Usual logging facility is not available inside device code. // Usual logging facility is not available inside device code.
@ -707,8 +705,8 @@ class IterSpan {
return it_ + size(); return it_ + size();
} }
}; };
} // namespace common } // namespace xgboost::common
} // namespace xgboost
#if defined(_MSC_VER) &&_MSC_VER < 1910 #if defined(_MSC_VER) &&_MSC_VER < 1910
#undef constexpr #undef constexpr

View File

@ -408,7 +408,8 @@ JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixSetFloatI
jfloat* array = jenv->GetFloatArrayElements(jarray, NULL); jfloat* array = jenv->GetFloatArrayElements(jarray, NULL);
bst_ulong len = (bst_ulong)jenv->GetArrayLength(jarray); bst_ulong len = (bst_ulong)jenv->GetArrayLength(jarray);
int ret = XGDMatrixSetFloatInfo(handle, field, (float const *)array, len); auto str = xgboost::linalg::Make1dInterface(array, len);
int ret = XGDMatrixSetInfoFromInterface(handle, field, str.c_str());
JVM_CHECK_CALL(ret); JVM_CHECK_CALL(ret);
//release //release
if (field) jenv->ReleaseStringUTFChars(jfield, field); if (field) jenv->ReleaseStringUTFChars(jfield, field);
@ -427,7 +428,8 @@ JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixSetUIntIn
const char* field = jenv->GetStringUTFChars(jfield, 0); const char* field = jenv->GetStringUTFChars(jfield, 0);
jint* array = jenv->GetIntArrayElements(jarray, NULL); jint* array = jenv->GetIntArrayElements(jarray, NULL);
bst_ulong len = (bst_ulong)jenv->GetArrayLength(jarray); bst_ulong len = (bst_ulong)jenv->GetArrayLength(jarray);
int ret = XGDMatrixSetUIntInfo(handle, (char const *)field, (unsigned int const *)array, len); auto str = xgboost::linalg::Make1dInterface(array, len);
int ret = XGDMatrixSetInfoFromInterface(handle, field, str.c_str());
JVM_CHECK_CALL(ret); JVM_CHECK_CALL(ret);
//release //release
if (field) jenv->ReleaseStringUTFChars(jfield, (const char *)field); if (field) jenv->ReleaseStringUTFChars(jfield, (const char *)field);
@ -730,8 +732,8 @@ JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterPredictFr
if (jmargin) { if (jmargin) {
margin = jenv->GetFloatArrayElements(jmargin, nullptr); margin = jenv->GetFloatArrayElements(jmargin, nullptr);
JVM_CHECK_CALL(XGProxyDMatrixCreate(&proxy)); JVM_CHECK_CALL(XGProxyDMatrixCreate(&proxy));
JVM_CHECK_CALL( auto str = xgboost::linalg::Make1dInterface(margin, jenv->GetArrayLength(jmargin));
XGDMatrixSetFloatInfo(proxy, "base_margin", margin, jenv->GetArrayLength(jmargin))); JVM_CHECK_CALL(XGDMatrixSetInfoFromInterface(proxy, "base_margin", str.c_str()));
} }
bst_ulong const *out_shape; bst_ulong const *out_shape;

View File

@ -1,5 +1,5 @@
/** /**
* Copyright 2014-2024 by XGBoost Contributors * Copyright 2014-2024, XGBoost Contributors
*/ */
#include "xgboost/c_api.h" #include "xgboost/c_api.h"
@ -614,8 +614,8 @@ XGB_DLL int XGDMatrixSetFloatInfo(DMatrixHandle handle, const char *field, const
API_BEGIN(); API_BEGIN();
CHECK_HANDLE(); CHECK_HANDLE();
xgboost_CHECK_C_ARG_PTR(field); xgboost_CHECK_C_ARG_PTR(field);
auto const& p_fmat = *static_cast<std::shared_ptr<DMatrix> *>(handle); auto const &p_fmat = *static_cast<std::shared_ptr<DMatrix> *>(handle);
p_fmat->SetInfo(field, info, xgboost::DataType::kFloat32, len); p_fmat->SetInfo(field, linalg::Make1dInterface(info, len));
API_END(); API_END();
} }
@ -634,8 +634,9 @@ XGB_DLL int XGDMatrixSetUIntInfo(DMatrixHandle handle, const char *field, const
API_BEGIN(); API_BEGIN();
CHECK_HANDLE(); CHECK_HANDLE();
xgboost_CHECK_C_ARG_PTR(field); xgboost_CHECK_C_ARG_PTR(field);
LOG(WARNING) << error::DeprecatedFunc(__func__, "2.1.0", "XGDMatrixSetInfoFromInterface");
auto const &p_fmat = *static_cast<std::shared_ptr<DMatrix> *>(handle); auto const &p_fmat = *static_cast<std::shared_ptr<DMatrix> *>(handle);
p_fmat->SetInfo(field, info, xgboost::DataType::kUInt32, len); p_fmat->SetInfo(field, linalg::Make1dInterface(info, len));
API_END(); API_END();
} }
@ -679,19 +680,52 @@ XGB_DLL int XGDMatrixSetDenseInfo(DMatrixHandle handle, const char *field, void
xgboost::bst_ulong size, int type) { xgboost::bst_ulong size, int type) {
API_BEGIN(); API_BEGIN();
CHECK_HANDLE(); CHECK_HANDLE();
LOG(WARNING) << error::DeprecatedFunc(__func__, "2.1.0", "XGDMatrixSetInfoFromInterface");
auto const &p_fmat = *static_cast<std::shared_ptr<DMatrix> *>(handle); auto const &p_fmat = *static_cast<std::shared_ptr<DMatrix> *>(handle);
CHECK(type >= 1 && type <= 4); CHECK(type >= 1 && type <= 4);
xgboost_CHECK_C_ARG_PTR(field); xgboost_CHECK_C_ARG_PTR(field);
p_fmat->SetInfo(field, data, static_cast<DataType>(type), size);
API_END();
}
XGB_DLL int XGDMatrixSetGroup(DMatrixHandle handle, const unsigned *group, xgboost::bst_ulong len) { Context ctx;
API_BEGIN(); auto dtype = static_cast<DataType>(type);
CHECK_HANDLE(); std::string str;
LOG(WARNING) << "XGDMatrixSetGroup is deprecated, use `XGDMatrixSetUIntInfo` instead."; auto proc = [&](auto cast_d_ptr) {
auto const &p_fmat = *static_cast<std::shared_ptr<DMatrix> *>(handle); using T = std::remove_pointer_t<decltype(cast_d_ptr)>;
p_fmat->SetInfo("group", group, xgboost::DataType::kUInt32, len); auto t = linalg::TensorView<T, 1>(
common::Span<T>{cast_d_ptr, static_cast<typename common::Span<T>::index_type>(size)},
{size}, DeviceOrd::CPU());
CHECK(t.CContiguous());
Json interface{linalg::ArrayInterface(t)};
CHECK(ArrayInterface<1>{interface}.is_contiguous);
str = Json::Dump(interface);
return str;
};
// Legacy code using XGBoost dtype, which is a small subset of array interface types.
switch (dtype) {
case xgboost::DataType::kFloat32: {
auto cast_ptr = reinterpret_cast<const float *>(data);
p_fmat->Info().SetInfo(ctx, field, proc(cast_ptr));
break;
}
case xgboost::DataType::kDouble: {
auto cast_ptr = reinterpret_cast<const double *>(data);
p_fmat->Info().SetInfo(ctx, field, proc(cast_ptr));
break;
}
case xgboost::DataType::kUInt32: {
auto cast_ptr = reinterpret_cast<const uint32_t *>(data);
p_fmat->Info().SetInfo(ctx, field, proc(cast_ptr));
break;
}
case xgboost::DataType::kUInt64: {
auto cast_ptr = reinterpret_cast<const uint64_t *>(data);
p_fmat->Info().SetInfo(ctx, field, proc(cast_ptr));
break;
}
default:
LOG(FATAL) << "Unknown data type" << static_cast<uint8_t>(dtype);
}
API_END(); API_END();
} }
@ -987,7 +1021,7 @@ XGB_DLL int XGBoosterBoostOneIter(BoosterHandle handle, DMatrixHandle dtrain, bs
bst_float *hess, xgboost::bst_ulong len) { bst_float *hess, xgboost::bst_ulong len) {
API_BEGIN(); API_BEGIN();
CHECK_HANDLE(); CHECK_HANDLE();
error::DeprecatedFunc(__func__, "2.1.0", "XGBoosterTrainOneIter"); LOG(WARNING) << error::DeprecatedFunc(__func__, "2.1.0", "XGBoosterTrainOneIter");
auto *learner = static_cast<Learner *>(handle); auto *learner = static_cast<Learner *>(handle);
auto ctx = learner->Ctx()->MakeCPU(); auto ctx = learner->Ctx()->MakeCPU();

View File

@ -1,17 +1,18 @@
/** /**
* Copyright 2021-2023, XGBoost Contributors * Copyright 2021-2024, XGBoost Contributors
*/ */
#ifndef XGBOOST_C_API_C_API_UTILS_H_ #ifndef XGBOOST_C_API_C_API_UTILS_H_
#define XGBOOST_C_API_C_API_UTILS_H_ #define XGBOOST_C_API_C_API_UTILS_H_
#include <algorithm> #include <algorithm> // for min
#include <cstddef> #include <cstddef> // for size_t
#include <functional> #include <functional> // for multiplies
#include <memory> // for shared_ptr #include <memory> // for shared_ptr
#include <string> // for string #include <numeric> // for accumulate
#include <tuple> // for make_tuple #include <string> // for string
#include <utility> // for move #include <tuple> // for make_tuple
#include <vector> #include <utility> // for move
#include <vector> // for vector
#include "../common/json_utils.h" // for TypeCheck #include "../common/json_utils.h" // for TypeCheck
#include "xgboost/c_api.h" #include "xgboost/c_api.h"

View File

@ -2,6 +2,8 @@
* Copyright 2023 XGBoost contributors * Copyright 2023 XGBoost contributors
*/ */
#if defined(XGBOOST_USE_NCCL) #if defined(XGBOOST_USE_NCCL)
#include <numeric> // for accumulate
#include "comm.cuh" #include "comm.cuh"
#include "nccl_device_communicator.cuh" #include "nccl_device_communicator.cuh"

View File

@ -11,7 +11,7 @@
#include "xgboost/logging.h" #include "xgboost/logging.h"
namespace xgboost::error { namespace xgboost::error {
std::string DeprecatedFunc(StringView old, StringView since, StringView replacement) { [[nodiscard]] std::string DeprecatedFunc(StringView old, StringView since, StringView replacement) {
std::stringstream ss; std::stringstream ss;
ss << "`" << old << "` is deprecated since" << since << ", use `" << replacement << "` instead."; ss << "`" << old << "` is deprecated since" << since << ", use `" << replacement << "` instead.";
return ss.str(); return ss.str();

View File

@ -89,7 +89,7 @@ void WarnDeprecatedGPUId();
void WarnEmptyDataset(); void WarnEmptyDataset();
std::string DeprecatedFunc(StringView old, StringView since, StringView replacement); [[nodiscard]] std::string DeprecatedFunc(StringView old, StringView since, StringView replacement);
constexpr StringView InvalidCUDAOrdinal() { constexpr StringView InvalidCUDAOrdinal() {
return "Invalid device. `device` is required to be CUDA and there must be at least one GPU " return "Invalid device. `device` is required to be CUDA and there must be at least one GPU "

View File

@ -6,7 +6,6 @@
#include <algorithm> #include <algorithm>
#include <cstdint> #include <cstdint>
#include <mutex>
#include "xgboost/data.h" #include "xgboost/data.h"
#include "xgboost/host_device_vector.h" #include "xgboost/host_device_vector.h"

View File

@ -4,6 +4,7 @@
#include "quantile.h" #include "quantile.h"
#include <limits> #include <limits>
#include <numeric> // for partial_sum
#include <utility> #include <utility>
#include "../collective/aggregator.h" #include "../collective/aggregator.h"

View File

@ -1,5 +1,5 @@
/** /**
* Copyright 2020-2023 by XGBoost Contributors * Copyright 2020-2024, XGBoost Contributors
*/ */
#include <thrust/binary_search.h> #include <thrust/binary_search.h>
#include <thrust/execution_policy.h> #include <thrust/execution_policy.h>
@ -8,8 +8,8 @@
#include <thrust/transform_scan.h> #include <thrust/transform_scan.h>
#include <thrust/unique.h> #include <thrust/unique.h>
#include <limits> // std::numeric_limits #include <limits> // std::numeric_limits
#include <memory> #include <numeric> // for partial_sum
#include <utility> #include <utility>
#include "../collective/communicator-inl.cuh" #include "../collective/communicator-inl.cuh"

View File

@ -1,8 +1,9 @@
/**
* Copyright 2020-2024, XGBoost Contributors
*/
#ifndef XGBOOST_COMMON_QUANTILE_CUH_ #ifndef XGBOOST_COMMON_QUANTILE_CUH_
#define XGBOOST_COMMON_QUANTILE_CUH_ #define XGBOOST_COMMON_QUANTILE_CUH_
#include <memory>
#include "xgboost/span.h" #include "xgboost/span.h"
#include "xgboost/data.h" #include "xgboost/data.h"
#include "device_helpers.cuh" #include "device_helpers.cuh"

View File

@ -11,7 +11,6 @@
#include <cmath> // for abs #include <cmath> // for abs
#include <cstdint> // for uint64_t, int32_t, uint8_t, uint32_t #include <cstdint> // for uint64_t, int32_t, uint8_t, uint32_t
#include <cstring> // for size_t, strcmp, memcpy #include <cstring> // for size_t, strcmp, memcpy
#include <exception> // for exception
#include <iostream> // for operator<<, basic_ostream, basic_ostream::op... #include <iostream> // for operator<<, basic_ostream, basic_ostream::op...
#include <map> // for map, operator!= #include <map> // for map, operator!=
#include <numeric> // for accumulate, partial_sum #include <numeric> // for accumulate, partial_sum
@ -22,7 +21,6 @@
#include "../collective/communicator.h" // for Operation #include "../collective/communicator.h" // for Operation
#include "../common/algorithm.h" // for StableSort #include "../common/algorithm.h" // for StableSort
#include "../common/api_entry.h" // for XGBAPIThreadLocalEntry #include "../common/api_entry.h" // for XGBAPIThreadLocalEntry
#include "../common/common.h" // for Split
#include "../common/error_msg.h" // for GroupSize, GroupWeight, InfInData #include "../common/error_msg.h" // for GroupSize, GroupWeight, InfInData
#include "../common/group_data.h" // for ParallelGroupBuilder #include "../common/group_data.h" // for ParallelGroupBuilder
#include "../common/io.h" // for PeekableInStream #include "../common/io.h" // for PeekableInStream
@ -473,11 +471,11 @@ void MetaInfo::SetInfo(Context const& ctx, StringView key, StringView interface_
<< ", must have at least 1 column even if it's empty."; << ", must have at least 1 column even if it's empty.";
auto const& first = get<Object const>(array.front()); auto const& first = get<Object const>(array.front());
auto ptr = ArrayInterfaceHandler::GetPtrFromArrayData<void*>(first); auto ptr = ArrayInterfaceHandler::GetPtrFromArrayData<void*>(first);
is_cuda = ArrayInterfaceHandler::IsCudaPtr(ptr); is_cuda = first.find("stream") != first.cend() || ArrayInterfaceHandler::IsCudaPtr(ptr);
} else { } else {
auto const& first = get<Object const>(j_interface); auto const& first = get<Object const>(j_interface);
auto ptr = ArrayInterfaceHandler::GetPtrFromArrayData<void*>(first); auto ptr = ArrayInterfaceHandler::GetPtrFromArrayData<void*>(first);
is_cuda = ArrayInterfaceHandler::IsCudaPtr(ptr); is_cuda = first.find("stream") != first.cend() || ArrayInterfaceHandler::IsCudaPtr(ptr);
} }
if (is_cuda) { if (is_cuda) {
@ -567,46 +565,6 @@ void MetaInfo::SetInfoFromHost(Context const& ctx, StringView key, Json arr) {
} }
} }
/**
 * \brief Legacy setter: store meta information given as a raw pointer plus an XGBoost
 *        DataType tag.
 *
 * The pointer is reinterpreted according to \p dtype, wrapped in a 1-dim CPU tensor
 * view, converted to an array-interface JSON object and passed to SetInfoFromHost.
 *
 * \param ctx   Context forwarded to SetInfoFromHost.
 * \param key   The key of the information, must not be null.
 * \param dptr  The data pointer of the source array.
 * \param dtype The type of the source data (float32, double, uint32 or uint64).
 * \param num   Number of elements in the source array.
 */
void MetaInfo::SetInfo(Context const& ctx, const char* key, const void* dptr, DataType dtype,
                       size_t num) {
  CHECK(key);
  // Turn a typed pointer into the array-interface JSON describing `num` elements.
  auto proc = [&](auto cast_d_ptr) {
    using T = std::remove_pointer_t<decltype(cast_d_ptr)>;
    auto t = linalg::TensorView<T, 1>(common::Span<T>{cast_d_ptr, num}, {num}, DeviceOrd::CPU());
    CHECK(t.CContiguous());
    Json interface{linalg::ArrayInterface(t)};
    assert(ArrayInterface<1>{interface}.is_contiguous);
    return interface;
  };
  // Legacy code using XGBoost dtype, which is a small subset of array interface types.
  switch (dtype) {
    case xgboost::DataType::kFloat32: {
      auto cast_ptr = reinterpret_cast<const float*>(dptr);
      this->SetInfoFromHost(ctx, key, proc(cast_ptr));
      break;
    }
    case xgboost::DataType::kDouble: {
      auto cast_ptr = reinterpret_cast<const double*>(dptr);
      this->SetInfoFromHost(ctx, key, proc(cast_ptr));
      break;
    }
    case xgboost::DataType::kUInt32: {
      auto cast_ptr = reinterpret_cast<const uint32_t*>(dptr);
      this->SetInfoFromHost(ctx, key, proc(cast_ptr));
      break;
    }
    case xgboost::DataType::kUInt64: {
      auto cast_ptr = reinterpret_cast<const uint64_t*>(dptr);
      this->SetInfoFromHost(ctx, key, proc(cast_ptr));
      break;
    }
    default:
      // Cast to int, not uint8_t: a uint8_t is streamed as a raw character, which makes
      // the offending enum value unreadable in the log.
      LOG(FATAL) << "Unknown data type: " << static_cast<int>(dtype);
  }
}
void MetaInfo::GetInfo(char const* key, bst_ulong* out_len, DataType dtype, void MetaInfo::GetInfo(char const* key, bst_ulong* out_len, DataType dtype,
const void** out_dptr) const { const void** out_dptr) const {
if (dtype == DataType::kFloat32) { if (dtype == DataType::kFloat32) {

View File

@ -1,5 +1,5 @@
/** /**
* Copyright 2021-2023, XGBoost contributors * Copyright 2021-2024, XGBoost contributors
*/ */
#include "file_iterator.h" #include "file_iterator.h"
@ -10,7 +10,10 @@
#include <ostream> // for operator<<, basic_ostream, istringstream #include <ostream> // for operator<<, basic_ostream, istringstream
#include <vector> // for vector #include <vector> // for vector
#include "../common/common.h" // for Split #include "../common/common.h" // for Split
#include "xgboost/linalg.h" // for ArrayInterfaceStr, MakeVec
#include "xgboost/linalg.h"
#include "xgboost/logging.h" // for CHECK
#include "xgboost/string_view.h" // for operator<<, StringView #include "xgboost/string_view.h" // for operator<<, StringView
namespace xgboost::data { namespace xgboost::data {
@ -28,10 +31,10 @@ std::string ValidateFileFormat(std::string const& uri) {
for (size_t i = 0; i < arg_list.size(); ++i) { for (size_t i = 0; i < arg_list.size(); ++i) {
std::istringstream is(arg_list[i]); std::istringstream is(arg_list[i]);
std::pair<std::string, std::string> kv; std::pair<std::string, std::string> kv;
CHECK(std::getline(is, kv.first, '=')) << "Invalid uri argument format" CHECK(std::getline(is, kv.first, '='))
<< " for key in arg " << i + 1; << "Invalid uri argument format" << " for key in arg " << i + 1;
CHECK(std::getline(is, kv.second)) << "Invalid uri argument format" CHECK(std::getline(is, kv.second))
<< " for value in arg " << i + 1; << "Invalid uri argument format" << " for value in arg " << i + 1;
args.insert(kv); args.insert(kv);
} }
if (args.find("format") == args.cend()) { if (args.find("format") == args.cend()) {
@ -48,4 +51,41 @@ std::string ValidateFileFormat(std::string const& uri) {
return name_args[0] + "?" + name_args[1] + '#' + name_args_cache[1]; return name_args[0] + "?" + name_args[1] + '#' + name_args_cache[1];
} }
} }
/**
 * \brief Fetch the next row block from the parser and expose it through the proxy
 *        DMatrix.
 *
 * The CSR arrays (and, when present, label / qid / weight) of the block are encoded as
 * array-interface strings and passed to the proxy via the C API.
 *
 * \return true (1) when a block was loaded, false (0) when the parser has no more data.
 */
int FileIterator::Next() {
  CHECK(parser_);
  if (!parser_->Next()) {
    // Stop iteration
    return false;
  }

  row_block_ = parser_->Value();

  auto const n_rows = row_block_.size;
  // Last entry of the row pointer is the total number of stored values in this block.
  auto const n_entries = row_block_.offset[n_rows];

  indptr_ = linalg::Make1dInterface(row_block_.offset, n_rows + 1);
  values_ = linalg::Make1dInterface(row_block_.value, n_entries);
  indices_ = linalg::Make1dInterface(row_block_.index, n_entries);

  // std::max_element returns the end iterator for an empty range, which must not be
  // dereferenced; a block without any stored entry therefore reports 0 columns.
  // NOTE(review): 0 is assumed to be an acceptable column count for an empty block --
  // confirm against XGProxyDMatrixSetDataCSR.
  size_t n_columns = 0;
  if (n_entries != 0) {
    n_columns = *std::max_element(row_block_.index, row_block_.index + n_entries);
    // dmlc parser converts 1-based indexing back to 0-based indexing so we can ignore
    // this condition and just add 1 to n_columns
    n_columns += 1;
  }

  // NOTE(review): the status codes returned by the C API calls below are ignored, as in
  // the original implementation.
  XGProxyDMatrixSetDataCSR(proxy_, indptr_.c_str(), indices_.c_str(), values_.c_str(), n_columns);

  if (row_block_.label) {
    auto str = linalg::Make1dInterface(row_block_.label, n_rows);
    XGDMatrixSetInfoFromInterface(proxy_, "label", str.c_str());
  }
  if (row_block_.qid) {
    auto str = linalg::Make1dInterface(row_block_.qid, n_rows);
    XGDMatrixSetInfoFromInterface(proxy_, "qid", str.c_str());
  }
  if (row_block_.weight) {
    auto str = linalg::Make1dInterface(row_block_.weight, n_rows);
    XGDMatrixSetInfoFromInterface(proxy_, "weight", str.c_str());
  }
  // Continue iteration
  return true;
}
} // namespace xgboost::data } // namespace xgboost::data

View File

@ -1,20 +1,16 @@
/** /**
* Copyright 2021-2023, XGBoost contributors * Copyright 2021-2024, XGBoost contributors
*/ */
#ifndef XGBOOST_DATA_FILE_ITERATOR_H_ #ifndef XGBOOST_DATA_FILE_ITERATOR_H_
#define XGBOOST_DATA_FILE_ITERATOR_H_ #define XGBOOST_DATA_FILE_ITERATOR_H_
#include <algorithm> // for max_element
#include <cstddef> // for size_t
#include <cstdint> // for uint32_t #include <cstdint> // for uint32_t
#include <memory> // for unique_ptr #include <memory> // for unique_ptr
#include <string> // for string #include <string> // for string
#include <utility> // for move #include <utility> // for move
#include "dmlc/data.h" // for RowBlock, Parser #include "dmlc/data.h" // for RowBlock, Parser
#include "xgboost/c_api.h" // for XGDMatrixSetDenseInfo, XGDMatrixFree, XGProxyDMatrixCreate #include "xgboost/c_api.h" // for XGDMatrixFree, XGProxyDMatrixCreate
#include "xgboost/linalg.h" // for ArrayInterfaceStr, MakeVec
#include "xgboost/logging.h" // for CHECK
namespace xgboost::data { namespace xgboost::data {
[[nodiscard]] std::string ValidateFileFormat(std::string const& uri); [[nodiscard]] std::string ValidateFileFormat(std::string const& uri);
@ -53,41 +49,7 @@ class FileIterator {
XGDMatrixFree(proxy_); XGDMatrixFree(proxy_);
} }
int Next() { int Next();
CHECK(parser_);
if (parser_->Next()) {
row_block_ = parser_->Value();
using linalg::MakeVec;
indptr_ = ArrayInterfaceStr(MakeVec(row_block_.offset, row_block_.size + 1));
values_ = ArrayInterfaceStr(MakeVec(row_block_.value, row_block_.offset[row_block_.size]));
indices_ = ArrayInterfaceStr(MakeVec(row_block_.index, row_block_.offset[row_block_.size]));
size_t n_columns = *std::max_element(row_block_.index,
row_block_.index + row_block_.offset[row_block_.size]);
// dmlc parser converts 1-based indexing back to 0-based indexing so we can ignore
// this condition and just add 1 to n_columns
n_columns += 1;
XGProxyDMatrixSetDataCSR(proxy_, indptr_.c_str(), indices_.c_str(),
values_.c_str(), n_columns);
if (row_block_.label) {
XGDMatrixSetDenseInfo(proxy_, "label", row_block_.label, row_block_.size, 1);
}
if (row_block_.qid) {
XGDMatrixSetDenseInfo(proxy_, "qid", row_block_.qid, row_block_.size, 1);
}
if (row_block_.weight) {
XGDMatrixSetDenseInfo(proxy_, "weight", row_block_.weight, row_block_.size, 1);
}
// Continue iteration
return true;
} else {
// Stop iteration
return false;
}
}
auto Proxy() -> decltype(proxy_) { return proxy_; } auto Proxy() -> decltype(proxy_) { return proxy_; }

View File

@ -1,5 +1,5 @@
/** /**
* Copyright 2014-2023 by Contributors * Copyright 2014-2024, XGBoost Contributors
* \file gbtree.cc * \file gbtree.cc
* \brief gradient boosted tree implementation. * \brief gradient boosted tree implementation.
* \author Tianqi Chen * \author Tianqi Chen
@ -11,14 +11,12 @@
#include <algorithm> #include <algorithm>
#include <cstdint> // std::int32_t #include <cstdint> // std::int32_t
#include <map>
#include <memory> #include <memory>
#include <numeric> // for iota
#include <string> #include <string>
#include <unordered_map>
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "../common/common.h"
#include "../common/timer.h" #include "../common/timer.h"
#include "../tree/param.h" // TrainParam #include "../tree/param.h" // TrainParam
#include "gbtree_model.h" #include "gbtree_model.h"

View File

@ -10,15 +10,15 @@
#include <array> #include <array>
#include <cmath> #include <cmath>
#include <numeric> // for accumulate
#include "../collective/communicator-inl.h" #include "../common/common.h" // for AssertGPUSupport
#include "../common/common.h" // MetricNoCache
#include "../common/math.h" #include "../common/math.h"
#include "../common/optional_weight.h" // OptionalWeights #include "../common/optional_weight.h" // OptionalWeights
#include "../common/pseudo_huber.h" #include "../common/pseudo_huber.h"
#include "../common/quantile_loss_utils.h" // QuantileLossParam #include "../common/quantile_loss_utils.h" // QuantileLossParam
#include "../common/threading_utils.h" #include "../common/threading_utils.h"
#include "metric_common.h" #include "metric_common.h" // MetricNoCache
#include "xgboost/collective/result.h" // for SafeColl #include "xgboost/collective/result.h" // for SafeColl
#include "xgboost/metric.h" #include "xgboost/metric.h"

View File

@ -9,8 +9,6 @@
#include <string> #include <string>
#include "../collective/aggregator.h" #include "../collective/aggregator.h"
#include "../collective/communicator-inl.h"
#include "../common/common.h"
#include "xgboost/metric.h" #include "xgboost/metric.h"
namespace xgboost { namespace xgboost {

View File

@ -9,8 +9,8 @@
#include <array> #include <array>
#include <atomic> #include <atomic>
#include <cmath> #include <cmath>
#include <numeric> // for accumulate
#include "../collective/communicator-inl.h"
#include "../common/math.h" #include "../common/math.h"
#include "../common/threading_utils.h" #include "../common/threading_utils.h"
#include "metric_common.h" // MetricNoCache #include "metric_common.h" // MetricNoCache

View File

@ -9,10 +9,9 @@
#include <array> #include <array>
#include <memory> #include <memory>
#include <numeric> // for accumulate
#include <vector> #include <vector>
#include "../collective/communicator-inl.h"
#include "../common/math.h"
#include "../common/survival_util.h" #include "../common/survival_util.h"
#include "../common/threading_utils.h" #include "../common/threading_utils.h"
#include "metric_common.h" // MetricNoCache #include "metric_common.h" // MetricNoCache

View File

@ -3,6 +3,8 @@
*/ */
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <numeric> // for iota
#include "../../../src/collective/allreduce.h" #include "../../../src/collective/allreduce.h"
#include "../../../src/collective/coll.h" // for Coll #include "../../../src/collective/coll.h" // for Coll
#include "../../../src/collective/tracker.h" #include "../../../src/collective/tracker.h"

View File

@ -1,11 +1,12 @@
/** /**
* Copyright 2023, XGBoost Contributors * Copyright 2023-2024, XGBoost Contributors
*/ */
#pragma once #pragma once
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <chrono> // for seconds #include <chrono> // for seconds
#include <cstdint> // for int32_t #include <cstdint> // for int32_t
#include <fstream> // for ifstream
#include <string> // for string #include <string> // for string
#include <thread> // for thread #include <thread> // for thread
#include <utility> // for move #include <utility> // for move

View File

@ -1,10 +1,9 @@
/** /**
* Copyright 2019-2023 by XGBoost Contributors * Copyright 2019-2024, XGBoost Contributors
*/ */
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <vector> #include <vector>
#include <string> #include <string>
#include <utility>
#include "../../../src/common/hist_util.h" #include "../../../src/common/hist_util.h"
#include "../../../src/data/gradient_index.h" #include "../../../src/data/gradient_index.h"
@ -135,7 +134,7 @@ TEST(CutsBuilder, SearchGroupInd) {
group[2] = 7; group[2] = 7;
group[3] = 5; group[3] = 5;
p_mat->SetInfo("group", group.data(), DataType::kUInt32, kNumGroups); p_mat->SetInfo("group", Make1dInterfaceTest(group.data(), group.size()));
HistogramCuts hmat; HistogramCuts hmat;
@ -348,7 +347,8 @@ void TestSketchFromWeights(bool with_group) {
for (size_t i = 0; i < kGroups; ++i) { for (size_t i = 0; i < kGroups; ++i) {
groups[i] = kRows / kGroups; groups[i] = kRows / kGroups;
} }
info.SetInfo(ctx, "group", groups.data(), DataType::kUInt32, kGroups); auto sg = linalg::Make1dInterface(groups.data(), kGroups);
info.SetInfo(ctx, "group", sg.c_str());
} }
info.num_row_ = kRows; info.num_row_ = kRows;
@ -356,10 +356,10 @@ void TestSketchFromWeights(bool with_group) {
// Assign weights. // Assign weights.
if (with_group) { if (with_group) {
m->SetInfo("group", groups.data(), DataType::kUInt32, kGroups); m->SetInfo("group", Make1dInterfaceTest(groups.data(), kGroups));
} }
m->SetInfo("weight", h_weights.data(), DataType::kFloat32, h_weights.size()); m->SetInfo("weight", Make1dInterfaceTest(h_weights.data(), h_weights.size()));
m->Info().num_col_ = kCols; m->Info().num_col_ = kCols;
m->Info().num_row_ = kRows; m->Info().num_row_ = kRows;
ASSERT_EQ(cuts.Ptrs().size(), kCols + 1); ASSERT_EQ(cuts.Ptrs().size(), kCols + 1);

View File

@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2023 by XGBoost Contributors * Copyright 2019-2024, XGBoost Contributors
*/ */
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <thrust/device_vector.h> #include <thrust/device_vector.h>
@ -682,7 +682,7 @@ TEST(HistUtil, DeviceSketchFromGroupWeights) {
for (size_t i = 0; i < kGroups; ++i) { for (size_t i = 0; i < kGroups; ++i) {
groups[i] = kRows / kGroups; groups[i] = kRows / kGroups;
} }
m->SetInfo("group", groups.data(), DataType::kUInt32, kGroups); m->SetInfo("group", Make1dInterfaceTest(groups.data(), kGroups));
HistogramCuts weighted_cuts = DeviceSketch(&ctx, m.get(), kBins, 0); HistogramCuts weighted_cuts = DeviceSketch(&ctx, m.get(), kBins, 0);
// sketch with no weight // sketch with no weight
@ -727,7 +727,7 @@ void TestAdapterSketchFromWeights(bool with_group) {
for (size_t i = 0; i < kGroups; ++i) { for (size_t i = 0; i < kGroups; ++i) {
groups[i] = kRows / kGroups; groups[i] = kRows / kGroups;
} }
info.SetInfo(ctx, "group", groups.data(), DataType::kUInt32, kGroups); info.SetInfo(ctx, "group", Make1dInterfaceTest(groups.data(), kGroups));
} }
info.weights_.SetDevice(DeviceOrd::CUDA(0)); info.weights_.SetDevice(DeviceOrd::CUDA(0));
@ -746,10 +746,10 @@ void TestAdapterSketchFromWeights(bool with_group) {
auto dmat = GetDMatrixFromData(storage.HostVector(), kRows, kCols); auto dmat = GetDMatrixFromData(storage.HostVector(), kRows, kCols);
if (with_group) { if (with_group) {
dmat->Info().SetInfo(ctx, "group", groups.data(), DataType::kUInt32, kGroups); dmat->Info().SetInfo(ctx, "group", Make1dInterfaceTest(groups.data(), kGroups));
} }
dmat->Info().SetInfo(ctx, "weight", h_weights.data(), DataType::kFloat32, h_weights.size()); dmat->Info().SetInfo(ctx, "weight", Make1dInterfaceTest(h_weights.data(), h_weights.size()));
dmat->Info().num_col_ = kCols; dmat->Info().num_col_ = kCols;
dmat->Info().num_row_ = kRows; dmat->Info().num_row_ = kRows;
ASSERT_EQ(cuts.Ptrs().size(), kCols + 1); ASSERT_EQ(cuts.Ptrs().size(), kCols + 1);

View File

@ -1,11 +1,12 @@
/** /**
* Copyright 2018-2023 by XGBoost Contributors * Copyright 2018-2024, XGBoost Contributors
*/ */
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <xgboost/base.h> #include <xgboost/base.h>
#include <xgboost/span.h>
#include <xgboost/host_device_vector.h> #include <xgboost/host_device_vector.h>
#include <xgboost/span.h>
#include <numeric> // for iota
#include <vector> #include <vector>
#include "../../../src/common/transform.h" #include "../../../src/common/transform.h"

View File

@ -1,10 +1,11 @@
/** /**
* Copyright 2021-2023, XGBoost Contributors * Copyright 2021-2024, XGBoost Contributors
*/ */
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <xgboost/host_device_vector.h> #include <xgboost/host_device_vector.h>
#include "../helpers.h"
#include "../../../src/data/array_interface.h" #include "../../../src/data/array_interface.h"
#include "../helpers.h"
namespace xgboost { namespace xgboost {

View File

@ -10,7 +10,6 @@
#include <memory> #include <memory>
#include <string> #include <string>
#include "../../../src/common/version.h"
#include "../filesystem.h" // dmlc::TemporaryDirectory #include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../helpers.h" // for GMockTHrow #include "../helpers.h" // for GMockTHrow
#include "xgboost/base.h" #include "xgboost/base.h"
@ -23,23 +22,22 @@ TEST(MetaInfo, GetSet) {
double double2[2] = {1.0, 2.0}; double double2[2] = {1.0, 2.0};
EXPECT_EQ(info.labels.Size(), 0); EXPECT_EQ(info.labels.Size(), 0);
info.SetInfo(ctx, "label", double2, xgboost::DataType::kFloat32, 2); info.SetInfo(ctx, "label", Make1dInterfaceTest(double2, 2));
EXPECT_EQ(info.labels.Size(), 2); EXPECT_EQ(info.labels.Size(), 2);
float float2[2] = {1.0f, 2.0f}; float float2[2] = {1.0f, 2.0f};
EXPECT_EQ(info.GetWeight(1), 1.0f) EXPECT_EQ(info.GetWeight(1), 1.0f) << "When no weights are given, was expecting default value 1";
<< "When no weights are given, was expecting default value 1"; info.SetInfo(ctx, "weight", Make1dInterfaceTest(float2, 2));
info.SetInfo(ctx, "weight", float2, xgboost::DataType::kFloat32, 2);
EXPECT_EQ(info.GetWeight(1), 2.0f); EXPECT_EQ(info.GetWeight(1), 2.0f);
uint32_t uint32_t2[2] = {1U, 2U}; uint32_t uint32_t2[2] = {1U, 2U};
EXPECT_EQ(info.base_margin_.Size(), 0); EXPECT_EQ(info.base_margin_.Size(), 0);
info.SetInfo(ctx, "base_margin", uint32_t2, xgboost::DataType::kUInt32, 2); info.SetInfo(ctx, "base_margin", Make1dInterfaceTest(uint32_t2, 2));
EXPECT_EQ(info.base_margin_.Size(), 2); EXPECT_EQ(info.base_margin_.Size(), 2);
uint64_t uint64_t2[2] = {1U, 2U}; uint64_t uint64_t2[2] = {1U, 2U};
EXPECT_EQ(info.group_ptr_.size(), 0); EXPECT_EQ(info.group_ptr_.size(), 0);
info.SetInfo(ctx, "group", uint64_t2, xgboost::DataType::kUInt64, 2); info.SetInfo(ctx, "group", Make1dInterfaceTest(uint64_t2, 2));
ASSERT_EQ(info.group_ptr_.size(), 3); ASSERT_EQ(info.group_ptr_.size(), 3);
EXPECT_EQ(info.group_ptr_[2], 3); EXPECT_EQ(info.group_ptr_[2], 3);
@ -135,9 +133,9 @@ TEST(MetaInfo, SaveLoadBinary) {
}; };
std::vector<float> values (kRows); std::vector<float> values (kRows);
std::generate(values.begin(), values.end(), generator); std::generate(values.begin(), values.end(), generator);
info.SetInfo(ctx, "label", values.data(), xgboost::DataType::kFloat32, kRows); info.SetInfo(ctx, "label", Make1dInterfaceTest(values.data(), kRows));
info.SetInfo(ctx, "weight", values.data(), xgboost::DataType::kFloat32, kRows); info.SetInfo(ctx, "weight", Make1dInterfaceTest(values.data(), kRows));
info.SetInfo(ctx, "base_margin", values.data(), xgboost::DataType::kFloat32, kRows); info.SetInfo(ctx, "base_margin", Make1dInterfaceTest(values.data(), kRows));
info.num_row_ = kRows; info.num_row_ = kRows;
info.num_col_ = kCols; info.num_col_ = kCols;
@ -271,7 +269,7 @@ TEST(MetaInfo, CPUQid) {
qid[i] = i; qid[i] = i;
} }
info.SetInfo(ctx, "qid", qid.data(), xgboost::DataType::kUInt32, info.num_row_); info.SetInfo(ctx, "qid", Make1dInterfaceTest(qid.data(), info.num_row_));
ASSERT_EQ(info.group_ptr_.size(), info.num_row_ + 1); ASSERT_EQ(info.group_ptr_.size(), info.num_row_ + 1);
ASSERT_EQ(info.group_ptr_.front(), 0); ASSERT_EQ(info.group_ptr_.front(), 0);
ASSERT_EQ(info.group_ptr_.back(), info.num_row_); ASSERT_EQ(info.group_ptr_.back(), info.num_row_);
@ -288,14 +286,12 @@ TEST(MetaInfo, Validate) {
info.num_col_ = 3; info.num_col_ = 3;
std::vector<xgboost::bst_group_t> groups (11); std::vector<xgboost::bst_group_t> groups (11);
Context ctx; Context ctx;
info.SetInfo(ctx, "group", groups.data(), xgboost::DataType::kUInt32, 11); info.SetInfo(ctx, "group", Make1dInterfaceTest(groups.data(), groups.size()));
EXPECT_THROW(info.Validate(FstCU()), dmlc::Error); EXPECT_THROW(info.Validate(FstCU()), dmlc::Error);
std::vector<float> labels(info.num_row_ + 1); std::vector<float> labels(info.num_row_ + 1);
EXPECT_THROW( EXPECT_THROW(
{ { info.SetInfo(ctx, "label", Make1dInterfaceTest(labels.data(), info.num_row_ + 1)); },
info.SetInfo(ctx, "label", labels.data(), xgboost::DataType::kFloat32, info.num_row_ + 1);
},
dmlc::Error); dmlc::Error);
// Make overflow data, which can happen when users pass group structure as int // Make overflow data, which can happen when users pass group structure as int
@ -305,13 +301,13 @@ TEST(MetaInfo, Validate) {
groups.push_back(1562500); groups.push_back(1562500);
} }
groups.push_back(static_cast<xgboost::bst_group_t>(-1)); groups.push_back(static_cast<xgboost::bst_group_t>(-1));
EXPECT_THROW(info.SetInfo(ctx, "group", groups.data(), xgboost::DataType::kUInt32, groups.size()), EXPECT_THROW(info.SetInfo(ctx, "group", Make1dInterfaceTest(groups.data(), groups.size())),
dmlc::Error); dmlc::Error);
#if defined(XGBOOST_USE_CUDA) #if defined(XGBOOST_USE_CUDA)
info.group_ptr_.clear(); info.group_ptr_.clear();
labels.resize(info.num_row_); labels.resize(info.num_row_);
info.SetInfo(ctx, "label", labels.data(), xgboost::DataType::kFloat32, info.num_row_); info.SetInfo(ctx, "label", Make1dInterfaceTest(labels.data(), info.num_row_));
info.labels.SetDevice(FstCU()); info.labels.SetDevice(FstCU());
EXPECT_THROW(info.Validate(DeviceOrd::CUDA(1)), dmlc::Error); EXPECT_THROW(info.Validate(DeviceOrd::CUDA(1)), dmlc::Error);
@ -340,8 +336,8 @@ TEST(MetaInfo, HostExtend) {
for (size_t g = 0; g < kRows / per_group; ++g) { for (size_t g = 0; g < kRows / per_group; ++g) {
groups.emplace_back(per_group); groups.emplace_back(per_group);
} }
lhs.SetInfo(ctx, "group", groups.data(), xgboost::DataType::kUInt32, groups.size()); lhs.SetInfo(ctx, "group", Make1dInterfaceTest(groups.data(), groups.size()));
rhs.SetInfo(ctx, "group", groups.data(), xgboost::DataType::kUInt32, groups.size()); rhs.SetInfo(ctx, "group", Make1dInterfaceTest(groups.data(), groups.size()));
lhs.Extend(rhs, true, true); lhs.Extend(rhs, true, true);
ASSERT_EQ(lhs.num_row_, kRows * 2); ASSERT_EQ(lhs.num_row_, kRows * 2);

View File

@ -408,7 +408,7 @@ class Dart : public testing::TestWithParam<char const*> {
for (size_t i = 0; i < kRows; ++i) { for (size_t i = 0; i < kRows; ++i) {
labels[i] = i % 2; labels[i] = i % 2;
} }
p_mat->SetInfo("label", labels.data(), DataType::kFloat32, kRows); p_mat->SetInfo("label", Make1dInterfaceTest(labels.data(), kRows));
auto learner = std::unique_ptr<Learner>(Learner::Create({p_mat})); auto learner = std::unique_ptr<Learner>(Learner::Create({p_mat}));
learner->SetParam("booster", "dart"); learner->SetParam("booster", "dart");

View File

@ -1,8 +1,11 @@
/**
* Copyright 2020-2024, XGBoost contributors
*/
#include <xgboost/c_api.h> #include <xgboost/c_api.h>
#include "helpers.h"
#include "../../src/data/device_adapter.cuh" #include "../../src/data/device_adapter.cuh"
#include "../../src/data/iterative_dmatrix.h" #include "../../src/data/iterative_dmatrix.h"
#include "helpers.h"
namespace xgboost { namespace xgboost {

View File

@ -15,19 +15,18 @@
#include <cstdint> // std::int32_t #include <cstdint> // std::int32_t
#include <cstdio> #include <cstdio>
#include <fstream>
#include <iostream>
#include <memory> #include <memory>
#include <string> #include <string>
#include <thread>
#include <vector> #include <vector>
#include "../../src/collective/communicator-inl.h" #include "../../src/collective/communicator-inl.h"
#include "../../src/common/common.h" #include "../../src/common/common.h"
#include "../../src/common/threading_utils.h" #include "../../src/common/threading_utils.h"
#include "../../src/data/array_interface.h"
#include "filesystem.h" // dmlc::TemporaryDirectory #include "filesystem.h" // dmlc::TemporaryDirectory
#include "xgboost/linalg.h" #include "xgboost/linalg.h"
#if !defined(_OPENMP)
#include <thread>
#endif
#if defined(__CUDACC__) #if defined(__CUDACC__)
#define DeclareUnifiedTest(name) GPU ## name #define DeclareUnifiedTest(name) GPU ## name
@ -333,7 +332,7 @@ inline std::vector<float> GenerateRandomCategoricalSingleColumn(int n, size_t nu
std::vector<float> x(n); std::vector<float> x(n);
std::mt19937 rng(0); std::mt19937 rng(0);
std::uniform_int_distribution<size_t> dist(0, num_categories - 1); std::uniform_int_distribution<size_t> dist(0, num_categories - 1);
std::generate(x.begin(), x.end(), [&]() { return dist(rng); }); std::generate(x.begin(), x.end(), [&]() { return static_cast<float>(dist(rng)); });
// Make sure each category is present // Make sure each category is present
for (size_t i = 0; i < num_categories; i++) { for (size_t i = 0; i < num_categories; i++) {
x[i] = static_cast<decltype(x)::value_type>(i); x[i] = static_cast<decltype(x)::value_type>(i);
@ -494,6 +493,16 @@ inline int Next(DataIterHandle self) {
return static_cast<ArrayIterForTest*>(self)->Next(); return static_cast<ArrayIterForTest*>(self)->Next();
} }
/**
 * @brief Test helper: build the 1-d array-interface string for a host buffer and hand it
 *        back as a C string.
 *
 * The string returned by linalg::Make1dInterface is kept in a function-local, per-thread
 * buffer that is overwritten on every call. The returned pointer is therefore only valid
 * until the next call of the same instantiation (same `T`) on the same thread; consume it
 * immediately (e.g. pass it straight to `SetInfo`) and do not store it.
 *
 * @tparam T  Element type of the buffer.
 * @param vec Pointer to the first element; forwarded to linalg::Make1dInterface.
 * @param len Number of elements; forwarded to linalg::Make1dInterface.
 *
 * @return Null-terminated array-interface string owned by the per-thread buffer.
 */
template <typename T>
char const* Make1dInterfaceTest(T const* vec, std::size_t len) {
  // Block-scope `thread_local` implies static storage duration, so the buffer outlives
  // the call and keeps the returned pointer alive until it is next reassigned.
  thread_local std::string interface_buf;
  interface_buf = linalg::Make1dInterface(vec, len);
  return interface_buf.c_str();
}
class RMMAllocator; class RMMAllocator;
using RMMAllocatorPtr = std::unique_ptr<RMMAllocator, void(*)(RMMAllocator*)>; using RMMAllocatorPtr = std::unique_ptr<RMMAllocator, void(*)(RMMAllocator*)>;
RMMAllocatorPtr SetUpRMMResourceForCppTests(int argc, char** argv); RMMAllocatorPtr SetUpRMMResourceForCppTests(int argc, char** argv);

View File

@ -5,10 +5,9 @@
#include <xgboost/json.h> #include <xgboost/json.h>
#include <xgboost/metric.h> #include <xgboost/metric.h>
#include <map>
#include <memory> #include <memory>
#include <numeric> // for iota
#include "../../../src/common/linalg_op.h"
#include "../helpers.h" #include "../helpers.h"
namespace xgboost::metric { namespace xgboost::metric {

View File

@ -1,14 +1,15 @@
/*! /**
* Copyright 2018-2023 XGBoost contributors * Copyright 2018-2024, XGBoost contributors
*/ */
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <xgboost/context.h> #include <xgboost/context.h>
#include <xgboost/objective.h> #include <xgboost/objective.h>
#include "../../../src/objective/adaptive.h" #include <numeric> // for iota
#include "../../../src/tree/param.h" // for TrainParam
#include "../helpers.h"
#include "../../../src/objective/adaptive.h"
#include "../../../src/tree/param.h" // for TrainParam
#include "../helpers.h"
#include "test_regression_obj.h" #include "test_regression_obj.h"
namespace xgboost { namespace xgboost {

View File

@ -12,7 +12,6 @@
#include <cinttypes> // for int32_t, int64_t, uint32_t #include <cinttypes> // for int32_t, int64_t, uint32_t
#include <cstddef> // for size_t #include <cstddef> // for size_t
#include <iosfwd> // for ofstream #include <iosfwd> // for ofstream
#include <iterator> // for back_insert_iterator, back_inserter
#include <limits> // for numeric_limits #include <limits> // for numeric_limits
#include <map> // for map #include <map> // for map
#include <memory> // for unique_ptr, shared_ptr, __shared_ptr_... #include <memory> // for unique_ptr, shared_ptr, __shared_ptr_...
@ -30,7 +29,6 @@
#include "../../src/common/random.h" // for GlobalRandom #include "../../src/common/random.h" // for GlobalRandom
#include "dmlc/io.h" // for Stream #include "dmlc/io.h" // for Stream
#include "dmlc/omp.h" // for omp_get_max_threads #include "dmlc/omp.h" // for omp_get_max_threads
#include "dmlc/registry.h" // for Registry
#include "filesystem.h" // for TemporaryDirectory #include "filesystem.h" // for TemporaryDirectory
#include "helpers.h" // for GetBaseScore, RandomDataGenerator #include "helpers.h" // for GetBaseScore, RandomDataGenerator
#include "objective_helpers.h" // for MakeObjNamesForTest, ObjTestNameGenerator #include "objective_helpers.h" // for MakeObjNamesForTest, ObjTestNameGenerator
@ -103,9 +101,9 @@ TEST(Learner, CheckGroup) {
labels[i] = i % 2; labels[i] = i % 2;
} }
p_mat->SetInfo("weight", static_cast<void *>(weight.data()), DataType::kFloat32, kNumGroups); p_mat->SetInfo("weight", Make1dInterfaceTest(weight.data(), kNumGroups));
p_mat->SetInfo("group", group.data(), DataType::kUInt32, kNumGroups); p_mat->SetInfo("group", Make1dInterfaceTest(group.data(), kNumGroups));
p_mat->SetInfo("label", labels.data(), DataType::kFloat32, kNumRows); p_mat->SetInfo("label", Make1dInterfaceTest(labels.data(), kNumRows));
std::vector<std::shared_ptr<xgboost::DMatrix>> mat = {p_mat}; std::vector<std::shared_ptr<xgboost::DMatrix>> mat = {p_mat};
auto learner = std::unique_ptr<Learner>(Learner::Create(mat)); auto learner = std::unique_ptr<Learner>(Learner::Create(mat));
@ -115,7 +113,7 @@ TEST(Learner, CheckGroup) {
group.resize(kNumGroups+1); group.resize(kNumGroups+1);
group[3] = 4; group[3] = 4;
group[4] = 1; group[4] = 1;
p_mat->SetInfo("group", group.data(), DataType::kUInt32, kNumGroups+1); p_mat->SetInfo("group", Make1dInterfaceTest(group.data(), kNumGroups+1));
EXPECT_ANY_THROW(learner->UpdateOneIter(0, p_mat)); EXPECT_ANY_THROW(learner->UpdateOneIter(0, p_mat));
} }
@ -132,7 +130,7 @@ TEST(Learner, SLOW_CheckMultiBatch) { // NOLINT
for (size_t i = 0; i < num_row; ++i) { for (size_t i = 0; i < num_row; ++i) {
labels[i] = i % 2; labels[i] = i % 2;
} }
dmat->SetInfo("label", labels.data(), DataType::kFloat32, num_row); dmat->SetInfo("label", Make1dInterfaceTest(labels.data(), num_row));
std::vector<std::shared_ptr<DMatrix>> mat{dmat}; std::vector<std::shared_ptr<DMatrix>> mat{dmat};
auto learner = std::unique_ptr<Learner>(Learner::Create(mat)); auto learner = std::unique_ptr<Learner>(Learner::Create(mat));
learner->SetParams(Args{{"objective", "binary:logistic"}}); learner->SetParams(Args{{"objective", "binary:logistic"}});

View File

@ -239,4 +239,18 @@ void TestAtomicAdd() {
TEST(Histogram, AtomicAddInt64) { TEST(Histogram, AtomicAddInt64) {
TestAtomicAdd(); TestAtomicAdd();
} }
// Round-trip check for GradientQuantiser: converting a gradient pair to fixed point and
// back to floating point must reproduce the original (1.0, 1.0) values exactly.
TEST(Histogram, Quantiser) {
  auto ctx = MakeCUDACtx(0);
  std::size_t const n_gradients = 16;

  // Every sample carries the same gradient/hessian so the expected result is known.
  HostDeviceVector<GradientPair> gradients(n_gradients, GradientPair{1.0, 1.0});
  gradients.SetDevice(ctx.Device());

  // The quantiser is constructed from the device view of the gradients.
  GradientQuantiser quantiser(&ctx, gradients.DeviceSpan(), MetaInfo());

  auto const& h_gradients = gradients.ConstHostVector();
  for (auto const& pair : h_gradients) {
    auto const round_trip = quantiser.ToFloatingPoint(quantiser.ToFixedPoint(pair));
    ASSERT_EQ(round_trip.GetGrad(), 1.0);
    ASSERT_EQ(round_trip.GetHess(), 1.0);
  }
}
} // namespace xgboost::tree } // namespace xgboost::tree