From 230010d9a0ddd0334b34f30dfed183f51653baca Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 26 Mar 2024 23:26:24 +0800 Subject: [PATCH] Cleanup set info. (#10139) - Use the array interface internally. - Deprecate `XGDMatrixSetDenseInfo`. - Deprecate `XGDMatrixSetUIntInfo`. - Move the handling of `DataType` into the deprecated C function. --------- Co-authored-by: Philip Hyunsu Cho --- .github/workflows/r_tests.yml | 8 +-- include/xgboost/c_api.h | 60 ++++-------------- include/xgboost/data.h | 13 ---- include/xgboost/linalg.h | 15 ++++- include/xgboost/span.h | 10 ++- .../xgboost4j/src/native/xgboost4j.cpp | 10 +-- src/c_api/c_api.cc | 62 ++++++++++++++----- src/c_api/c_api_utils.h | 19 +++--- src/collective/nccl_device_communicator.cu | 2 + src/common/error_msg.cc | 2 +- src/common/error_msg.h | 2 +- src/common/host_device_vector.cu | 1 - src/common/quantile.cc | 1 + src/common/quantile.cu | 6 +- src/common/quantile.cuh | 5 +- src/data/data.cc | 46 +------------- src/data/file_iterator.cc | 52 ++++++++++++++-- src/data/file_iterator.h | 44 +------------ src/gbm/gbtree.h | 6 +- src/metric/elementwise_metric.cu | 6 +- src/metric/metric_common.h | 2 - src/metric/multiclass_metric.cu | 2 +- src/metric/survival_metric.cu | 3 +- tests/cpp/collective/test_allreduce.cc | 2 + tests/cpp/collective/test_worker.h | 3 +- tests/cpp/common/test_hist_util.cc | 12 ++-- tests/cpp/common/test_hist_util.cu | 10 +-- tests/cpp/common/test_transform_range.cc | 5 +- tests/cpp/data/test_array_interface.cu | 5 +- tests/cpp/data/test_metainfo.cc | 34 +++++----- tests/cpp/gbm/test_gbtree.cc | 2 +- tests/cpp/helpers.cu | 5 +- tests/cpp/helpers.h | 19 ++++-- tests/cpp/metric/test_elementwise_metric.h | 3 +- .../cpp/objective/test_regression_obj_cpu.cc | 11 ++-- tests/cpp/test_learner.cc | 12 ++-- tests/cpp/tree/gpu_hist/test_histogram.cu | 14 +++++ 37 files changed, 246 insertions(+), 268 deletions(-) diff --git a/.github/workflows/r_tests.yml b/.github/workflows/r_tests.yml index 045dac575..7dbdf3a84 100644 --- a/.github/workflows/r_tests.yml +++ b/.github/workflows/r_tests.yml @@ -110,7 +110,7 @@ jobs: name: Test R package on Debian runs-on: ubuntu-latest container: - image: rhub/debian-gcc-devel + image: rhub/debian-gcc-release steps: - name: Install system dependencies @@ -130,12 +130,12 @@ jobs: - name: Install dependencies shell: bash -l {0} run: | - /tmp/R-devel/bin/Rscript -e "source('./R-package/tests/helper_scripts/install_deps.R')" + Rscript -e "source('./R-package/tests/helper_scripts/install_deps.R')" - name: Test R shell: bash -l {0} run: | - python3 tests/ci_build/test_r_package.py --r=/tmp/R-devel/bin/R --build-tool=autotools --task=check + python3 tests/ci_build/test_r_package.py --r=/usr/bin/R --build-tool=autotools --task=check - uses: dorny/paths-filter@v2 id: changes @@ -147,4 +147,4 @@ jobs: - name: Run document check if: steps.changes.outputs.r_package == 'true' run: | - python3 tests/ci_build/test_r_package.py --r=/tmp/R-devel/bin/R --task=doc + python3 tests/ci_build/test_r_package.py --r=/usr/bin/R --task=doc diff --git a/include/xgboost/c_api.h b/include/xgboost/c_api.h index 795c78946..e065d8ba1 100644 --- a/include/xgboost/c_api.h +++ b/include/xgboost/c_api.h @@ -1,5 +1,5 @@ /** - * Copyright 2015~2023 by XGBoost Contributors + * Copyright 2015-2024, XGBoost Contributors * \file c_api.h * \author Tianqi Chen * \brief C API of XGBoost, used for interfacing to other languages. @@ -639,21 +639,14 @@ XGB_DLL int XGDMatrixSetInfoFromInterface(DMatrixHandle handle, * \param len length of array * \return 0 when success, -1 when failure happens */ -XGB_DLL int XGDMatrixSetFloatInfo(DMatrixHandle handle, - const char *field, - const float *array, +XGB_DLL int XGDMatrixSetFloatInfo(DMatrixHandle handle, const char *field, const float *array, bst_ulong len); -/*! - * \brief set uint32 vector to a content in info - * \param handle a instance of data matrix - * \param field field name - * \param array pointer to unsigned int vector - * \param len length of array - * \return 0 when success, -1 when failure happens +/** + * @deprecated since 2.1.0 + * + * Use @ref XGDMatrixSetInfoFromInterface instead. */ -XGB_DLL int XGDMatrixSetUIntInfo(DMatrixHandle handle, - const char *field, - const unsigned *array, +XGB_DLL int XGDMatrixSetUIntInfo(DMatrixHandle handle, const char *field, const unsigned *array, bst_ulong len); /*! @@ -725,42 +718,13 @@ XGB_DLL int XGDMatrixGetStrFeatureInfo(DMatrixHandle handle, const char *field, bst_ulong *size, const char ***out_features); -/*! - * \brief Set meta info from dense matrix. Valid field names are: +/** + * @deprecated since 2.1.0 * - * - label - * - weight - * - base_margin - * - group - * - label_lower_bound - * - label_upper_bound - * - feature_weights - * - * \param handle An instance of data matrix - * \param field Field name - * \param data Pointer to consecutive memory storing data. - * \param size Size of the data, this is relative to size of type. (Meaning NOT number - * of bytes.) - * \param type Indicator of data type. This is defined in xgboost::DataType enum class. - * - float = 1 - * - double = 2 - * - uint32_t = 3 - * - uint64_t = 4 - * \return 0 when success, -1 when failure happens + * Use @ref XGDMatrixSetInfoFromInterface instead. */ -XGB_DLL int XGDMatrixSetDenseInfo(DMatrixHandle handle, const char *field, - void const *data, bst_ulong size, int type); - -/*! - * \brief (deprecated) Use XGDMatrixSetUIntInfo instead. Set group of the training matrix - * \param handle a instance of data matrix - * \param group pointer to group size - * \param len length of array - * \return 0 when success, -1 when failure happens - */ -XGB_DLL int XGDMatrixSetGroup(DMatrixHandle handle, - const unsigned *group, - bst_ulong len); +XGB_DLL int XGDMatrixSetDenseInfo(DMatrixHandle handle, const char *field, void const *data, + bst_ulong size, int type); /*! * \brief get float info vector from matrix. diff --git a/include/xgboost/data.h b/include/xgboost/data.h index 2bdf3713d..ec06a9c86 100644 --- a/include/xgboost/data.h +++ b/include/xgboost/data.h @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -137,14 +136,6 @@ class MetaInfo { * \param fo The output stream. */ void SaveBinary(dmlc::Stream* fo) const; - /*! - * \brief Set information in the meta info. - * \param key The key of the information. - * \param dptr The data pointer of the source array. - * \param dtype The type of the source data. - * \param num Number of elements in the source array. - */ - void SetInfo(Context const& ctx, const char* key, const void* dptr, DataType dtype, size_t num); /*! * \brief Set information in the meta info with array interface. * \param key The key of the information. @@ -517,10 +508,6 @@ class DMatrix { DMatrix() = default; /*! \brief meta information of the dataset */ virtual MetaInfo& Info() = 0; - virtual void SetInfo(const char* key, const void* dptr, DataType dtype, size_t num) { - auto const& ctx = *this->Ctx(); - this->Info().SetInfo(ctx, key, dptr, dtype, num); - } virtual void SetInfo(const char* key, std::string const& interface_str) { auto const& ctx = *this->Ctx(); this->Info().SetInfo(ctx, key, StringView{interface_str}); diff --git a/include/xgboost/linalg.h b/include/xgboost/linalg.h index f538adbcd..cb7668f4c 100644 --- a/include/xgboost/linalg.h +++ b/include/xgboost/linalg.h @@ -190,13 +190,14 @@ constexpr auto ArrToTuple(T (&arr)[N]) { // uint division optimization inspired by the CIndexer in cupy. Division operation is // slow on both CPU and GPU, especially 64 bit integer. So here we first try to avoid 64 // bit when the index is smaller, then try to avoid division when it's exp of 2. -template +template LINALG_HD auto UnravelImpl(I idx, common::Span shape) { - size_t index[D]{0}; + std::size_t index[D]{0}; static_assert(std::is_signed::value, "Don't change the type without changing the for loop."); + auto const sptr = shape.data(); for (int32_t dim = D; --dim > 0;) { - auto s = static_cast>>(shape[dim]); + auto s = static_cast>>(sptr[dim]); if (s & (s - 1)) { auto t = idx / s; index[dim] = idx - t * s; @@ -745,6 +746,14 @@ auto ArrayInterfaceStr(TensorView const &t) { return str; } +template +auto Make1dInterface(T const *vec, std::size_t len) { + Context ctx; + auto t = linalg::MakeTensorView(&ctx, common::Span{vec, len}, len); + auto str = linalg::ArrayInterfaceStr(t); + return str; +} + /** * \brief A tensor storage. To use it for other functionality like slicing one needs to * obtain a view first. This way we can use it on both host and device. diff --git a/include/xgboost/span.h b/include/xgboost/span.h index 29ca76d3c..7471c2e44 100644 --- a/include/xgboost/span.h +++ b/include/xgboost/span.h @@ -30,9 +30,8 @@ #define XGBOOST_SPAN_H_ #include -#include -#include // size_t +#include // size_t #include #include #include // numeric_limits @@ -73,8 +72,7 @@ #endif // defined(_MSC_VER) && _MSC_VER < 1910 -namespace xgboost { -namespace common { +namespace xgboost::common { #if defined(__CUDA_ARCH__) // Usual logging facility is not available inside device code. @@ -707,8 +705,8 @@ class IterSpan { return it_ + size(); } }; -} // namespace common -} // namespace xgboost +} // namespace xgboost::common + #if defined(_MSC_VER) &&_MSC_VER < 1910 #undef constexpr diff --git a/jvm-packages/xgboost4j/src/native/xgboost4j.cpp b/jvm-packages/xgboost4j/src/native/xgboost4j.cpp index 332b1a127..9ba944d5a 100644 --- a/jvm-packages/xgboost4j/src/native/xgboost4j.cpp +++ b/jvm-packages/xgboost4j/src/native/xgboost4j.cpp @@ -408,7 +408,8 @@ JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixSetFloatI jfloat* array = jenv->GetFloatArrayElements(jarray, NULL); bst_ulong len = (bst_ulong)jenv->GetArrayLength(jarray); - int ret = XGDMatrixSetFloatInfo(handle, field, (float const *)array, len); + auto str = xgboost::linalg::Make1dInterface(array, len); + int ret = XGDMatrixSetInfoFromInterface(handle, field, str.c_str()); JVM_CHECK_CALL(ret); //release if (field) jenv->ReleaseStringUTFChars(jfield, field); @@ -427,7 +428,8 @@ JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixSetUIntIn const char* field = jenv->GetStringUTFChars(jfield, 0); jint* array = jenv->GetIntArrayElements(jarray, NULL); bst_ulong len = (bst_ulong)jenv->GetArrayLength(jarray); - int ret = XGDMatrixSetUIntInfo(handle, (char const *)field, (unsigned int const *)array, len); + auto str = xgboost::linalg::Make1dInterface(array, len); + int ret = XGDMatrixSetInfoFromInterface(handle, field, str.c_str()); JVM_CHECK_CALL(ret); //release if (field) jenv->ReleaseStringUTFChars(jfield, (const char *)field); @@ -730,8 +732,8 @@ JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterPredictFr if (jmargin) { margin = jenv->GetFloatArrayElements(jmargin, nullptr); JVM_CHECK_CALL(XGProxyDMatrixCreate(&proxy)); - JVM_CHECK_CALL( - XGDMatrixSetFloatInfo(proxy, "base_margin", margin, jenv->GetArrayLength(jmargin))); + auto str = xgboost::linalg::Make1dInterface(margin, jenv->GetArrayLength(jmargin)); + JVM_CHECK_CALL(XGDMatrixSetInfoFromInterface(proxy, "base_margin", str.c_str())); } bst_ulong const *out_shape; diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 0f4748bfe..598b7f2f5 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1,5 +1,5 @@ /** - * Copyright 2014-2024 by XGBoost Contributors + * Copyright 2014-2024, XGBoost Contributors */ #include "xgboost/c_api.h" @@ -614,8 +614,8 @@ XGB_DLL int XGDMatrixSetFloatInfo(DMatrixHandle handle, const char *field, const API_BEGIN(); CHECK_HANDLE(); xgboost_CHECK_C_ARG_PTR(field); - auto const& p_fmat = *static_cast *>(handle); - p_fmat->SetInfo(field, info, xgboost::DataType::kFloat32, len); + auto const &p_fmat = *static_cast *>(handle); + p_fmat->SetInfo(field, linalg::Make1dInterface(info, len)); API_END(); } @@ -634,8 +634,9 @@ XGB_DLL int XGDMatrixSetUIntInfo(DMatrixHandle handle, const char *field, const API_BEGIN(); CHECK_HANDLE(); xgboost_CHECK_C_ARG_PTR(field); + LOG(WARNING) << error::DeprecatedFunc(__func__, "2.1.0", "XGDMatrixSetInfoFromInterface"); auto const &p_fmat = *static_cast *>(handle); - p_fmat->SetInfo(field, info, xgboost::DataType::kUInt32, len); + p_fmat->SetInfo(field, linalg::Make1dInterface(info, len)); API_END(); } @@ -679,19 +680,52 @@ XGB_DLL int XGDMatrixSetDenseInfo(DMatrixHandle handle, const char *field, void xgboost::bst_ulong size, int type) { API_BEGIN(); CHECK_HANDLE(); + LOG(WARNING) << error::DeprecatedFunc(__func__, "2.1.0", "XGDMatrixSetInfoFromInterface"); auto const &p_fmat = *static_cast *>(handle); CHECK(type >= 1 && type <= 4); xgboost_CHECK_C_ARG_PTR(field); - p_fmat->SetInfo(field, data, static_cast(type), size); - API_END(); -} -XGB_DLL int XGDMatrixSetGroup(DMatrixHandle handle, const unsigned *group, xgboost::bst_ulong len) { - API_BEGIN(); - CHECK_HANDLE(); - LOG(WARNING) << "XGDMatrixSetGroup is deprecated, use `XGDMatrixSetUIntInfo` instead."; - auto const &p_fmat = *static_cast *>(handle); - p_fmat->SetInfo("group", group, xgboost::DataType::kUInt32, len); + Context ctx; + auto dtype = static_cast(type); + std::string str; + auto proc = [&](auto cast_d_ptr) { + using T = std::remove_pointer_t; + auto t = linalg::TensorView( + common::Span{cast_d_ptr, static_cast::index_type>(size)}, + {size}, DeviceOrd::CPU()); + CHECK(t.CContiguous()); + Json interface{linalg::ArrayInterface(t)}; + CHECK(ArrayInterface<1>{interface}.is_contiguous); + str = Json::Dump(interface); + return str; + }; + + // Legacy code using XGBoost dtype, which is a small subset of array interface types. + switch (dtype) { + case xgboost::DataType::kFloat32: { + auto cast_ptr = reinterpret_cast(data); + p_fmat->Info().SetInfo(ctx, field, proc(cast_ptr)); + break; + } + case xgboost::DataType::kDouble: { + auto cast_ptr = reinterpret_cast(data); + p_fmat->Info().SetInfo(ctx, field, proc(cast_ptr)); + break; + } + case xgboost::DataType::kUInt32: { + auto cast_ptr = reinterpret_cast(data); + p_fmat->Info().SetInfo(ctx, field, proc(cast_ptr)); + break; + } + case xgboost::DataType::kUInt64: { + auto cast_ptr = reinterpret_cast(data); + p_fmat->Info().SetInfo(ctx, field, proc(cast_ptr)); + break; + } + default: + LOG(FATAL) << "Unknown data type" << static_cast(dtype); + } + API_END(); } @@ -987,7 +1021,7 @@ XGB_DLL int XGBoosterBoostOneIter(BoosterHandle handle, DMatrixHandle dtrain, bs bst_float *hess, xgboost::bst_ulong len) { API_BEGIN(); CHECK_HANDLE(); - error::DeprecatedFunc(__func__, "2.1.0", "XGBoosterTrainOneIter"); + LOG(WARNING) << error::DeprecatedFunc(__func__, "2.1.0", "XGBoosterTrainOneIter"); auto *learner = static_cast(handle); auto ctx = learner->Ctx()->MakeCPU(); diff --git a/src/c_api/c_api_utils.h b/src/c_api/c_api_utils.h index 95efb5b9d..04b0fc007 100644 --- a/src/c_api/c_api_utils.h +++ b/src/c_api/c_api_utils.h @@ -1,17 +1,18 @@ /** - * Copyright 2021-2023, XGBoost Contributors + * Copyright 2021-2024, XGBoost Contributors */ #ifndef XGBOOST_C_API_C_API_UTILS_H_ #define XGBOOST_C_API_C_API_UTILS_H_ -#include -#include -#include -#include // for shared_ptr -#include // for string -#include // for make_tuple -#include // for move -#include +#include // for min +#include // for size_t +#include // for multiplies +#include // for shared_ptr +#include // for accumulate +#include // for string +#include // for make_tuple +#include // for move +#include // for vector #include "../common/json_utils.h" // for TypeCheck #include "xgboost/c_api.h" diff --git a/src/collective/nccl_device_communicator.cu b/src/collective/nccl_device_communicator.cu index 31c2d394d..b896e7d06 100644 --- a/src/collective/nccl_device_communicator.cu +++ b/src/collective/nccl_device_communicator.cu @@ -2,6 +2,8 @@ * Copyright 2023 XGBoost contributors */ #if defined(XGBOOST_USE_NCCL) +#include // for accumulate + #include "comm.cuh" #include "nccl_device_communicator.cuh" diff --git a/src/common/error_msg.cc b/src/common/error_msg.cc index 8871c1a1d..cdbe5ebf6 100644 --- a/src/common/error_msg.cc +++ b/src/common/error_msg.cc @@ -11,7 +11,7 @@ #include "xgboost/logging.h" namespace xgboost::error { -std::string DeprecatedFunc(StringView old, StringView since, StringView replacement) { +[[nodiscard]] std::string DeprecatedFunc(StringView old, StringView since, StringView replacement) { std::stringstream ss; ss << "`" << old << "` is deprecated since" << since << ", use `" << replacement << "` instead."; return ss.str(); diff --git a/src/common/error_msg.h b/src/common/error_msg.h index 7264c3532..67114320b 100644 --- a/src/common/error_msg.h +++ b/src/common/error_msg.h @@ -89,7 +89,7 @@ void WarnDeprecatedGPUId(); void WarnEmptyDataset(); -std::string DeprecatedFunc(StringView old, StringView since, StringView replacement); +[[nodiscard]] std::string DeprecatedFunc(StringView old, StringView since, StringView replacement); constexpr StringView InvalidCUDAOrdinal() { return "Invalid device. `device` is required to be CUDA and there must be at least one GPU " diff --git a/src/common/host_device_vector.cu b/src/common/host_device_vector.cu index 267309288..99448df21 100644 --- a/src/common/host_device_vector.cu +++ b/src/common/host_device_vector.cu @@ -6,7 +6,6 @@ #include #include -#include #include "xgboost/data.h" #include "xgboost/host_device_vector.h" diff --git a/src/common/quantile.cc b/src/common/quantile.cc index 4ae6ecd36..8c743d940 100644 --- a/src/common/quantile.cc +++ b/src/common/quantile.cc @@ -4,6 +4,7 @@ #include "quantile.h" #include +#include // for partial_sum #include #include "../collective/aggregator.h" diff --git a/src/common/quantile.cu b/src/common/quantile.cu index 529ee30df..e7f09fc4d 100644 --- a/src/common/quantile.cu +++ b/src/common/quantile.cu @@ -1,5 +1,5 @@ /** - * Copyright 2020-2023 by XGBoost Contributors + * Copyright 2020-2024, XGBoost Contributors */ #include #include @@ -8,8 +8,8 @@ #include #include -#include // std::numeric_limits -#include +#include // std::numeric_limits +#include // for partial_sum #include #include "../collective/communicator-inl.cuh" diff --git a/src/common/quantile.cuh b/src/common/quantile.cuh index 6a5a38613..898da03a0 100644 --- a/src/common/quantile.cuh +++ b/src/common/quantile.cuh @@ -1,8 +1,9 @@ +/** + * Copyright 2020-2024, XGBoost Contributors + */ #ifndef XGBOOST_COMMON_QUANTILE_CUH_ #define XGBOOST_COMMON_QUANTILE_CUH_ -#include - #include "xgboost/span.h" #include "xgboost/data.h" #include "device_helpers.cuh" diff --git a/src/data/data.cc b/src/data/data.cc index 8cdcde201..22854def8 100644 --- a/src/data/data.cc +++ b/src/data/data.cc @@ -11,7 +11,6 @@ #include // for abs #include // for uint64_t, int32_t, uint8_t, uint32_t #include // for size_t, strcmp, memcpy -#include // for exception #include // for operator<<, basic_ostream, basic_ostream::op... #include // for map, operator!= #include // for accumulate, partial_sum @@ -22,7 +21,6 @@ #include "../collective/communicator.h" // for Operation #include "../common/algorithm.h" // for StableSort #include "../common/api_entry.h" // for XGBAPIThreadLocalEntry -#include "../common/common.h" // for Split #include "../common/error_msg.h" // for GroupSize, GroupWeight, InfInData #include "../common/group_data.h" // for ParallelGroupBuilder #include "../common/io.h" // for PeekableInStream @@ -473,11 +471,11 @@ void MetaInfo::SetInfo(Context const& ctx, StringView key, StringView interface_ << ", must have at least 1 column even if it's empty."; auto const& first = get(array.front()); auto ptr = ArrayInterfaceHandler::GetPtrFromArrayData(first); - is_cuda = ArrayInterfaceHandler::IsCudaPtr(ptr); + is_cuda = first.find("stream") != first.cend() || ArrayInterfaceHandler::IsCudaPtr(ptr); } else { auto const& first = get(j_interface); auto ptr = ArrayInterfaceHandler::GetPtrFromArrayData(first); - is_cuda = ArrayInterfaceHandler::IsCudaPtr(ptr); + is_cuda = first.find("stream") != first.cend() || ArrayInterfaceHandler::IsCudaPtr(ptr); } if (is_cuda) { @@ -567,46 +565,6 @@ void MetaInfo::SetInfoFromHost(Context const& ctx, StringView key, Json arr) { } } -void MetaInfo::SetInfo(Context const& ctx, const char* key, const void* dptr, DataType dtype, - size_t num) { - CHECK(key); - auto proc = [&](auto cast_d_ptr) { - using T = std::remove_pointer_t; - auto t = linalg::TensorView(common::Span{cast_d_ptr, num}, {num}, DeviceOrd::CPU()); - CHECK(t.CContiguous()); - Json interface { - linalg::ArrayInterface(t) - }; - assert(ArrayInterface<1>{interface}.is_contiguous); - return interface; - }; - // Legacy code using XGBoost dtype, which is a small subset of array interface types. - switch (dtype) { - case xgboost::DataType::kFloat32: { - auto cast_ptr = reinterpret_cast(dptr); - this->SetInfoFromHost(ctx, key, proc(cast_ptr)); - break; - } - case xgboost::DataType::kDouble: { - auto cast_ptr = reinterpret_cast(dptr); - this->SetInfoFromHost(ctx, key, proc(cast_ptr)); - break; - } - case xgboost::DataType::kUInt32: { - auto cast_ptr = reinterpret_cast(dptr); - this->SetInfoFromHost(ctx, key, proc(cast_ptr)); - break; - } - case xgboost::DataType::kUInt64: { - auto cast_ptr = reinterpret_cast(dptr); - this->SetInfoFromHost(ctx, key, proc(cast_ptr)); - break; - } - default: - LOG(FATAL) << "Unknown data type" << static_cast(dtype); - } -} - void MetaInfo::GetInfo(char const* key, bst_ulong* out_len, DataType dtype, const void** out_dptr) const { if (dtype == DataType::kFloat32) { diff --git a/src/data/file_iterator.cc b/src/data/file_iterator.cc index cebfbdc19..1e341447c 100644 --- a/src/data/file_iterator.cc +++ b/src/data/file_iterator.cc @@ -1,5 +1,5 @@ /** - * Copyright 2021-2023, XGBoost contributors + * Copyright 2021-2024, XGBoost contributors */ #include "file_iterator.h" @@ -10,7 +10,10 @@ #include // for operator<<, basic_ostream, istringstream #include // for vector -#include "../common/common.h" // for Split +#include "../common/common.h" // for Split +#include "xgboost/linalg.h" // for ArrayInterfaceStr, MakeVec +#include "xgboost/linalg.h" +#include "xgboost/logging.h" // for CHECK #include "xgboost/string_view.h" // for operator<<, StringView namespace xgboost::data { @@ -28,10 +31,10 @@ std::string ValidateFileFormat(std::string const& uri) { for (size_t i = 0; i < arg_list.size(); ++i) { std::istringstream is(arg_list[i]); std::pair kv; - CHECK(std::getline(is, kv.first, '=')) << "Invalid uri argument format" - << " for key in arg " << i + 1; - CHECK(std::getline(is, kv.second)) << "Invalid uri argument format" - << " for value in arg " << i + 1; + CHECK(std::getline(is, kv.first, '=')) + << "Invalid uri argument format" << " for key in arg " << i + 1; + CHECK(std::getline(is, kv.second)) + << "Invalid uri argument format" << " for value in arg " << i + 1; args.insert(kv); } if (args.find("format") == args.cend()) { @@ -48,4 +51,41 @@ std::string ValidateFileFormat(std::string const& uri) { return name_args[0] + "?" + name_args[1] + '#' + name_args_cache[1]; } } + +int FileIterator::Next() { + CHECK(parser_); + if (parser_->Next()) { + row_block_ = parser_->Value(); + + indptr_ = linalg::Make1dInterface(row_block_.offset, row_block_.size + 1); + values_ = linalg::Make1dInterface(row_block_.value, row_block_.offset[row_block_.size]); + indices_ = linalg::Make1dInterface(row_block_.index, row_block_.offset[row_block_.size]); + + size_t n_columns = + *std::max_element(row_block_.index, row_block_.index + row_block_.offset[row_block_.size]); + // dmlc parser converts 1-based indexing back to 0-based indexing so we can ignore + // this condition and just add 1 to n_columns + n_columns += 1; + + XGProxyDMatrixSetDataCSR(proxy_, indptr_.c_str(), indices_.c_str(), values_.c_str(), n_columns); + + if (row_block_.label) { + auto str = linalg::Make1dInterface(row_block_.label, row_block_.size); + XGDMatrixSetInfoFromInterface(proxy_, "label", str.c_str()); + } + if (row_block_.qid) { + auto str = linalg::Make1dInterface(row_block_.qid, row_block_.size); + XGDMatrixSetInfoFromInterface(proxy_, "qid", str.c_str()); + } + if (row_block_.weight) { + auto str = linalg::Make1dInterface(row_block_.weight, row_block_.size); + XGDMatrixSetInfoFromInterface(proxy_, "weight", str.c_str()); + } + // Continue iteration + return true; + } else { + // Stop iteration + return false; + } +} } // namespace xgboost::data diff --git a/src/data/file_iterator.h b/src/data/file_iterator.h index c7f23b478..a4afbabe4 100644 --- a/src/data/file_iterator.h +++ b/src/data/file_iterator.h @@ -1,20 +1,16 @@ /** - * Copyright 2021-2023, XGBoost contributors + * Copyright 2021-2024, XGBoost contributors */ #ifndef XGBOOST_DATA_FILE_ITERATOR_H_ #define XGBOOST_DATA_FILE_ITERATOR_H_ -#include // for max_element -#include // for size_t #include // for uint32_t #include // for unique_ptr #include // for string #include // for move #include "dmlc/data.h" // for RowBlock, Parser -#include "xgboost/c_api.h" // for XGDMatrixSetDenseInfo, XGDMatrixFree, XGProxyDMatrixCreate -#include "xgboost/linalg.h" // for ArrayInterfaceStr, MakeVec -#include "xgboost/logging.h" // for CHECK +#include "xgboost/c_api.h" // for XGDMatrixFree, XGProxyDMatrixCreate namespace xgboost::data { [[nodiscard]] std::string ValidateFileFormat(std::string const& uri); @@ -53,41 +49,7 @@ class FileIterator { XGDMatrixFree(proxy_); } - int Next() { - CHECK(parser_); - if (parser_->Next()) { - row_block_ = parser_->Value(); - using linalg::MakeVec; - - indptr_ = ArrayInterfaceStr(MakeVec(row_block_.offset, row_block_.size + 1)); - values_ = ArrayInterfaceStr(MakeVec(row_block_.value, row_block_.offset[row_block_.size])); - indices_ = ArrayInterfaceStr(MakeVec(row_block_.index, row_block_.offset[row_block_.size])); - - size_t n_columns = *std::max_element(row_block_.index, - row_block_.index + row_block_.offset[row_block_.size]); - // dmlc parser converts 1-based indexing back to 0-based indexing so we can ignore - // this condition and just add 1 to n_columns - n_columns += 1; - - XGProxyDMatrixSetDataCSR(proxy_, indptr_.c_str(), indices_.c_str(), - values_.c_str(), n_columns); - - if (row_block_.label) { - XGDMatrixSetDenseInfo(proxy_, "label", row_block_.label, row_block_.size, 1); - } - if (row_block_.qid) { - XGDMatrixSetDenseInfo(proxy_, "qid", row_block_.qid, row_block_.size, 1); - } - if (row_block_.weight) { - XGDMatrixSetDenseInfo(proxy_, "weight", row_block_.weight, row_block_.size, 1); - } - // Continue iteration - return true; - } else { - // Stop iteration - return false; - } - } + int Next(); auto Proxy() -> decltype(proxy_) { return proxy_; } diff --git a/src/gbm/gbtree.h b/src/gbm/gbtree.h index a2d84d848..d6ed851c8 100644 --- a/src/gbm/gbtree.h +++ b/src/gbm/gbtree.h @@ -1,5 +1,5 @@ /** - * Copyright 2014-2023 by Contributors + * Copyright 2014-2024, XGBoost Contributors * \file gbtree.cc * \brief gradient boosted tree implementation. * \author Tianqi Chen @@ -11,14 +11,12 @@ #include #include // std::int32_t -#include #include +#include // for iota #include -#include #include #include -#include "../common/common.h" #include "../common/timer.h" #include "../tree/param.h" // TrainParam #include "gbtree_model.h" diff --git a/src/metric/elementwise_metric.cu b/src/metric/elementwise_metric.cu index 9c26011aa..ec5b9079d 100644 --- a/src/metric/elementwise_metric.cu +++ b/src/metric/elementwise_metric.cu @@ -10,15 +10,15 @@ #include #include +#include // for accumulate -#include "../collective/communicator-inl.h" -#include "../common/common.h" // MetricNoCache +#include "../common/common.h" // for AssertGPUSupport #include "../common/math.h" #include "../common/optional_weight.h" // OptionalWeights #include "../common/pseudo_huber.h" #include "../common/quantile_loss_utils.h" // QuantileLossParam #include "../common/threading_utils.h" -#include "metric_common.h" +#include "metric_common.h" // MetricNoCache #include "xgboost/collective/result.h" // for SafeColl #include "xgboost/metric.h" diff --git a/src/metric/metric_common.h b/src/metric/metric_common.h index 53c38ff2a..2b9239990 100644 --- a/src/metric/metric_common.h +++ b/src/metric/metric_common.h @@ -9,8 +9,6 @@ #include #include "../collective/aggregator.h" -#include "../collective/communicator-inl.h" -#include "../common/common.h" #include "xgboost/metric.h" namespace xgboost { diff --git a/src/metric/multiclass_metric.cu b/src/metric/multiclass_metric.cu index acaef7cf7..e51509fc7 100644 --- a/src/metric/multiclass_metric.cu +++ b/src/metric/multiclass_metric.cu @@ -9,8 +9,8 @@ #include #include #include +#include // for accumulate -#include "../collective/communicator-inl.h" #include "../common/math.h" #include "../common/threading_utils.h" #include "metric_common.h" // MetricNoCache diff --git a/src/metric/survival_metric.cu b/src/metric/survival_metric.cu index c64fece6c..9c57be3ab 100644 --- a/src/metric/survival_metric.cu +++ b/src/metric/survival_metric.cu @@ -9,10 +9,9 @@ #include #include +#include // for accumulate #include -#include "../collective/communicator-inl.h" -#include "../common/math.h" #include "../common/survival_util.h" #include "../common/threading_utils.h" #include "metric_common.h" // MetricNoCache diff --git a/tests/cpp/collective/test_allreduce.cc b/tests/cpp/collective/test_allreduce.cc index 21b4d9fd0..8359d17a6 100644 --- a/tests/cpp/collective/test_allreduce.cc +++ b/tests/cpp/collective/test_allreduce.cc @@ -3,6 +3,8 @@ */ #include +#include // for iota + #include "../../../src/collective/allreduce.h" #include "../../../src/collective/coll.h" // for Coll #include "../../../src/collective/tracker.h" diff --git a/tests/cpp/collective/test_worker.h b/tests/cpp/collective/test_worker.h index acee0f297..7b76052c8 100644 --- a/tests/cpp/collective/test_worker.h +++ b/tests/cpp/collective/test_worker.h @@ -1,11 +1,12 @@ /** - * Copyright 2023, XGBoost Contributors + * Copyright 2023-2024, XGBoost Contributors */ #pragma once #include #include // for seconds #include // for int32_t +#include // for ifstream #include // for string #include // for thread #include // for move diff --git a/tests/cpp/common/test_hist_util.cc b/tests/cpp/common/test_hist_util.cc index 5391bc2cf..24e67c9aa 100644 --- a/tests/cpp/common/test_hist_util.cc +++ b/tests/cpp/common/test_hist_util.cc @@ -1,10 +1,9 @@ /** - * Copyright 2019-2023 by XGBoost Contributors + * Copyright 2019-2024, XGBoost Contributors */ #include #include #include -#include #include "../../../src/common/hist_util.h" #include "../../../src/data/gradient_index.h" @@ -135,7 +134,7 @@ TEST(CutsBuilder, SearchGroupInd) { group[2] = 7; group[3] = 5; - p_mat->SetInfo("group", group.data(), DataType::kUInt32, kNumGroups); + p_mat->SetInfo("group", Make1dInterfaceTest(group.data(), group.size())); HistogramCuts hmat; @@ -348,7 +347,8 @@ void TestSketchFromWeights(bool with_group) { for (size_t i = 0; i < kGroups; ++i) { groups[i] = kRows / kGroups; } - info.SetInfo(ctx, "group", groups.data(), DataType::kUInt32, kGroups); + auto sg = linalg::Make1dInterface(groups.data(), kGroups); + info.SetInfo(ctx, "group", sg.c_str()); } info.num_row_ = kRows; @@ -356,10 +356,10 @@ void TestSketchFromWeights(bool with_group) { // Assign weights. if (with_group) { - m->SetInfo("group", groups.data(), DataType::kUInt32, kGroups); + m->SetInfo("group", Make1dInterfaceTest(groups.data(), kGroups)); } - m->SetInfo("weight", h_weights.data(), DataType::kFloat32, h_weights.size()); + m->SetInfo("weight", Make1dInterfaceTest(h_weights.data(), h_weights.size())); m->Info().num_col_ = kCols; m->Info().num_row_ = kRows; ASSERT_EQ(cuts.Ptrs().size(), kCols + 1); diff --git a/tests/cpp/common/test_hist_util.cu b/tests/cpp/common/test_hist_util.cu index 73af7115c..e37f02ddb 100644 --- a/tests/cpp/common/test_hist_util.cu +++ b/tests/cpp/common/test_hist_util.cu @@ -1,5 +1,5 @@ /** - * Copyright 2019-2023 by XGBoost Contributors + * Copyright 2019-2024, XGBoost Contributors */ #include #include @@ -682,7 +682,7 @@ TEST(HistUtil, DeviceSketchFromGroupWeights) { for (size_t i = 0; i < kGroups; ++i) { groups[i] = kRows / kGroups; } - m->SetInfo("group", groups.data(), DataType::kUInt32, kGroups); + m->SetInfo("group", Make1dInterfaceTest(groups.data(), kGroups)); HistogramCuts weighted_cuts = DeviceSketch(&ctx, m.get(), kBins, 0); // sketch with no weight @@ -727,7 +727,7 @@ void TestAdapterSketchFromWeights(bool with_group) { for (size_t i = 0; i < kGroups; ++i) { groups[i] = kRows / kGroups; } - info.SetInfo(ctx, "group", groups.data(), DataType::kUInt32, kGroups); + info.SetInfo(ctx, "group", Make1dInterfaceTest(groups.data(), kGroups)); } info.weights_.SetDevice(DeviceOrd::CUDA(0)); @@ -746,10 +746,10 @@ void TestAdapterSketchFromWeights(bool with_group) { auto dmat = GetDMatrixFromData(storage.HostVector(), kRows, kCols); if (with_group) { - dmat->Info().SetInfo(ctx, "group", groups.data(), DataType::kUInt32, kGroups); + dmat->Info().SetInfo(ctx, "group", Make1dInterfaceTest(groups.data(), kGroups)); } - dmat->Info().SetInfo(ctx, "weight", h_weights.data(), DataType::kFloat32, h_weights.size()); + dmat->Info().SetInfo(ctx, "weight", Make1dInterfaceTest(h_weights.data(), h_weights.size())); dmat->Info().num_col_ = kCols; dmat->Info().num_row_ = kRows; ASSERT_EQ(cuts.Ptrs().size(), kCols + 1); diff --git a/tests/cpp/common/test_transform_range.cc b/tests/cpp/common/test_transform_range.cc index 24d0267b6..4fc06f639 100644 --- a/tests/cpp/common/test_transform_range.cc +++ b/tests/cpp/common/test_transform_range.cc @@ -1,11 +1,12 @@ /** - * Copyright 2018-2023 by XGBoost Contributors + * Copyright 2018-2024, XGBoost Contributors */ #include #include -#include #include +#include +#include // for iota #include #include "../../../src/common/transform.h" diff --git a/tests/cpp/data/test_array_interface.cu b/tests/cpp/data/test_array_interface.cu index 00b996fb9..be8160c8a 100644 --- a/tests/cpp/data/test_array_interface.cu +++ b/tests/cpp/data/test_array_interface.cu @@ -1,10 +1,11 @@ /** - * Copyright 2021-2023, XGBoost Contributors + * Copyright 2021-2024, XGBoost Contributors */ #include #include -#include "../helpers.h" + #include "../../../src/data/array_interface.h" +#include "../helpers.h" namespace xgboost { diff --git a/tests/cpp/data/test_metainfo.cc b/tests/cpp/data/test_metainfo.cc index 0e63ab8f8..a7d9a0c76 100644 --- a/tests/cpp/data/test_metainfo.cc +++ b/tests/cpp/data/test_metainfo.cc @@ -10,7 +10,6 @@ #include #include -#include "../../../src/common/version.h" #include "../filesystem.h" // dmlc::TemporaryDirectory #include "../helpers.h" // for GMockTHrow #include "xgboost/base.h" @@ -23,23 +22,22 @@ TEST(MetaInfo, GetSet) { double double2[2] = {1.0, 2.0}; EXPECT_EQ(info.labels.Size(), 0); - info.SetInfo(ctx, "label", double2, xgboost::DataType::kFloat32, 2); + info.SetInfo(ctx, "label", Make1dInterfaceTest(double2, 2)); EXPECT_EQ(info.labels.Size(), 2); float float2[2] = {1.0f, 2.0f}; - EXPECT_EQ(info.GetWeight(1), 1.0f) - << "When no weights are given, was expecting default value 1"; - info.SetInfo(ctx, "weight", float2, xgboost::DataType::kFloat32, 2); + EXPECT_EQ(info.GetWeight(1), 1.0f) << "When no weights are given, was expecting default value 1"; + info.SetInfo(ctx, "weight", Make1dInterfaceTest(float2, 2)); EXPECT_EQ(info.GetWeight(1), 2.0f); uint32_t uint32_t2[2] = {1U, 2U}; EXPECT_EQ(info.base_margin_.Size(), 0); - info.SetInfo(ctx, "base_margin", uint32_t2, xgboost::DataType::kUInt32, 2); + info.SetInfo(ctx, "base_margin", Make1dInterfaceTest(uint32_t2, 2)); EXPECT_EQ(info.base_margin_.Size(), 2); uint64_t uint64_t2[2] = {1U, 2U}; EXPECT_EQ(info.group_ptr_.size(), 0); - info.SetInfo(ctx, "group", uint64_t2, xgboost::DataType::kUInt64, 2); + info.SetInfo(ctx, "group", Make1dInterfaceTest(uint64_t2, 2)); ASSERT_EQ(info.group_ptr_.size(), 3); EXPECT_EQ(info.group_ptr_[2], 3); @@ -135,9 +133,9 @@ TEST(MetaInfo, SaveLoadBinary) { }; std::vector values (kRows); std::generate(values.begin(), values.end(), generator); - info.SetInfo(ctx, "label", values.data(), xgboost::DataType::kFloat32, kRows); - info.SetInfo(ctx, "weight", values.data(), xgboost::DataType::kFloat32, kRows); - info.SetInfo(ctx, "base_margin", values.data(), xgboost::DataType::kFloat32, kRows); + info.SetInfo(ctx, "label", Make1dInterfaceTest(values.data(), kRows)); + info.SetInfo(ctx, "weight", Make1dInterfaceTest(values.data(), kRows)); + info.SetInfo(ctx, "base_margin", Make1dInterfaceTest(values.data(), kRows)); info.num_row_ = kRows; info.num_col_ = kCols; @@ -271,7 +269,7 @@ TEST(MetaInfo, CPUQid) { qid[i] = i; } - info.SetInfo(ctx, "qid", qid.data(), xgboost::DataType::kUInt32, info.num_row_); + info.SetInfo(ctx, "qid", Make1dInterfaceTest(qid.data(), info.num_row_)); ASSERT_EQ(info.group_ptr_.size(), info.num_row_ + 1); ASSERT_EQ(info.group_ptr_.front(), 0); ASSERT_EQ(info.group_ptr_.back(), info.num_row_); @@ -288,14 +286,12 @@ TEST(MetaInfo, Validate) { info.num_col_ = 3; std::vector groups (11); Context ctx; - info.SetInfo(ctx, "group", groups.data(), xgboost::DataType::kUInt32, 11); + info.SetInfo(ctx, "group", Make1dInterfaceTest(groups.data(), groups.size())); EXPECT_THROW(info.Validate(FstCU()), dmlc::Error); std::vector labels(info.num_row_ + 1); EXPECT_THROW( - { - info.SetInfo(ctx, "label", labels.data(), xgboost::DataType::kFloat32, info.num_row_ + 1); - }, + { info.SetInfo(ctx, "label", Make1dInterfaceTest(labels.data(), info.num_row_ + 1)); }, dmlc::Error); // Make overflow data, which can happen when users pass group structure as int @@ -305,13 +301,13 @@ TEST(MetaInfo, Validate) { groups.push_back(1562500); } groups.push_back(static_cast(-1)); - EXPECT_THROW(info.SetInfo(ctx, "group", groups.data(), xgboost::DataType::kUInt32, groups.size()), + EXPECT_THROW(info.SetInfo(ctx, "group", Make1dInterfaceTest(groups.data(), groups.size())), dmlc::Error); #if defined(XGBOOST_USE_CUDA) info.group_ptr_.clear(); labels.resize(info.num_row_); - info.SetInfo(ctx, "label", labels.data(), xgboost::DataType::kFloat32, info.num_row_); + info.SetInfo(ctx, "label", Make1dInterfaceTest(labels.data(), info.num_row_)); info.labels.SetDevice(FstCU()); EXPECT_THROW(info.Validate(DeviceOrd::CUDA(1)), dmlc::Error); @@ -340,8 +336,8 @@ TEST(MetaInfo, HostExtend) { for (size_t g = 0; g < kRows / per_group; ++g) { groups.emplace_back(per_group); } - lhs.SetInfo(ctx, "group", groups.data(), xgboost::DataType::kUInt32, groups.size()); - rhs.SetInfo(ctx, "group", groups.data(), xgboost::DataType::kUInt32, groups.size()); + lhs.SetInfo(ctx, "group", Make1dInterfaceTest(groups.data(), groups.size())); + rhs.SetInfo(ctx, "group", Make1dInterfaceTest(groups.data(), groups.size())); lhs.Extend(rhs, true, true); ASSERT_EQ(lhs.num_row_, kRows * 2); diff --git a/tests/cpp/gbm/test_gbtree.cc b/tests/cpp/gbm/test_gbtree.cc index 8f1588077..dcb89b971 100644 --- a/tests/cpp/gbm/test_gbtree.cc +++ b/tests/cpp/gbm/test_gbtree.cc @@ -408,7 +408,7 @@ class Dart : public testing::TestWithParam { for (size_t i = 0; i < kRows; ++i) { labels[i] = i % 2; } - p_mat->SetInfo("label", labels.data(), DataType::kFloat32, kRows); + p_mat->SetInfo("label", Make1dInterfaceTest(labels.data(), kRows)); auto learner = std::unique_ptr(Learner::Create({p_mat})); learner->SetParam("booster", "dart"); diff --git a/tests/cpp/helpers.cu b/tests/cpp/helpers.cu index db94da27a..f75628953 100644 --- a/tests/cpp/helpers.cu +++ b/tests/cpp/helpers.cu @@ -1,8 +1,11 @@ +/** + * Copyright 2020-2024, XGBoost contributors + */ #include -#include "helpers.h" #include "../../src/data/device_adapter.cuh" #include "../../src/data/iterative_dmatrix.h" +#include "helpers.h" namespace xgboost { diff --git a/tests/cpp/helpers.h b/tests/cpp/helpers.h index c161856bb..273cc0f00 100644 --- a/tests/cpp/helpers.h +++ b/tests/cpp/helpers.h @@ -15,19 +15,18 @@ #include // std::int32_t #include -#include -#include #include #include -#include #include #include "../../src/collective/communicator-inl.h" #include "../../src/common/common.h" #include "../../src/common/threading_utils.h" -#include "../../src/data/array_interface.h" #include "filesystem.h" // dmlc::TemporaryDirectory #include "xgboost/linalg.h" +#if !defined(_OPENMP) +#include +#endif #if defined(__CUDACC__) #define DeclareUnifiedTest(name) GPU ## name @@ -333,7 +332,7 @@ inline std::vector GenerateRandomCategoricalSingleColumn(int n, size_t nu std::vector x(n); std::mt19937 rng(0); std::uniform_int_distribution dist(0, num_categories - 1); - std::generate(x.begin(), x.end(), [&]() { return dist(rng); }); + std::generate(x.begin(), x.end(), [&]() { return static_cast(dist(rng)); }); // Make sure each category is present for (size_t i = 0; i < num_categories; i++) { x[i] = static_cast(i); @@ -494,6 +493,16 @@ inline int Next(DataIterHandle self) { return static_cast(self)->Next(); } +/** + * @brief Create an array interface for host vector. + */ +template +char const* Make1dInterfaceTest(T const* vec, std::size_t len) { + static thread_local std::string str; + str = linalg::Make1dInterface(vec, len); + return str.c_str(); +} + class RMMAllocator; using RMMAllocatorPtr = std::unique_ptr; RMMAllocatorPtr SetUpRMMResourceForCppTests(int argc, char** argv); diff --git a/tests/cpp/metric/test_elementwise_metric.h b/tests/cpp/metric/test_elementwise_metric.h index ef34d7651..4435c0807 100644 --- a/tests/cpp/metric/test_elementwise_metric.h +++ b/tests/cpp/metric/test_elementwise_metric.h @@ -5,10 +5,9 @@ #include #include -#include #include +#include // for iota -#include "../../../src/common/linalg_op.h" #include "../helpers.h" namespace xgboost::metric { diff --git a/tests/cpp/objective/test_regression_obj_cpu.cc b/tests/cpp/objective/test_regression_obj_cpu.cc index 3613d0d90..18ee4db7e 100644 --- a/tests/cpp/objective/test_regression_obj_cpu.cc +++ b/tests/cpp/objective/test_regression_obj_cpu.cc @@ -1,14 +1,15 @@ -/*! - * Copyright 2018-2023 XGBoost contributors +/** + * Copyright 2018-2024, XGBoost contributors */ #include #include #include -#include "../../../src/objective/adaptive.h" -#include "../../../src/tree/param.h" // for TrainParam -#include "../helpers.h" +#include // for iota +#include "../../../src/objective/adaptive.h" +#include "../../../src/tree/param.h" // for TrainParam +#include "../helpers.h" #include "test_regression_obj.h" namespace xgboost { diff --git a/tests/cpp/test_learner.cc b/tests/cpp/test_learner.cc index 6fe65b97e..541f53008 100644 --- a/tests/cpp/test_learner.cc +++ b/tests/cpp/test_learner.cc @@ -12,7 +12,6 @@ #include // for int32_t, int64_t, uint32_t #include // for size_t #include // for ofstream -#include // for back_insert_iterator, back_inserter #include // for numeric_limits #include // for map #include // for unique_ptr, shared_ptr, __shared_ptr_... @@ -30,7 +29,6 @@ #include "../../src/common/random.h" // for GlobalRandom #include "dmlc/io.h" // for Stream #include "dmlc/omp.h" // for omp_get_max_threads -#include "dmlc/registry.h" // for Registry #include "filesystem.h" // for TemporaryDirectory #include "helpers.h" // for GetBaseScore, RandomDataGenerator #include "objective_helpers.h" // for MakeObjNamesForTest, ObjTestNameGenerator @@ -103,9 +101,9 @@ TEST(Learner, CheckGroup) { labels[i] = i % 2; } - p_mat->SetInfo("weight", static_cast(weight.data()), DataType::kFloat32, kNumGroups); - p_mat->SetInfo("group", group.data(), DataType::kUInt32, kNumGroups); - p_mat->SetInfo("label", labels.data(), DataType::kFloat32, kNumRows); + p_mat->SetInfo("weight", Make1dInterfaceTest(weight.data(), kNumGroups)); + p_mat->SetInfo("group", Make1dInterfaceTest(group.data(), kNumGroups)); + p_mat->SetInfo("label", Make1dInterfaceTest(labels.data(), kNumRows)); std::vector> mat = {p_mat}; auto learner = std::unique_ptr(Learner::Create(mat)); @@ -115,7 +113,7 @@ TEST(Learner, CheckGroup) { group.resize(kNumGroups+1); group[3] = 4; group[4] = 1; - p_mat->SetInfo("group", group.data(), DataType::kUInt32, kNumGroups+1); + p_mat->SetInfo("group", Make1dInterfaceTest(group.data(), kNumGroups+1)); EXPECT_ANY_THROW(learner->UpdateOneIter(0, p_mat)); } @@ -132,7 +130,7 @@ TEST(Learner, SLOW_CheckMultiBatch) { // NOLINT for (size_t i = 0; i < num_row; ++i) { labels[i] = i % 2; } - dmat->SetInfo("label", labels.data(), DataType::kFloat32, num_row); + dmat->SetInfo("label", Make1dInterfaceTest(labels.data(), num_row)); std::vector> mat{dmat}; auto learner = std::unique_ptr(Learner::Create(mat)); learner->SetParams(Args{{"objective", "binary:logistic"}}); diff --git a/tests/cpp/tree/gpu_hist/test_histogram.cu b/tests/cpp/tree/gpu_hist/test_histogram.cu index f7f2e27ea..84cd956db 100644 --- a/tests/cpp/tree/gpu_hist/test_histogram.cu +++ b/tests/cpp/tree/gpu_hist/test_histogram.cu @@ -239,4 +239,18 @@ void TestAtomicAdd() { TEST(Histogram, AtomicAddInt64) { TestAtomicAdd(); } + +TEST(Histogram, Quantiser) { + auto ctx = MakeCUDACtx(0); + std::size_t n_samples{16}; + HostDeviceVector gpair(n_samples, GradientPair{1.0, 1.0}); + gpair.SetDevice(ctx.Device()); + + auto quantiser = GradientQuantiser(&ctx, gpair.DeviceSpan(), MetaInfo()); + for (auto v : gpair.ConstHostVector()) { + auto gh = quantiser.ToFloatingPoint(quantiser.ToFixedPoint(v)); + ASSERT_EQ(gh.GetGrad(), 1.0); + ASSERT_EQ(gh.GetHess(), 1.0); + } +} } // namespace xgboost::tree