Cleanup set info. (#10139)
- Use the array interface internally.
- Deprecate `XGDMatrixSetDenseInfo`.
- Deprecate `XGDMatrixSetUIntInfo`.
- Move the handling of `DataType` into the deprecated C function.

Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>
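A minimal migration sketch (illustrative, not part of the commit): the deprecated setters take a raw pointer plus a length or a `DataType` code, while the consolidated `XGDMatrixSetInfoFromInterface` takes an array-interface JSON document. The `SetLabels` helper and the handle below are hypothetical; `xgboost::linalg::Make1dInterface` is the helper added by this commit.

```cpp
#include <xgboost/c_api.h>
#include <xgboost/linalg.h>  // xgboost::linalg::Make1dInterface, added in this commit

#include <string>
#include <vector>

// Hypothetical caller migrating away from the deprecated pointer-based setter.
void SetLabels(DMatrixHandle handle, std::vector<float> const &labels) {
  // Before (deprecated since 2.1.0):
  //   XGDMatrixSetFloatInfo(handle, "label", labels.data(), labels.size());

  // After: describe the buffer with a 1-d array-interface string and hand that over.
  std::string aif = xgboost::linalg::Make1dInterface(labels.data(), labels.size());
  XGDMatrixSetInfoFromInterface(handle, "label", aif.c_str());
}
```

The JVM bindings in this diff follow the same pattern: they now build the interface string with `Make1dInterface` instead of calling `XGDMatrixSetFloatInfo`/`XGDMatrixSetUIntInfo`.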
parent 6a7c6a8ae6
commit 230010d9a0

.github/workflows/r_tests.yml
@@ -110,7 +110,7 @@ jobs:
name: Test R package on Debian
runs-on: ubuntu-latest
container:
image: rhub/debian-gcc-devel
image: rhub/debian-gcc-release

steps:
- name: Install system dependencies
@@ -130,12 +130,12 @@ jobs:
- name: Install dependencies
shell: bash -l {0}
run: |
/tmp/R-devel/bin/Rscript -e "source('./R-package/tests/helper_scripts/install_deps.R')"
Rscript -e "source('./R-package/tests/helper_scripts/install_deps.R')"

- name: Test R
shell: bash -l {0}
run: |
python3 tests/ci_build/test_r_package.py --r=/tmp/R-devel/bin/R --build-tool=autotools --task=check
python3 tests/ci_build/test_r_package.py --r=/usr/bin/R --build-tool=autotools --task=check

- uses: dorny/paths-filter@v2
id: changes
@@ -147,4 +147,4 @@ jobs:
- name: Run document check
if: steps.changes.outputs.r_package == 'true'
run: |
python3 tests/ci_build/test_r_package.py --r=/tmp/R-devel/bin/R --task=doc
python3 tests/ci_build/test_r_package.py --r=/usr/bin/R --task=doc

@@ -1,5 +1,5 @@
/**
* Copyright 2015~2023 by XGBoost Contributors
* Copyright 2015-2024, XGBoost Contributors
* \file c_api.h
* \author Tianqi Chen
* \brief C API of XGBoost, used for interfacing to other languages.
@@ -639,21 +639,14 @@ XGB_DLL int XGDMatrixSetInfoFromInterface(DMatrixHandle handle,
* \param len length of array
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixSetFloatInfo(DMatrixHandle handle,
const char *field,
const float *array,
XGB_DLL int XGDMatrixSetFloatInfo(DMatrixHandle handle, const char *field, const float *array,
bst_ulong len);
/*!
* \brief set uint32 vector to a content in info
* \param handle a instance of data matrix
* \param field field name
* \param array pointer to unsigned int vector
* \param len length of array
* \return 0 when success, -1 when failure happens
/**
* @deprecated since 2.1.0
*
* Use @ref XGDMatrixSetInfoFromInterface instead.
*/
XGB_DLL int XGDMatrixSetUIntInfo(DMatrixHandle handle,
const char *field,
const unsigned *array,
XGB_DLL int XGDMatrixSetUIntInfo(DMatrixHandle handle, const char *field, const unsigned *array,
bst_ulong len);

/*!
@@ -725,42 +718,13 @@ XGB_DLL int XGDMatrixGetStrFeatureInfo(DMatrixHandle handle, const char *field,
bst_ulong *size,
const char ***out_features);

/*!
* \brief Set meta info from dense matrix. Valid field names are:
/**
* @deprecated since 2.1.0
*
* - label
* - weight
* - base_margin
* - group
* - label_lower_bound
* - label_upper_bound
* - feature_weights
*
* \param handle An instance of data matrix
* \param field Field name
* \param data Pointer to consecutive memory storing data.
* \param size Size of the data, this is relative to size of type. (Meaning NOT number
* of bytes.)
* \param type Indicator of data type. This is defined in xgboost::DataType enum class.
* - float = 1
* - double = 2
* - uint32_t = 3
* - uint64_t = 4
* \return 0 when success, -1 when failure happens
* Use @ref XGDMatrixSetInfoFromInterface instead.
*/
XGB_DLL int XGDMatrixSetDenseInfo(DMatrixHandle handle, const char *field,
void const *data, bst_ulong size, int type);

/*!
* \brief (deprecated) Use XGDMatrixSetUIntInfo instead. Set group of the training matrix
* \param handle a instance of data matrix
* \param group pointer to group size
* \param len length of array
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixSetGroup(DMatrixHandle handle,
const unsigned *group,
bst_ulong len);
XGB_DLL int XGDMatrixSetDenseInfo(DMatrixHandle handle, const char *field, void const *data,
bst_ulong size, int type);

/*!
* \brief get float info vector from matrix.

@@ -19,7 +19,6 @@
#include <algorithm>
#include <limits>
#include <memory>
#include <numeric>
#include <string>
#include <utility>
#include <vector>
@@ -137,14 +136,6 @@ class MetaInfo {
* \param fo The output stream.
*/
void SaveBinary(dmlc::Stream* fo) const;
/*!
* \brief Set information in the meta info.
* \param key The key of the information.
* \param dptr The data pointer of the source array.
* \param dtype The type of the source data.
* \param num Number of elements in the source array.
*/
void SetInfo(Context const& ctx, const char* key, const void* dptr, DataType dtype, size_t num);
/*!
* \brief Set information in the meta info with array interface.
* \param key The key of the information.
@@ -517,10 +508,6 @@ class DMatrix {
DMatrix() = default;
/*! \brief meta information of the dataset */
virtual MetaInfo& Info() = 0;
virtual void SetInfo(const char* key, const void* dptr, DataType dtype, size_t num) {
auto const& ctx = *this->Ctx();
this->Info().SetInfo(ctx, key, dptr, dtype, num);
}
virtual void SetInfo(const char* key, std::string const& interface_str) {
auto const& ctx = *this->Ctx();
this->Info().SetInfo(ctx, key, StringView{interface_str});

@@ -190,13 +190,14 @@ constexpr auto ArrToTuple(T (&arr)[N]) {
// uint division optimization inspired by the CIndexer in cupy. Division operation is
// slow on both CPU and GPU, especially 64 bit integer. So here we first try to avoid 64
// bit when the index is smaller, then try to avoid division when it's exp of 2.
template <typename I, int32_t D>
template <typename I, std::int32_t D>
LINALG_HD auto UnravelImpl(I idx, common::Span<size_t const, D> shape) {
size_t index[D]{0};
std::size_t index[D]{0};
static_assert(std::is_signed<decltype(D)>::value,
"Don't change the type without changing the for loop.");
auto const sptr = shape.data();
for (int32_t dim = D; --dim > 0;) {
auto s = static_cast<std::remove_const_t<std::remove_reference_t<I>>>(shape[dim]);
auto s = static_cast<std::remove_const_t<std::remove_reference_t<I>>>(sptr[dim]);
if (s & (s - 1)) {
auto t = idx / s;
index[dim] = idx - t * s;
@@ -745,6 +746,14 @@ auto ArrayInterfaceStr(TensorView<T, D> const &t) {
return str;
}

template <typename T>
auto Make1dInterface(T const *vec, std::size_t len) {
Context ctx;
auto t = linalg::MakeTensorView(&ctx, common::Span{vec, len}, len);
auto str = linalg::ArrayInterfaceStr(t);
return str;
}

/**
* \brief A tensor storage. To use it for other functionality like slicing one needs to
* obtain a view first. This way we can use it on both host and device.

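A side note on what this helper produces (a sketch, not part of the diff): `Make1dInterface` renders the buffer as a NumPy-style array-interface JSON object, which is the format `XGDMatrixSetInfoFromInterface` and `MetaInfo::SetInfo` parse on the other side. The pointer value and field order shown in the comment below are illustrative assumptions.

```cpp
#include <xgboost/linalg.h>  // xgboost::linalg::Make1dInterface

#include <cstdint>
#include <iostream>

int main() {
  std::uint32_t group[3] = {10, 20, 30};
  // Build a JSON description of the host buffer; the data itself is not copied.
  auto aif = xgboost::linalg::Make1dInterface(group, 3);
  // Rough shape of the output (address and field order will differ):
  //   {"data": [140465483089920, true], "shape": [3], "typestr": "<u4", "version": 3}
  std::cout << aif << "\n";
}
```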
@@ -30,9 +30,8 @@
#define XGBOOST_SPAN_H_

#include <xgboost/base.h>
#include <xgboost/logging.h>

#include <cinttypes> // size_t
#include <cstddef> // size_t
#include <cstdio>
#include <iterator>
#include <limits> // numeric_limits
@@ -73,8 +72,7 @@

#endif // defined(_MSC_VER) && _MSC_VER < 1910

namespace xgboost {
namespace common {
namespace xgboost::common {

#if defined(__CUDA_ARCH__)
// Usual logging facility is not available inside device code.
@@ -707,8 +705,8 @@ class IterSpan {
return it_ + size();
}
};
} // namespace common
} // namespace xgboost
} // namespace xgboost::common

#if defined(_MSC_VER) && _MSC_VER < 1910
#undef constexpr

@@ -408,7 +408,8 @@ JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixSetFloatI

jfloat* array = jenv->GetFloatArrayElements(jarray, NULL);
bst_ulong len = (bst_ulong)jenv->GetArrayLength(jarray);
int ret = XGDMatrixSetFloatInfo(handle, field, (float const *)array, len);
auto str = xgboost::linalg::Make1dInterface(array, len);
int ret = XGDMatrixSetInfoFromInterface(handle, field, str.c_str());
JVM_CHECK_CALL(ret);
//release
if (field) jenv->ReleaseStringUTFChars(jfield, field);
@@ -427,7 +428,8 @@ JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixSetUIntIn
const char* field = jenv->GetStringUTFChars(jfield, 0);
jint* array = jenv->GetIntArrayElements(jarray, NULL);
bst_ulong len = (bst_ulong)jenv->GetArrayLength(jarray);
int ret = XGDMatrixSetUIntInfo(handle, (char const *)field, (unsigned int const *)array, len);
auto str = xgboost::linalg::Make1dInterface(array, len);
int ret = XGDMatrixSetInfoFromInterface(handle, field, str.c_str());
JVM_CHECK_CALL(ret);
//release
if (field) jenv->ReleaseStringUTFChars(jfield, (const char *)field);
@@ -730,8 +732,8 @@ JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterPredictFr
if (jmargin) {
margin = jenv->GetFloatArrayElements(jmargin, nullptr);
JVM_CHECK_CALL(XGProxyDMatrixCreate(&proxy));
JVM_CHECK_CALL(
XGDMatrixSetFloatInfo(proxy, "base_margin", margin, jenv->GetArrayLength(jmargin)));
auto str = xgboost::linalg::Make1dInterface(margin, jenv->GetArrayLength(jmargin));
JVM_CHECK_CALL(XGDMatrixSetInfoFromInterface(proxy, "base_margin", str.c_str()));
}

bst_ulong const *out_shape;

@@ -1,5 +1,5 @@
/**
* Copyright 2014-2024 by XGBoost Contributors
* Copyright 2014-2024, XGBoost Contributors
*/
#include "xgboost/c_api.h"

@@ -615,7 +615,7 @@ XGB_DLL int XGDMatrixSetFloatInfo(DMatrixHandle handle, const char *field, const
CHECK_HANDLE();
xgboost_CHECK_C_ARG_PTR(field);
auto const &p_fmat = *static_cast<std::shared_ptr<DMatrix> *>(handle);
p_fmat->SetInfo(field, info, xgboost::DataType::kFloat32, len);
p_fmat->SetInfo(field, linalg::Make1dInterface(info, len));
API_END();
}

@@ -634,8 +634,9 @@ XGB_DLL int XGDMatrixSetUIntInfo(DMatrixHandle handle, const char *field, const
API_BEGIN();
CHECK_HANDLE();
xgboost_CHECK_C_ARG_PTR(field);
LOG(WARNING) << error::DeprecatedFunc(__func__, "2.1.0", "XGDMatrixSetInfoFromInterface");
auto const &p_fmat = *static_cast<std::shared_ptr<DMatrix> *>(handle);
p_fmat->SetInfo(field, info, xgboost::DataType::kUInt32, len);
p_fmat->SetInfo(field, linalg::Make1dInterface(info, len));
API_END();
}

@@ -679,19 +680,52 @@ XGB_DLL int XGDMatrixSetDenseInfo(DMatrixHandle handle, const char *field, void
xgboost::bst_ulong size, int type) {
API_BEGIN();
CHECK_HANDLE();
LOG(WARNING) << error::DeprecatedFunc(__func__, "2.1.0", "XGDMatrixSetInfoFromInterface");
auto const &p_fmat = *static_cast<std::shared_ptr<DMatrix> *>(handle);
CHECK(type >= 1 && type <= 4);
xgboost_CHECK_C_ARG_PTR(field);
p_fmat->SetInfo(field, data, static_cast<DataType>(type), size);
API_END();

Context ctx;
auto dtype = static_cast<DataType>(type);
std::string str;
auto proc = [&](auto cast_d_ptr) {
using T = std::remove_pointer_t<decltype(cast_d_ptr)>;
auto t = linalg::TensorView<T, 1>(
common::Span<T>{cast_d_ptr, static_cast<typename common::Span<T>::index_type>(size)},
{size}, DeviceOrd::CPU());
CHECK(t.CContiguous());
Json interface{linalg::ArrayInterface(t)};
CHECK(ArrayInterface<1>{interface}.is_contiguous);
str = Json::Dump(interface);
return str;
};

// Legacy code using XGBoost dtype, which is a small subset of array interface types.
switch (dtype) {
case xgboost::DataType::kFloat32: {
auto cast_ptr = reinterpret_cast<const float *>(data);
p_fmat->Info().SetInfo(ctx, field, proc(cast_ptr));
break;
}
case xgboost::DataType::kDouble: {
auto cast_ptr = reinterpret_cast<const double *>(data);
p_fmat->Info().SetInfo(ctx, field, proc(cast_ptr));
break;
}
case xgboost::DataType::kUInt32: {
auto cast_ptr = reinterpret_cast<const uint32_t *>(data);
p_fmat->Info().SetInfo(ctx, field, proc(cast_ptr));
break;
}
case xgboost::DataType::kUInt64: {
auto cast_ptr = reinterpret_cast<const uint64_t *>(data);
p_fmat->Info().SetInfo(ctx, field, proc(cast_ptr));
break;
}
default:
LOG(FATAL) << "Unknown data type" << static_cast<uint8_t>(dtype);
}

XGB_DLL int XGDMatrixSetGroup(DMatrixHandle handle, const unsigned *group, xgboost::bst_ulong len) {
API_BEGIN();
CHECK_HANDLE();
LOG(WARNING) << "XGDMatrixSetGroup is deprecated, use `XGDMatrixSetUIntInfo` instead.";
auto const &p_fmat = *static_cast<std::shared_ptr<DMatrix> *>(handle);
p_fmat->SetInfo("group", group, xgboost::DataType::kUInt32, len);
API_END();
}

@@ -987,7 +1021,7 @@ XGB_DLL int XGBoosterBoostOneIter(BoosterHandle handle, DMatrixHandle dtrain, bs
bst_float *hess, xgboost::bst_ulong len) {
API_BEGIN();
CHECK_HANDLE();
error::DeprecatedFunc(__func__, "2.1.0", "XGBoosterTrainOneIter");
LOG(WARNING) << error::DeprecatedFunc(__func__, "2.1.0", "XGBoosterTrainOneIter");
auto *learner = static_cast<Learner *>(handle);
auto ctx = learner->Ctx()->MakeCPU();

@@ -1,17 +1,18 @@
/**
* Copyright 2021-2023, XGBoost Contributors
* Copyright 2021-2024, XGBoost Contributors
*/
#ifndef XGBOOST_C_API_C_API_UTILS_H_
#define XGBOOST_C_API_C_API_UTILS_H_

#include <algorithm>
#include <cstddef>
#include <functional>
#include <algorithm> // for min
#include <cstddef> // for size_t
#include <functional> // for multiplies
#include <memory> // for shared_ptr
#include <numeric> // for accumulate
#include <string> // for string
#include <tuple> // for make_tuple
#include <utility> // for move
#include <vector>
#include <vector> // for vector

#include "../common/json_utils.h" // for TypeCheck
#include "xgboost/c_api.h"

@@ -2,6 +2,8 @@
* Copyright 2023 XGBoost contributors
*/
#if defined(XGBOOST_USE_NCCL)
#include <numeric> // for accumulate

#include "comm.cuh"
#include "nccl_device_communicator.cuh"

@@ -11,7 +11,7 @@
#include "xgboost/logging.h"

namespace xgboost::error {
std::string DeprecatedFunc(StringView old, StringView since, StringView replacement) {
[[nodiscard]] std::string DeprecatedFunc(StringView old, StringView since, StringView replacement) {
std::stringstream ss;
ss << "`" << old << "` is deprecated since" << since << ", use `" << replacement << "` instead.";
return ss.str();

@@ -89,7 +89,7 @@ void WarnDeprecatedGPUId();

void WarnEmptyDataset();

std::string DeprecatedFunc(StringView old, StringView since, StringView replacement);
[[nodiscard]] std::string DeprecatedFunc(StringView old, StringView since, StringView replacement);

constexpr StringView InvalidCUDAOrdinal() {
return "Invalid device. `device` is required to be CUDA and there must be at least one GPU "

@@ -6,7 +6,6 @@

#include <algorithm>
#include <cstdint>
#include <mutex>

#include "xgboost/data.h"
#include "xgboost/host_device_vector.h"

@@ -4,6 +4,7 @@
#include "quantile.h"

#include <limits>
#include <numeric> // for partial_sum
#include <utility>

#include "../collective/aggregator.h"

@@ -1,5 +1,5 @@
/**
* Copyright 2020-2023 by XGBoost Contributors
* Copyright 2020-2024, XGBoost Contributors
*/
#include <thrust/binary_search.h>
#include <thrust/execution_policy.h>
@@ -9,7 +9,7 @@
#include <thrust/unique.h>

#include <limits> // std::numeric_limits
#include <memory>
#include <numeric> // for partial_sum
#include <utility>

#include "../collective/communicator-inl.cuh"

@@ -1,8 +1,9 @@
/**
* Copyright 2020-2024, XGBoost Contributors
*/
#ifndef XGBOOST_COMMON_QUANTILE_CUH_
#define XGBOOST_COMMON_QUANTILE_CUH_

#include <memory>

#include "xgboost/span.h"
#include "xgboost/data.h"
#include "device_helpers.cuh"

@@ -11,7 +11,6 @@
#include <cmath> // for abs
#include <cstdint> // for uint64_t, int32_t, uint8_t, uint32_t
#include <cstring> // for size_t, strcmp, memcpy
#include <exception> // for exception
#include <iostream> // for operator<<, basic_ostream, basic_ostream::op...
#include <map> // for map, operator!=
#include <numeric> // for accumulate, partial_sum
@@ -22,7 +21,6 @@
#include "../collective/communicator.h" // for Operation
#include "../common/algorithm.h" // for StableSort
#include "../common/api_entry.h" // for XGBAPIThreadLocalEntry
#include "../common/common.h" // for Split
#include "../common/error_msg.h" // for GroupSize, GroupWeight, InfInData
#include "../common/group_data.h" // for ParallelGroupBuilder
#include "../common/io.h" // for PeekableInStream
@@ -473,11 +471,11 @@ void MetaInfo::SetInfo(Context const& ctx, StringView key, StringView interface_
<< ", must have at least 1 column even if it's empty.";
auto const& first = get<Object const>(array.front());
auto ptr = ArrayInterfaceHandler::GetPtrFromArrayData<void*>(first);
is_cuda = ArrayInterfaceHandler::IsCudaPtr(ptr);
is_cuda = first.find("stream") != first.cend() || ArrayInterfaceHandler::IsCudaPtr(ptr);
} else {
auto const& first = get<Object const>(j_interface);
auto ptr = ArrayInterfaceHandler::GetPtrFromArrayData<void*>(first);
is_cuda = ArrayInterfaceHandler::IsCudaPtr(ptr);
is_cuda = first.find("stream") != first.cend() || ArrayInterfaceHandler::IsCudaPtr(ptr);
}

if (is_cuda) {
@@ -567,46 +565,6 @@ void MetaInfo::SetInfoFromHost(Context const& ctx, StringView key, Json arr) {
}
}

void MetaInfo::SetInfo(Context const& ctx, const char* key, const void* dptr, DataType dtype,
size_t num) {
CHECK(key);
auto proc = [&](auto cast_d_ptr) {
using T = std::remove_pointer_t<decltype(cast_d_ptr)>;
auto t = linalg::TensorView<T, 1>(common::Span<T>{cast_d_ptr, num}, {num}, DeviceOrd::CPU());
CHECK(t.CContiguous());
Json interface {
linalg::ArrayInterface(t)
};
assert(ArrayInterface<1>{interface}.is_contiguous);
return interface;
};
// Legacy code using XGBoost dtype, which is a small subset of array interface types.
switch (dtype) {
case xgboost::DataType::kFloat32: {
auto cast_ptr = reinterpret_cast<const float*>(dptr);
this->SetInfoFromHost(ctx, key, proc(cast_ptr));
break;
}
case xgboost::DataType::kDouble: {
auto cast_ptr = reinterpret_cast<const double*>(dptr);
this->SetInfoFromHost(ctx, key, proc(cast_ptr));
break;
}
case xgboost::DataType::kUInt32: {
auto cast_ptr = reinterpret_cast<const uint32_t*>(dptr);
this->SetInfoFromHost(ctx, key, proc(cast_ptr));
break;
}
case xgboost::DataType::kUInt64: {
auto cast_ptr = reinterpret_cast<const uint64_t*>(dptr);
this->SetInfoFromHost(ctx, key, proc(cast_ptr));
break;
}
default:
LOG(FATAL) << "Unknown data type" << static_cast<uint8_t>(dtype);
}
}

void MetaInfo::GetInfo(char const* key, bst_ulong* out_len, DataType dtype,
const void** out_dptr) const {
if (dtype == DataType::kFloat32) {

@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Copyright 2021-2023, XGBoost contributors
|
||||
* Copyright 2021-2024, XGBoost contributors
|
||||
*/
|
||||
#include "file_iterator.h"
|
||||
|
||||
@ -11,6 +11,9 @@
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../common/common.h" // for Split
|
||||
#include "xgboost/linalg.h" // for ArrayInterfaceStr, MakeVec
|
||||
#include "xgboost/linalg.h"
|
||||
#include "xgboost/logging.h" // for CHECK
|
||||
#include "xgboost/string_view.h" // for operator<<, StringView
|
||||
|
||||
namespace xgboost::data {
|
||||
@ -28,10 +31,10 @@ std::string ValidateFileFormat(std::string const& uri) {
|
||||
for (size_t i = 0; i < arg_list.size(); ++i) {
|
||||
std::istringstream is(arg_list[i]);
|
||||
std::pair<std::string, std::string> kv;
|
||||
CHECK(std::getline(is, kv.first, '=')) << "Invalid uri argument format"
|
||||
<< " for key in arg " << i + 1;
|
||||
CHECK(std::getline(is, kv.second)) << "Invalid uri argument format"
|
||||
<< " for value in arg " << i + 1;
|
||||
CHECK(std::getline(is, kv.first, '='))
|
||||
<< "Invalid uri argument format" << " for key in arg " << i + 1;
|
||||
CHECK(std::getline(is, kv.second))
|
||||
<< "Invalid uri argument format" << " for value in arg " << i + 1;
|
||||
args.insert(kv);
|
||||
}
|
||||
if (args.find("format") == args.cend()) {
|
||||
@ -48,4 +51,41 @@ std::string ValidateFileFormat(std::string const& uri) {
|
||||
return name_args[0] + "?" + name_args[1] + '#' + name_args_cache[1];
|
||||
}
|
||||
}
|
||||
|
||||
int FileIterator::Next() {
|
||||
CHECK(parser_);
|
||||
if (parser_->Next()) {
|
||||
row_block_ = parser_->Value();
|
||||
|
||||
indptr_ = linalg::Make1dInterface(row_block_.offset, row_block_.size + 1);
|
||||
values_ = linalg::Make1dInterface(row_block_.value, row_block_.offset[row_block_.size]);
|
||||
indices_ = linalg::Make1dInterface(row_block_.index, row_block_.offset[row_block_.size]);
|
||||
|
||||
size_t n_columns =
|
||||
*std::max_element(row_block_.index, row_block_.index + row_block_.offset[row_block_.size]);
|
||||
// dmlc parser converts 1-based indexing back to 0-based indexing so we can ignore
|
||||
// this condition and just add 1 to n_columns
|
||||
n_columns += 1;
|
||||
|
||||
XGProxyDMatrixSetDataCSR(proxy_, indptr_.c_str(), indices_.c_str(), values_.c_str(), n_columns);
|
||||
|
||||
if (row_block_.label) {
|
||||
auto str = linalg::Make1dInterface(row_block_.label, row_block_.size);
|
||||
XGDMatrixSetInfoFromInterface(proxy_, "label", str.c_str());
|
||||
}
|
||||
if (row_block_.qid) {
|
||||
auto str = linalg::Make1dInterface(row_block_.qid, row_block_.size);
|
||||
XGDMatrixSetInfoFromInterface(proxy_, "qid", str.c_str());
|
||||
}
|
||||
if (row_block_.weight) {
|
||||
auto str = linalg::Make1dInterface(row_block_.weight, row_block_.size);
|
||||
XGDMatrixSetInfoFromInterface(proxy_, "weight", str.c_str());
|
||||
}
|
||||
// Continue iteration
|
||||
return true;
|
||||
} else {
|
||||
// Stop iteration
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} // namespace xgboost::data
|
||||
|
||||
@ -1,20 +1,16 @@
|
||||
/**
|
||||
* Copyright 2021-2023, XGBoost contributors
|
||||
* Copyright 2021-2024, XGBoost contributors
|
||||
*/
|
||||
#ifndef XGBOOST_DATA_FILE_ITERATOR_H_
|
||||
#define XGBOOST_DATA_FILE_ITERATOR_H_
|
||||
|
||||
#include <algorithm> // for max_element
|
||||
#include <cstddef> // for size_t
|
||||
#include <cstdint> // for uint32_t
|
||||
#include <memory> // for unique_ptr
|
||||
#include <string> // for string
|
||||
#include <utility> // for move
|
||||
|
||||
#include "dmlc/data.h" // for RowBlock, Parser
|
||||
#include "xgboost/c_api.h" // for XGDMatrixSetDenseInfo, XGDMatrixFree, XGProxyDMatrixCreate
|
||||
#include "xgboost/linalg.h" // for ArrayInterfaceStr, MakeVec
|
||||
#include "xgboost/logging.h" // for CHECK
|
||||
#include "xgboost/c_api.h" // for XGDMatrixFree, XGProxyDMatrixCreate
|
||||
|
||||
namespace xgboost::data {
|
||||
[[nodiscard]] std::string ValidateFileFormat(std::string const& uri);
|
||||
@ -53,41 +49,7 @@ class FileIterator {
|
||||
XGDMatrixFree(proxy_);
|
||||
}
|
||||
|
||||
int Next() {
|
||||
CHECK(parser_);
|
||||
if (parser_->Next()) {
|
||||
row_block_ = parser_->Value();
|
||||
using linalg::MakeVec;
|
||||
|
||||
indptr_ = ArrayInterfaceStr(MakeVec(row_block_.offset, row_block_.size + 1));
|
||||
values_ = ArrayInterfaceStr(MakeVec(row_block_.value, row_block_.offset[row_block_.size]));
|
||||
indices_ = ArrayInterfaceStr(MakeVec(row_block_.index, row_block_.offset[row_block_.size]));
|
||||
|
||||
size_t n_columns = *std::max_element(row_block_.index,
|
||||
row_block_.index + row_block_.offset[row_block_.size]);
|
||||
// dmlc parser converts 1-based indexing back to 0-based indexing so we can ignore
|
||||
// this condition and just add 1 to n_columns
|
||||
n_columns += 1;
|
||||
|
||||
XGProxyDMatrixSetDataCSR(proxy_, indptr_.c_str(), indices_.c_str(),
|
||||
values_.c_str(), n_columns);
|
||||
|
||||
if (row_block_.label) {
|
||||
XGDMatrixSetDenseInfo(proxy_, "label", row_block_.label, row_block_.size, 1);
|
||||
}
|
||||
if (row_block_.qid) {
|
||||
XGDMatrixSetDenseInfo(proxy_, "qid", row_block_.qid, row_block_.size, 1);
|
||||
}
|
||||
if (row_block_.weight) {
|
||||
XGDMatrixSetDenseInfo(proxy_, "weight", row_block_.weight, row_block_.size, 1);
|
||||
}
|
||||
// Continue iteration
|
||||
return true;
|
||||
} else {
|
||||
// Stop iteration
|
||||
return false;
|
||||
}
|
||||
}
|
||||
int Next();
|
||||
|
||||
auto Proxy() -> decltype(proxy_) { return proxy_; }
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Copyright 2014-2023 by Contributors
|
||||
* Copyright 2014-2024, XGBoost Contributors
|
||||
* \file gbtree.cc
|
||||
* \brief gradient boosted tree implementation.
|
||||
* \author Tianqi Chen
|
||||
@ -11,14 +11,12 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdint> // std::int32_t
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <numeric> // for iota
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "../common/common.h"
|
||||
#include "../common/timer.h"
|
||||
#include "../tree/param.h" // TrainParam
|
||||
#include "gbtree_model.h"
|
||||
|
||||
@ -10,15 +10,15 @@
|
||||
|
||||
#include <array>
|
||||
#include <cmath>
|
||||
#include <numeric> // for accumulate
|
||||
|
||||
#include "../collective/communicator-inl.h"
|
||||
#include "../common/common.h" // MetricNoCache
|
||||
#include "../common/common.h" // for AssertGPUSupport
|
||||
#include "../common/math.h"
|
||||
#include "../common/optional_weight.h" // OptionalWeights
|
||||
#include "../common/pseudo_huber.h"
|
||||
#include "../common/quantile_loss_utils.h" // QuantileLossParam
|
||||
#include "../common/threading_utils.h"
|
||||
#include "metric_common.h"
|
||||
#include "metric_common.h" // MetricNoCache
|
||||
#include "xgboost/collective/result.h" // for SafeColl
|
||||
#include "xgboost/metric.h"
|
||||
|
||||
|
||||
@ -9,8 +9,6 @@
|
||||
#include <string>
|
||||
|
||||
#include "../collective/aggregator.h"
|
||||
#include "../collective/communicator-inl.h"
|
||||
#include "../common/common.h"
|
||||
#include "xgboost/metric.h"
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
@ -9,8 +9,8 @@
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <cmath>
|
||||
#include <numeric> // for accumulate
|
||||
|
||||
#include "../collective/communicator-inl.h"
|
||||
#include "../common/math.h"
|
||||
#include "../common/threading_utils.h"
|
||||
#include "metric_common.h" // MetricNoCache
|
||||
|
||||
@ -9,10 +9,9 @@
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
#include <numeric> // for accumulate
|
||||
#include <vector>
|
||||
|
||||
#include "../collective/communicator-inl.h"
|
||||
#include "../common/math.h"
|
||||
#include "../common/survival_util.h"
|
||||
#include "../common/threading_utils.h"
|
||||
#include "metric_common.h" // MetricNoCache
|
||||
|
||||
@ -3,6 +3,8 @@
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <numeric> // for iota
|
||||
|
||||
#include "../../../src/collective/allreduce.h"
|
||||
#include "../../../src/collective/coll.h" // for Coll
|
||||
#include "../../../src/collective/tracker.h"
|
||||
|
||||
@ -1,11 +1,12 @@
|
||||
/**
|
||||
* Copyright 2023, XGBoost Contributors
|
||||
* Copyright 2023-2024, XGBoost Contributors
|
||||
*/
|
||||
#pragma once
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <chrono> // for seconds
|
||||
#include <cstdint> // for int32_t
|
||||
#include <fstream> // for ifstream
|
||||
#include <string> // for string
|
||||
#include <thread> // for thread
|
||||
#include <utility> // for move
|
||||
|
||||
@ -1,10 +1,9 @@
|
||||
/**
|
||||
* Copyright 2019-2023 by XGBoost Contributors
|
||||
* Copyright 2019-2024, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "../../../src/common/hist_util.h"
|
||||
#include "../../../src/data/gradient_index.h"
|
||||
@ -135,7 +134,7 @@ TEST(CutsBuilder, SearchGroupInd) {
|
||||
group[2] = 7;
|
||||
group[3] = 5;
|
||||
|
||||
p_mat->SetInfo("group", group.data(), DataType::kUInt32, kNumGroups);
|
||||
p_mat->SetInfo("group", Make1dInterfaceTest(group.data(), group.size()));
|
||||
|
||||
HistogramCuts hmat;
|
||||
|
||||
@ -348,7 +347,8 @@ void TestSketchFromWeights(bool with_group) {
|
||||
for (size_t i = 0; i < kGroups; ++i) {
|
||||
groups[i] = kRows / kGroups;
|
||||
}
|
||||
info.SetInfo(ctx, "group", groups.data(), DataType::kUInt32, kGroups);
|
||||
auto sg = linalg::Make1dInterface(groups.data(), kGroups);
|
||||
info.SetInfo(ctx, "group", sg.c_str());
|
||||
}
|
||||
|
||||
info.num_row_ = kRows;
|
||||
@ -356,10 +356,10 @@ void TestSketchFromWeights(bool with_group) {
|
||||
|
||||
// Assign weights.
|
||||
if (with_group) {
|
||||
m->SetInfo("group", groups.data(), DataType::kUInt32, kGroups);
|
||||
m->SetInfo("group", Make1dInterfaceTest(groups.data(), kGroups));
|
||||
}
|
||||
|
||||
m->SetInfo("weight", h_weights.data(), DataType::kFloat32, h_weights.size());
|
||||
m->SetInfo("weight", Make1dInterfaceTest(h_weights.data(), h_weights.size()));
|
||||
m->Info().num_col_ = kCols;
|
||||
m->Info().num_row_ = kRows;
|
||||
ASSERT_EQ(cuts.Ptrs().size(), kCols + 1);
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Copyright 2019-2023 by XGBoost Contributors
|
||||
* Copyright 2019-2024, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <thrust/device_vector.h>
|
||||
@ -682,7 +682,7 @@ TEST(HistUtil, DeviceSketchFromGroupWeights) {
|
||||
for (size_t i = 0; i < kGroups; ++i) {
|
||||
groups[i] = kRows / kGroups;
|
||||
}
|
||||
m->SetInfo("group", groups.data(), DataType::kUInt32, kGroups);
|
||||
m->SetInfo("group", Make1dInterfaceTest(groups.data(), kGroups));
|
||||
HistogramCuts weighted_cuts = DeviceSketch(&ctx, m.get(), kBins, 0);
|
||||
|
||||
// sketch with no weight
|
||||
@ -727,7 +727,7 @@ void TestAdapterSketchFromWeights(bool with_group) {
|
||||
for (size_t i = 0; i < kGroups; ++i) {
|
||||
groups[i] = kRows / kGroups;
|
||||
}
|
||||
info.SetInfo(ctx, "group", groups.data(), DataType::kUInt32, kGroups);
|
||||
info.SetInfo(ctx, "group", Make1dInterfaceTest(groups.data(), kGroups));
|
||||
}
|
||||
|
||||
info.weights_.SetDevice(DeviceOrd::CUDA(0));
|
||||
@ -746,10 +746,10 @@ void TestAdapterSketchFromWeights(bool with_group) {
|
||||
|
||||
auto dmat = GetDMatrixFromData(storage.HostVector(), kRows, kCols);
|
||||
if (with_group) {
|
||||
dmat->Info().SetInfo(ctx, "group", groups.data(), DataType::kUInt32, kGroups);
|
||||
dmat->Info().SetInfo(ctx, "group", Make1dInterfaceTest(groups.data(), kGroups));
|
||||
}
|
||||
|
||||
dmat->Info().SetInfo(ctx, "weight", h_weights.data(), DataType::kFloat32, h_weights.size());
|
||||
dmat->Info().SetInfo(ctx, "weight", Make1dInterfaceTest(h_weights.data(), h_weights.size()));
|
||||
dmat->Info().num_col_ = kCols;
|
||||
dmat->Info().num_row_ = kRows;
|
||||
ASSERT_EQ(cuts.Ptrs().size(), kCols + 1);
|
||||
|
||||
@ -1,11 +1,12 @@
|
||||
/**
|
||||
* Copyright 2018-2023 by XGBoost Contributors
|
||||
* Copyright 2018-2024, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/base.h>
|
||||
#include <xgboost/span.h>
|
||||
#include <xgboost/host_device_vector.h>
|
||||
#include <xgboost/span.h>
|
||||
|
||||
#include <numeric> // for iota
|
||||
#include <vector>
|
||||
|
||||
#include "../../../src/common/transform.h"
|
||||
|
||||
@ -1,10 +1,11 @@
|
||||
/**
|
||||
* Copyright 2021-2023, XGBoost Contributors
|
||||
* Copyright 2021-2024, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/host_device_vector.h>
|
||||
#include "../helpers.h"
|
||||
|
||||
#include "../../../src/data/array_interface.h"
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
|
||||
@ -10,7 +10,6 @@
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "../../../src/common/version.h"
|
||||
#include "../filesystem.h" // dmlc::TemporaryDirectory
|
||||
#include "../helpers.h" // for GMockTHrow
|
||||
#include "xgboost/base.h"
|
||||
@ -23,23 +22,22 @@ TEST(MetaInfo, GetSet) {
|
||||
double double2[2] = {1.0, 2.0};
|
||||
|
||||
EXPECT_EQ(info.labels.Size(), 0);
|
||||
info.SetInfo(ctx, "label", double2, xgboost::DataType::kFloat32, 2);
|
||||
info.SetInfo(ctx, "label", Make1dInterfaceTest(double2, 2));
|
||||
EXPECT_EQ(info.labels.Size(), 2);
|
||||
|
||||
float float2[2] = {1.0f, 2.0f};
|
||||
EXPECT_EQ(info.GetWeight(1), 1.0f)
|
||||
<< "When no weights are given, was expecting default value 1";
|
||||
info.SetInfo(ctx, "weight", float2, xgboost::DataType::kFloat32, 2);
|
||||
EXPECT_EQ(info.GetWeight(1), 1.0f) << "When no weights are given, was expecting default value 1";
|
||||
info.SetInfo(ctx, "weight", Make1dInterfaceTest(float2, 2));
|
||||
EXPECT_EQ(info.GetWeight(1), 2.0f);
|
||||
|
||||
uint32_t uint32_t2[2] = {1U, 2U};
|
||||
EXPECT_EQ(info.base_margin_.Size(), 0);
|
||||
info.SetInfo(ctx, "base_margin", uint32_t2, xgboost::DataType::kUInt32, 2);
|
||||
info.SetInfo(ctx, "base_margin", Make1dInterfaceTest(uint32_t2, 2));
|
||||
EXPECT_EQ(info.base_margin_.Size(), 2);
|
||||
|
||||
uint64_t uint64_t2[2] = {1U, 2U};
|
||||
EXPECT_EQ(info.group_ptr_.size(), 0);
|
||||
info.SetInfo(ctx, "group", uint64_t2, xgboost::DataType::kUInt64, 2);
|
||||
info.SetInfo(ctx, "group", Make1dInterfaceTest(uint64_t2, 2));
|
||||
ASSERT_EQ(info.group_ptr_.size(), 3);
|
||||
EXPECT_EQ(info.group_ptr_[2], 3);
|
||||
|
||||
@ -135,9 +133,9 @@ TEST(MetaInfo, SaveLoadBinary) {
|
||||
};
|
||||
std::vector<float> values (kRows);
|
||||
std::generate(values.begin(), values.end(), generator);
|
||||
info.SetInfo(ctx, "label", values.data(), xgboost::DataType::kFloat32, kRows);
|
||||
info.SetInfo(ctx, "weight", values.data(), xgboost::DataType::kFloat32, kRows);
|
||||
info.SetInfo(ctx, "base_margin", values.data(), xgboost::DataType::kFloat32, kRows);
|
||||
info.SetInfo(ctx, "label", Make1dInterfaceTest(values.data(), kRows));
|
||||
info.SetInfo(ctx, "weight", Make1dInterfaceTest(values.data(), kRows));
|
||||
info.SetInfo(ctx, "base_margin", Make1dInterfaceTest(values.data(), kRows));
|
||||
|
||||
info.num_row_ = kRows;
|
||||
info.num_col_ = kCols;
|
||||
@ -271,7 +269,7 @@ TEST(MetaInfo, CPUQid) {
|
||||
qid[i] = i;
|
||||
}
|
||||
|
||||
info.SetInfo(ctx, "qid", qid.data(), xgboost::DataType::kUInt32, info.num_row_);
|
||||
info.SetInfo(ctx, "qid", Make1dInterfaceTest(qid.data(), info.num_row_));
|
||||
ASSERT_EQ(info.group_ptr_.size(), info.num_row_ + 1);
|
||||
ASSERT_EQ(info.group_ptr_.front(), 0);
|
||||
ASSERT_EQ(info.group_ptr_.back(), info.num_row_);
|
||||
@ -288,14 +286,12 @@ TEST(MetaInfo, Validate) {
|
||||
info.num_col_ = 3;
|
||||
std::vector<xgboost::bst_group_t> groups (11);
|
||||
Context ctx;
|
||||
info.SetInfo(ctx, "group", groups.data(), xgboost::DataType::kUInt32, 11);
|
||||
info.SetInfo(ctx, "group", Make1dInterfaceTest(groups.data(), groups.size()));
|
||||
EXPECT_THROW(info.Validate(FstCU()), dmlc::Error);
|
||||
|
||||
std::vector<float> labels(info.num_row_ + 1);
|
||||
EXPECT_THROW(
|
||||
{
|
||||
info.SetInfo(ctx, "label", labels.data(), xgboost::DataType::kFloat32, info.num_row_ + 1);
|
||||
},
|
||||
{ info.SetInfo(ctx, "label", Make1dInterfaceTest(labels.data(), info.num_row_ + 1)); },
|
||||
dmlc::Error);
|
||||
|
||||
// Make overflow data, which can happen when users pass group structure as int
|
||||
@ -305,13 +301,13 @@ TEST(MetaInfo, Validate) {
|
||||
groups.push_back(1562500);
|
||||
}
|
||||
groups.push_back(static_cast<xgboost::bst_group_t>(-1));
|
||||
EXPECT_THROW(info.SetInfo(ctx, "group", groups.data(), xgboost::DataType::kUInt32, groups.size()),
|
||||
EXPECT_THROW(info.SetInfo(ctx, "group", Make1dInterfaceTest(groups.data(), groups.size())),
|
||||
dmlc::Error);
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
info.group_ptr_.clear();
|
||||
labels.resize(info.num_row_);
|
||||
info.SetInfo(ctx, "label", labels.data(), xgboost::DataType::kFloat32, info.num_row_);
|
||||
info.SetInfo(ctx, "label", Make1dInterfaceTest(labels.data(), info.num_row_));
|
||||
info.labels.SetDevice(FstCU());
|
||||
EXPECT_THROW(info.Validate(DeviceOrd::CUDA(1)), dmlc::Error);
|
||||
|
||||
@ -340,8 +336,8 @@ TEST(MetaInfo, HostExtend) {
|
||||
for (size_t g = 0; g < kRows / per_group; ++g) {
|
||||
groups.emplace_back(per_group);
|
||||
}
|
||||
lhs.SetInfo(ctx, "group", groups.data(), xgboost::DataType::kUInt32, groups.size());
|
||||
rhs.SetInfo(ctx, "group", groups.data(), xgboost::DataType::kUInt32, groups.size());
|
||||
lhs.SetInfo(ctx, "group", Make1dInterfaceTest(groups.data(), groups.size()));
|
||||
rhs.SetInfo(ctx, "group", Make1dInterfaceTest(groups.data(), groups.size()));
|
||||
|
||||
lhs.Extend(rhs, true, true);
|
||||
ASSERT_EQ(lhs.num_row_, kRows * 2);
|
||||
|
||||
@ -408,7 +408,7 @@ class Dart : public testing::TestWithParam<char const*> {
|
||||
for (size_t i = 0; i < kRows; ++i) {
|
||||
labels[i] = i % 2;
|
||||
}
|
||||
p_mat->SetInfo("label", labels.data(), DataType::kFloat32, kRows);
|
||||
p_mat->SetInfo("label", Make1dInterfaceTest(labels.data(), kRows));
|
||||
|
||||
auto learner = std::unique_ptr<Learner>(Learner::Create({p_mat}));
|
||||
learner->SetParam("booster", "dart");
|
||||
|
||||
@ -1,8 +1,11 @@
|
||||
/**
|
||||
* Copyright 2020-2024, XGBoost contributors
|
||||
*/
|
||||
#include <xgboost/c_api.h>
|
||||
|
||||
#include "helpers.h"
|
||||
#include "../../src/data/device_adapter.cuh"
|
||||
#include "../../src/data/iterative_dmatrix.h"
|
||||
#include "helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
|
||||
@ -15,19 +15,18 @@
|
||||
|
||||
#include <cstdint> // std::int32_t
|
||||
#include <cstdio>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
#include "../../src/collective/communicator-inl.h"
|
||||
#include "../../src/common/common.h"
|
||||
#include "../../src/common/threading_utils.h"
|
||||
#include "../../src/data/array_interface.h"
|
||||
#include "filesystem.h" // dmlc::TemporaryDirectory
|
||||
#include "xgboost/linalg.h"
|
||||
#if !defined(_OPENMP)
|
||||
#include <thread>
|
||||
#endif
|
||||
|
||||
#if defined(__CUDACC__)
|
||||
#define DeclareUnifiedTest(name) GPU ## name
|
||||
@ -333,7 +332,7 @@ inline std::vector<float> GenerateRandomCategoricalSingleColumn(int n, size_t nu
|
||||
std::vector<float> x(n);
|
||||
std::mt19937 rng(0);
|
||||
std::uniform_int_distribution<size_t> dist(0, num_categories - 1);
|
||||
std::generate(x.begin(), x.end(), [&]() { return dist(rng); });
|
||||
std::generate(x.begin(), x.end(), [&]() { return static_cast<float>(dist(rng)); });
|
||||
// Make sure each category is present
|
||||
for (size_t i = 0; i < num_categories; i++) {
|
||||
x[i] = static_cast<decltype(x)::value_type>(i);
|
||||
@ -494,6 +493,16 @@ inline int Next(DataIterHandle self) {
|
||||
return static_cast<ArrayIterForTest*>(self)->Next();
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Create an array interface for host vector.
|
||||
*/
|
||||
template <typename T>
|
||||
char const* Make1dInterfaceTest(T const* vec, std::size_t len) {
|
||||
static thread_local std::string str;
|
||||
str = linalg::Make1dInterface(vec, len);
|
||||
return str.c_str();
|
||||
}
|
||||
|
||||
class RMMAllocator;
|
||||
using RMMAllocatorPtr = std::unique_ptr<RMMAllocator, void(*)(RMMAllocator*)>;
|
||||
RMMAllocatorPtr SetUpRMMResourceForCppTests(int argc, char** argv);
|
||||
|
||||
@ -5,10 +5,9 @@
|
||||
#include <xgboost/json.h>
|
||||
#include <xgboost/metric.h>
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <numeric> // for iota
|
||||
|
||||
#include "../../../src/common/linalg_op.h"
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost::metric {
|
||||
|
||||
@ -1,14 +1,15 @@
|
||||
/*!
|
||||
* Copyright 2018-2023 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2018-2024, XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/context.h>
|
||||
#include <xgboost/objective.h>
|
||||
|
||||
#include <numeric> // for iota
|
||||
|
||||
#include "../../../src/objective/adaptive.h"
|
||||
#include "../../../src/tree/param.h" // for TrainParam
|
||||
#include "../helpers.h"
|
||||
|
||||
#include "test_regression_obj.h"
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
@ -12,7 +12,6 @@
|
||||
#include <cinttypes> // for int32_t, int64_t, uint32_t
|
||||
#include <cstddef> // for size_t
|
||||
#include <iosfwd> // for ofstream
|
||||
#include <iterator> // for back_insert_iterator, back_inserter
|
||||
#include <limits> // for numeric_limits
|
||||
#include <map> // for map
|
||||
#include <memory> // for unique_ptr, shared_ptr, __shared_ptr_...
|
||||
@ -30,7 +29,6 @@
|
||||
#include "../../src/common/random.h" // for GlobalRandom
|
||||
#include "dmlc/io.h" // for Stream
|
||||
#include "dmlc/omp.h" // for omp_get_max_threads
|
||||
#include "dmlc/registry.h" // for Registry
|
||||
#include "filesystem.h" // for TemporaryDirectory
|
||||
#include "helpers.h" // for GetBaseScore, RandomDataGenerator
|
||||
#include "objective_helpers.h" // for MakeObjNamesForTest, ObjTestNameGenerator
|
||||
@ -103,9 +101,9 @@ TEST(Learner, CheckGroup) {
|
||||
labels[i] = i % 2;
|
||||
}
|
||||
|
||||
p_mat->SetInfo("weight", static_cast<void *>(weight.data()), DataType::kFloat32, kNumGroups);
|
||||
p_mat->SetInfo("group", group.data(), DataType::kUInt32, kNumGroups);
|
||||
p_mat->SetInfo("label", labels.data(), DataType::kFloat32, kNumRows);
|
||||
p_mat->SetInfo("weight", Make1dInterfaceTest(weight.data(), kNumGroups));
|
||||
p_mat->SetInfo("group", Make1dInterfaceTest(group.data(), kNumGroups));
|
||||
p_mat->SetInfo("label", Make1dInterfaceTest(labels.data(), kNumRows));
|
||||
|
||||
std::vector<std::shared_ptr<xgboost::DMatrix>> mat = {p_mat};
|
||||
auto learner = std::unique_ptr<Learner>(Learner::Create(mat));
|
||||
@ -115,7 +113,7 @@ TEST(Learner, CheckGroup) {
|
||||
group.resize(kNumGroups+1);
|
||||
group[3] = 4;
|
||||
group[4] = 1;
|
||||
p_mat->SetInfo("group", group.data(), DataType::kUInt32, kNumGroups+1);
|
||||
p_mat->SetInfo("group", Make1dInterfaceTest(group.data(), kNumGroups+1));
|
||||
EXPECT_ANY_THROW(learner->UpdateOneIter(0, p_mat));
|
||||
}
|
||||
|
||||
@ -132,7 +130,7 @@ TEST(Learner, SLOW_CheckMultiBatch) { // NOLINT
|
||||
for (size_t i = 0; i < num_row; ++i) {
|
||||
labels[i] = i % 2;
|
||||
}
|
||||
dmat->SetInfo("label", labels.data(), DataType::kFloat32, num_row);
|
||||
dmat->SetInfo("label", Make1dInterfaceTest(labels.data(), num_row));
|
||||
std::vector<std::shared_ptr<DMatrix>> mat{dmat};
|
||||
auto learner = std::unique_ptr<Learner>(Learner::Create(mat));
|
||||
learner->SetParams(Args{{"objective", "binary:logistic"}});
|
||||
|
||||
@ -239,4 +239,18 @@ void TestAtomicAdd() {
|
||||
TEST(Histogram, AtomicAddInt64) {
|
||||
TestAtomicAdd();
|
||||
}
|
||||
|
||||
TEST(Histogram, Quantiser) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
std::size_t n_samples{16};
|
||||
HostDeviceVector<GradientPair> gpair(n_samples, GradientPair{1.0, 1.0});
|
||||
gpair.SetDevice(ctx.Device());
|
||||
|
||||
auto quantiser = GradientQuantiser(&ctx, gpair.DeviceSpan(), MetaInfo());
|
||||
for (auto v : gpair.ConstHostVector()) {
|
||||
auto gh = quantiser.ToFloatingPoint(quantiser.ToFixedPoint(v));
|
||||
ASSERT_EQ(gh.GetGrad(), 1.0);
|
||||
ASSERT_EQ(gh.GetHess(), 1.0);
|
||||
}
|
||||
}
|
||||
} // namespace xgboost::tree
|
||||
|
||||