rocm enable for v2.0.1
This commit is contained in:
@@ -8,13 +8,20 @@
|
||||
#include <xgboost/learner.h>
|
||||
#include <xgboost/version_config.h>
|
||||
|
||||
#include <cstddef> // std::size_t
|
||||
#include <limits> // std::numeric_limits
|
||||
#include <string> // std::string
|
||||
#include <array> // for array
|
||||
#include <cstddef> // std::size_t
|
||||
#include <filesystem> // std::filesystem
|
||||
#include <limits> // std::numeric_limits
|
||||
#include <string> // std::string
|
||||
#include <vector>
|
||||
|
||||
#include "../../../src/c_api/c_api_error.h"
|
||||
#include "../../../src/common/io.h"
|
||||
#include "../../../src/data/adapter.h" // for ArrayAdapter
|
||||
#include "../../../src/data/array_interface.h" // for ArrayInterface
|
||||
#include "../../../src/data/gradient_index.h" // for GHistIndexMatrix
|
||||
#include "../../../src/data/iterative_dmatrix.h" // for IterativeDMatrix
|
||||
#include "../../../src/data/sparse_page_dmatrix.h" // for SparsePageDMatrix
|
||||
#include "../helpers.h"
|
||||
|
||||
TEST(CAPI, XGDMatrixCreateFromMatDT) {
|
||||
@@ -137,9 +144,9 @@ TEST(CAPI, ConfigIO) {
|
||||
BoosterHandle handle = learner.get();
|
||||
learner->UpdateOneIter(0, p_dmat);
|
||||
|
||||
char const* out[1];
|
||||
std::array<char const* , 1> out;
|
||||
bst_ulong len {0};
|
||||
XGBoosterSaveJsonConfig(handle, &len, out);
|
||||
XGBoosterSaveJsonConfig(handle, &len, out.data());
|
||||
|
||||
std::string config_str_0 { out[0] };
|
||||
auto config_0 = Json::Load({config_str_0.c_str(), config_str_0.size()});
|
||||
@@ -147,7 +154,7 @@ TEST(CAPI, ConfigIO) {
|
||||
|
||||
bst_ulong len_1 {0};
|
||||
std::string config_str_1 { out[0] };
|
||||
XGBoosterSaveJsonConfig(handle, &len_1, out);
|
||||
XGBoosterSaveJsonConfig(handle, &len_1, out.data());
|
||||
auto config_1 = Json::Load({config_str_1.c_str(), config_str_1.size()});
|
||||
|
||||
ASSERT_EQ(config_0, config_1);
|
||||
@@ -156,7 +163,7 @@ TEST(CAPI, ConfigIO) {
|
||||
TEST(CAPI, JsonModelIO) {
|
||||
size_t constexpr kRows = 10;
|
||||
size_t constexpr kCols = 10;
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
auto tempdir = std::filesystem::temp_directory_path();
|
||||
|
||||
auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
|
||||
std::vector<std::shared_ptr<DMatrix>> mat {p_dmat};
|
||||
@@ -172,19 +179,19 @@ TEST(CAPI, JsonModelIO) {
|
||||
learner->UpdateOneIter(0, p_dmat);
|
||||
BoosterHandle handle = learner.get();
|
||||
|
||||
std::string modelfile_0 = tempdir.path + "/model_0.json";
|
||||
XGBoosterSaveModel(handle, modelfile_0.c_str());
|
||||
XGBoosterLoadModel(handle, modelfile_0.c_str());
|
||||
auto modelfile_0 = tempdir / std::filesystem::u8path(u8"모델_0.json");
|
||||
XGBoosterSaveModel(handle, modelfile_0.u8string().c_str());
|
||||
XGBoosterLoadModel(handle, modelfile_0.u8string().c_str());
|
||||
|
||||
bst_ulong num_feature {0};
|
||||
ASSERT_EQ(XGBoosterGetNumFeature(handle, &num_feature), 0);
|
||||
ASSERT_EQ(num_feature, kCols);
|
||||
|
||||
std::string modelfile_1 = tempdir.path + "/model_1.json";
|
||||
XGBoosterSaveModel(handle, modelfile_1.c_str());
|
||||
auto modelfile_1 = tempdir / "model_1.json";
|
||||
XGBoosterSaveModel(handle, modelfile_1.u8string().c_str());
|
||||
|
||||
auto model_str_0 = common::LoadSequentialFile(modelfile_0);
|
||||
auto model_str_1 = common::LoadSequentialFile(modelfile_1);
|
||||
auto model_str_0 = common::LoadSequentialFile(modelfile_0.u8string());
|
||||
auto model_str_1 = common::LoadSequentialFile(modelfile_1.u8string());
|
||||
|
||||
ASSERT_EQ(model_str_0.front(), '{');
|
||||
ASSERT_EQ(model_str_0, model_str_1);
|
||||
@@ -266,9 +273,9 @@ TEST(CAPI, DMatrixSetFeatureName) {
|
||||
ASSERT_EQ(std::to_string(i), c_out_features[i]);
|
||||
}
|
||||
|
||||
char const* feat_types [] {"i", "q"};
|
||||
std::array<char const *, 2> feat_types{"i", "q"};
|
||||
static_assert(sizeof(feat_types) / sizeof(feat_types[0]) == kCols);
|
||||
XGDMatrixSetStrFeatureInfo(handle, "feature_type", feat_types, kCols);
|
||||
XGDMatrixSetStrFeatureInfo(handle, "feature_type", feat_types.data(), kCols);
|
||||
char const **c_out_types;
|
||||
XGDMatrixGetStrFeatureInfo(handle, u8"feature_type", &out_len,
|
||||
&c_out_types);
|
||||
@@ -410,4 +417,210 @@ TEST(CAPI, JArgs) {
|
||||
ASSERT_THROW({ RequiredArg<String>(args, "null", __func__); }, dmlc::Error);
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
void MakeLabelForTest(std::shared_ptr<DMatrix> Xy, DMatrixHandle cxy) {
|
||||
auto n_samples = Xy->Info().num_row_;
|
||||
std::vector<float> y(n_samples);
|
||||
for (std::size_t i = 0; i < y.size(); ++i) {
|
||||
y[i] = static_cast<float>(i);
|
||||
}
|
||||
|
||||
Xy->Info().labels.Reshape(n_samples);
|
||||
Xy->Info().labels.Data()->HostVector() = y;
|
||||
|
||||
auto y_int = GetArrayInterface(Xy->Info().labels.Data(), n_samples, 1);
|
||||
std::string s_y_int;
|
||||
Json::Dump(y_int, &s_y_int);
|
||||
|
||||
XGDMatrixSetInfoFromInterface(cxy, "label", s_y_int.c_str());
|
||||
}
|
||||
|
||||
/**
 * @brief Build an in-memory DMatrix twice from the same random dense data: once through
 *        the C++ factory and once through XGDMatrixCreateFromDense.
 *
 * @param n_samples  Number of rows to generate.
 * @param n_features Number of columns to generate.
 * @param dconfig    JSON configuration forwarded to the C API constructor.
 *
 * @return Pair of {C API handle, C++ DMatrix}; both carry the labels written by
 *         MakeLabelForTest.
 */
auto MakeSimpleDMatrixForTest(bst_row_t n_samples, bst_feature_t n_features, Json dconfig) {
  // `storage` owns the buffer that the array-interface string refers to.
  HostDeviceVector<float> storage;
  RandomDataGenerator generator{n_samples, n_features, 0.5f};
  auto arr_int = generator.GenerateArrayInterface(&storage);

  // C++ side.
  data::ArrayAdapter adapter{StringView{arr_int}};
  auto const missing = std::numeric_limits<float>::quiet_NaN();
  std::shared_ptr<DMatrix> Xy{DMatrix::Create(&adapter, missing, Context{}.Threads())};

  // C API side, fed with the very same array interface.
  std::string s_dconfig;
  Json::Dump(dconfig, &s_dconfig);
  DMatrixHandle p_fmat;
  CHECK_EQ(XGDMatrixCreateFromDense(arr_int.c_str(), s_dconfig.c_str(), &p_fmat), 0);

  MakeLabelForTest(Xy, p_fmat);
  return std::make_pair(p_fmat, Xy);
}
|
||||
|
||||
/**
 * @brief Build a quantile DMatrix twice from identically configured batch iterators:
 *        once through XGQuantileDMatrixCreateFromCallback and once by constructing
 *        data::IterativeDMatrix directly.
 *
 * @param ctx        Selects between the CUDA and the numpy-style test iterator.
 * @param n_samples  Row count passed to each iterator.
 * @param n_features Column count passed to each iterator.
 * @param dconfig    JSON configuration for the C API call; "max_bin" is overwritten.
 *
 * @return Pair of {C API handle, C++ IterativeDMatrix}.
 */
auto MakeQDMForTest(Context const *ctx, bst_row_t n_samples, bst_feature_t n_features,
                    Json dconfig) {
  bst_bin_t n_bins{16};
  dconfig["max_bin"] = Integer{n_bins};

  std::size_t n_batches{4};
  // Both matrices need their own iterator instance of the same kind.
  auto make_iter = [&]() -> std::unique_ptr<ArrayIterForTest> {
    if (ctx->IsCUDA()) {
      return std::make_unique<CudaArrayIterForTest>(0.0f, n_samples, n_features, n_batches);
    }
    return std::make_unique<NumpyArrayIterForTest>(0.0f, n_samples, n_features, n_batches);
  };

  // C API side.
  std::unique_ptr<ArrayIterForTest> c_iter = make_iter();
  std::string s_dconfig;
  Json::Dump(dconfig, &s_dconfig);
  DMatrixHandle p_fmat;
  CHECK_EQ(XGQuantileDMatrixCreateFromCallback(static_cast<DataIterHandle>(c_iter.get()),
                                               c_iter->Proxy(), nullptr, Reset, Next,
                                               s_dconfig.c_str(), &p_fmat),
           0);

  // C++ side.
  std::unique_ptr<ArrayIterForTest> cpp_iter = make_iter();
  auto Xy = std::make_shared<data::IterativeDMatrix>(
      cpp_iter.get(), cpp_iter->Proxy(), nullptr, Reset, Next,
      std::numeric_limits<float>::quiet_NaN(), 0, n_bins);

  return std::make_pair(p_fmat, Xy);
}
|
||||
|
||||
/**
 * @brief Build an external-memory DMatrix twice from identically configured iterators:
 *        once through XGDMatrixCreateFromCallback and once by constructing
 *        data::SparsePageDMatrix directly.
 *
 * @param n_samples  Row count passed to each iterator.
 * @param n_features Column count passed to each iterator.
 * @param dconfig    JSON configuration for the C API call; "cache_prefix" is set to "cache".
 *
 * @return Pair of {C API handle, C++ SparsePageDMatrix}; both carry the labels written by
 *         MakeLabelForTest.
 */
auto MakeExtMemForTest(bst_row_t n_samples, bst_feature_t n_features, Json dconfig) {
  std::size_t n_batches{4};

  dconfig["cache_prefix"] = String{"cache"};
  std::string s_dconfig;
  Json::Dump(dconfig, &s_dconfig);

  // C API side.
  NumpyArrayIterForTest c_iter{0.0f, n_samples, n_features, n_batches};
  DMatrixHandle p_fmat;
  CHECK_EQ(XGDMatrixCreateFromCallback(static_cast<DataIterHandle>(&c_iter), c_iter.Proxy(),
                                       Reset, Next, s_dconfig.c_str(), &p_fmat),
           0);

  // C++ side.
  NumpyArrayIterForTest cpp_iter{0.0f, n_samples, n_features, n_batches};
  auto const missing = std::numeric_limits<float>::quiet_NaN();
  auto Xy = std::make_shared<data::SparsePageDMatrix>(&cpp_iter, cpp_iter.Proxy(), Reset, Next,
                                                      missing, 0, "");

  MakeLabelForTest(Xy, p_fmat);
  return std::make_pair(p_fmat, Xy);
}
|
||||
|
||||
template <typename Page>
|
||||
void CheckResult(Context const *ctx, bst_feature_t n_features, std::shared_ptr<DMatrix> Xy,
|
||||
float const *out_data, std::uint64_t const *out_indptr) {
|
||||
for (auto const &page : Xy->GetBatches<Page>(ctx, BatchParam{16, 0.2})) {
|
||||
auto const &cut = page.Cuts();
|
||||
auto const &ptrs = cut.Ptrs();
|
||||
auto const &vals = cut.Values();
|
||||
auto const &mins = cut.MinValues();
|
||||
for (bst_feature_t f = 0; f < Xy->Info().num_col_; ++f) {
|
||||
ASSERT_EQ(ptrs[f] + f, out_indptr[f]);
|
||||
ASSERT_EQ(mins[f], out_data[out_indptr[f]]);
|
||||
auto beg = out_indptr[f];
|
||||
auto end = out_indptr[f + 1];
|
||||
auto val_beg = ptrs[f];
|
||||
for (std::uint64_t i = beg + 1, j = val_beg; i < end; ++i, ++j) {
|
||||
ASSERT_EQ(vals[j], out_data[i]);
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT_EQ(ptrs[n_features] + n_features, out_indptr[n_features]);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * @brief Exercise XGDMatrixGetQuantileCut on three kinds of DMatrix.
 *
 * - In-memory matrix: the call must fail (-1) before any quantiles exist and succeed
 *   after one boosting iteration; the exported cuts are compared with the C++ matrix.
 * - Quantile matrix built from an iterator: the call must succeed immediately and the
 *   exported cuts are compared with the C++ matrix.
 * - External-memory matrix: the call must fail (-1) first and succeed after one
 *   boosting iteration.
 *
 * @param ctx Selects the CPU or CUDA code path (page type and `tree_method`).
 */
void TestXGDMatrixGetQuantileCut(Context const *ctx) {
  bst_row_t n_samples{1024};
  bst_feature_t n_features{16};

  Json dconfig{Object{}};
  // The key has to be spelled "nthread"; a misspelled key is never read by the C API.
  dconfig["nthread"] = Integer{Context{}.Threads()};
  dconfig["missing"] = Number{std::numeric_limits<float>::quiet_NaN()};

  // Decode the two array-interface strings returned by the C API and compare them with
  // the cuts held by `Xy`.
  auto check_result = [n_features, &ctx](std::shared_ptr<DMatrix> Xy, StringView s_out_data,
                                         StringView s_out_indptr) {
    auto i_out_data = ArrayInterface<1, false>{s_out_data};
    ASSERT_EQ(i_out_data.type, ArrayInterfaceHandler::kF4);
    auto out_data = static_cast<float const *>(i_out_data.data);
    ASSERT_TRUE(out_data);

    auto i_out_indptr = ArrayInterface<1, false>{s_out_indptr};
    ASSERT_EQ(i_out_indptr.type, ArrayInterfaceHandler::kU8);
    auto out_indptr = static_cast<std::uint64_t const *>(i_out_indptr.data);
    // Validate the pointer that was just decoded, not the data pointer a second time.
    ASSERT_TRUE(out_indptr);

    if (ctx->IsCPU()) {
      CheckResult<GHistIndexMatrix>(ctx, n_features, Xy, out_data, out_indptr);
    } else {
      CheckResult<EllpackPage>(ctx, n_features, Xy, out_data, out_indptr);
    }
  };

  Json config{Null{}};
  std::string s_config;
  Json::Dump(config, &s_config);
  char const *out_indptr;
  char const *out_data;

  {
    // SimpleDMatrix
    auto [p_fmat, Xy] = MakeSimpleDMatrixForTest(n_samples, n_features, dconfig);
    // assert fail, we don't have the quantile yet.
    ASSERT_EQ(XGDMatrixGetQuantileCut(p_fmat, s_config.c_str(), &out_indptr, &out_data), -1);

    std::array<DMatrixHandle, 1> mats{p_fmat};
    BoosterHandle booster;
    ASSERT_EQ(XGBoosterCreate(mats.data(), 1, &booster), 0);
    ASSERT_EQ(XGBoosterSetParam(booster, "max_bin", "16"), 0);
    if (ctx->IsCUDA()) {
      ASSERT_EQ(XGBoosterSetParam(booster, "tree_method", "gpu_hist"), 0);
    }
    // One training iteration is what makes the quantiles available.
    ASSERT_EQ(XGBoosterUpdateOneIter(booster, 0, p_fmat), 0);
    ASSERT_EQ(XGDMatrixGetQuantileCut(p_fmat, s_config.c_str(), &out_indptr, &out_data), 0);

    check_result(Xy, out_data, out_indptr);

    XGDMatrixFree(p_fmat);
    XGBoosterFree(booster);
  }

  {
    // IterativeDMatrix
    auto [p_fmat, Xy] = MakeQDMForTest(ctx, n_samples, n_features, dconfig);
    ASSERT_EQ(XGDMatrixGetQuantileCut(p_fmat, s_config.c_str(), &out_indptr, &out_data), 0);

    check_result(Xy, out_data, out_indptr);
    XGDMatrixFree(p_fmat);
  }

  {
    // SparsePageDMatrix
    auto [p_fmat, Xy] = MakeExtMemForTest(n_samples, n_features, dconfig);
    // assert fail, we don't have the quantile yet.
    ASSERT_EQ(XGDMatrixGetQuantileCut(p_fmat, s_config.c_str(), &out_indptr, &out_data), -1);

    std::array<DMatrixHandle, 1> mats{p_fmat};
    BoosterHandle booster;
    ASSERT_EQ(XGBoosterCreate(mats.data(), 1, &booster), 0);
    ASSERT_EQ(XGBoosterSetParam(booster, "max_bin", "16"), 0);
    if (ctx->IsCUDA()) {
      ASSERT_EQ(XGBoosterSetParam(booster, "tree_method", "gpu_hist"), 0);
    }
    ASSERT_EQ(XGBoosterUpdateOneIter(booster, 0, p_fmat), 0);
    ASSERT_EQ(XGDMatrixGetQuantileCut(p_fmat, s_config.c_str(), &out_indptr, &out_data), 0);

    XGDMatrixFree(p_fmat);
    XGBoosterFree(booster);
  }
}
|
||||
} // namespace
|
||||
|
||||
// Quantile-cut export through the C API with a default-constructed context.
TEST(CAPI, XGDMatrixGetQuantileCut) {
  Context default_ctx;
  TestXGDMatrixGetQuantileCut(&default_ctx);
}
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
// Same scenario as the test above, driven by a context created for device ordinal 0.
TEST(CAPI, GPUXGDMatrixGetQuantileCut) {
  auto device_ctx = MakeCUDACtx(0);
  TestXGDMatrixGetQuantileCut(&device_ctx);
}
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <bitset>
|
||||
#include <string> // for string
|
||||
|
||||
#if defined(XGBOOST_USE_NCCL)
|
||||
@@ -14,17 +15,13 @@
|
||||
#include "../../../src/collective/nccl_device_communicator.hip.h"
|
||||
#include "../../../src/collective/communicator-inl.hip.h"
|
||||
#endif
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace collective {
|
||||
|
||||
TEST(NcclDeviceCommunicatorSimpleTest, ThrowOnInvalidDeviceOrdinal) {
|
||||
auto construct = []() { NcclDeviceCommunicator comm{-1, nullptr}; };
|
||||
EXPECT_THROW(construct(), dmlc::Error);
|
||||
}
|
||||
|
||||
TEST(NcclDeviceCommunicatorSimpleTest, ThrowOnInvalidCommunicator) {
|
||||
auto construct = []() { NcclDeviceCommunicator comm{0, nullptr}; };
|
||||
auto construct = []() { NcclDeviceCommunicator comm{-1, false}; };
|
||||
EXPECT_THROW(construct(), dmlc::Error);
|
||||
}
|
||||
|
||||
@@ -36,6 +33,72 @@ TEST(NcclDeviceCommunicatorSimpleTest, SystemError) {
|
||||
ASSERT_TRUE(str.find("environment variables") != std::string::npos);
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
void VerifyAllReduceBitwiseAND() {
|
||||
auto const rank = collective::GetRank();
|
||||
std::bitset<64> original{};
|
||||
original[rank] = true;
|
||||
HostDeviceVector<uint64_t> buffer({original.to_ullong()}, rank);
|
||||
collective::AllReduce<collective::Operation::kBitwiseAND>(rank, buffer.DevicePointer(), 1);
|
||||
collective::Synchronize(rank);
|
||||
EXPECT_EQ(buffer.HostVector()[0], 0ULL);
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
// Runs the bitwise-AND check with one worker per visible GPU; skipped unless at least
// two GPUs are visible.
TEST(NcclDeviceCommunicator, MGPUAllReduceBitwiseAND) {
  auto const gpu_count = common::AllVisibleGPUs();
  if (gpu_count <= 1) {
    GTEST_SKIP() << "Skipping MGPUAllReduceBitwiseAND test with # GPUs = " << gpu_count;
  }
  // Template argument `true` requests the NCCL-backed device communicator.
  RunWithInMemoryCommunicator<true>(gpu_count, VerifyAllReduceBitwiseAND);
}
|
||||
|
||||
namespace {
|
||||
void VerifyAllReduceBitwiseOR() {
|
||||
auto const world_size = collective::GetWorldSize();
|
||||
auto const rank = collective::GetRank();
|
||||
std::bitset<64> original{};
|
||||
original[rank] = true;
|
||||
HostDeviceVector<uint64_t> buffer({original.to_ullong()}, rank);
|
||||
collective::AllReduce<collective::Operation::kBitwiseOR>(rank, buffer.DevicePointer(), 1);
|
||||
collective::Synchronize(rank);
|
||||
EXPECT_EQ(buffer.HostVector()[0], (1ULL << world_size) - 1);
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
// Runs the bitwise-OR check with one worker per visible GPU; skipped unless at least
// two GPUs are visible.
TEST(NcclDeviceCommunicator, MGPUAllReduceBitwiseOR) {
  auto const gpu_count = common::AllVisibleGPUs();
  if (gpu_count <= 1) {
    GTEST_SKIP() << "Skipping MGPUAllReduceBitwiseOR test with # GPUs = " << gpu_count;
  }
  // Template argument `true` requests the NCCL-backed device communicator.
  RunWithInMemoryCommunicator<true>(gpu_count, VerifyAllReduceBitwiseOR);
}
|
||||
|
||||
namespace {
|
||||
// Worker body: every rank contributes an all-ones word with only its own bit cleared.
//
// Across `world_size` ranks, bit b < world_size is set in (world_size - 1) contributions
// and every higher bit is set in all `world_size` contributions. XOR keeps a bit only
// when it is set an odd number of times, so the result depends on the parity of the
// world size:
//   - even world size: exactly the lowest `world_size` bits remain set;
//   - odd world size : exactly the complement remains set.
void VerifyAllReduceBitwiseXOR() {
  auto const world_size = collective::GetWorldSize();
  auto const rank = collective::GetRank();
  std::bitset<64> original{~0ULL};
  original[rank] = false;
  // The rank doubles as the device ordinal for the buffer and the collective calls.
  HostDeviceVector<uint64_t> buffer({original.to_ullong()}, rank);
  collective::AllReduce<collective::Operation::kBitwiseXOR>(rank, buffer.DevicePointer(), 1);
  collective::Synchronize(rank);

  auto const low_bits = (1ULL << world_size) - 1;
  auto const expected = (world_size % 2 == 0) ? low_bits : ~low_bits;
  EXPECT_EQ(buffer.HostVector()[0], expected);
}
|
||||
} // anonymous namespace
|
||||
|
||||
// Runs the bitwise-XOR check with one worker per visible GPU; skipped unless at least
// two GPUs are visible.
TEST(NcclDeviceCommunicator, MGPUAllReduceBitwiseXOR) {
  auto const gpu_count = common::AllVisibleGPUs();
  if (gpu_count <= 1) {
    GTEST_SKIP() << "Skipping MGPUAllReduceBitwiseXOR test with # GPUs = " << gpu_count;
  }
  // Template argument `true` requests the NCCL-backed device communicator.
  RunWithInMemoryCommunicator<true>(gpu_count, VerifyAllReduceBitwiseXOR);
}
|
||||
|
||||
} // namespace collective
|
||||
} // namespace xgboost
|
||||
|
||||
|
||||
@@ -21,8 +21,7 @@
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
void TestSegmentedArgSort() {
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
|
||||
size_t constexpr kElements = 100, kGroups = 3;
|
||||
dh::device_vector<size_t> sorted_idx(kElements, 0);
|
||||
@@ -60,8 +59,7 @@ void TestSegmentedArgSort() {
|
||||
TEST(Algorithm, SegmentedArgSort) { TestSegmentedArgSort(); }
|
||||
|
||||
TEST(Algorithm, GpuArgSort) {
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
|
||||
dh::device_vector<float> values(20);
|
||||
dh::Iota(dh::ToSpan(values)); // ascending
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2019 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2019-2023, XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include "../../../src/common/bitfield.h"
|
||||
@@ -14,7 +14,7 @@ TEST(BitField, Check) {
|
||||
static_cast<typename common::Span<LBitField64::value_type>::index_type>(
|
||||
storage.size())});
|
||||
size_t true_bit = 190;
|
||||
for (size_t i = true_bit + 1; i < bits.Size(); ++i) {
|
||||
for (size_t i = true_bit + 1; i < bits.Capacity(); ++i) {
|
||||
ASSERT_FALSE(bits.Check(i));
|
||||
}
|
||||
ASSERT_TRUE(bits.Check(true_bit));
|
||||
@@ -34,7 +34,7 @@ TEST(BitField, Check) {
|
||||
ASSERT_FALSE(bits.Check(i));
|
||||
}
|
||||
ASSERT_TRUE(bits.Check(true_bit));
|
||||
for (size_t i = true_bit + 1; i < bits.Size(); ++i) {
|
||||
for (size_t i = true_bit + 1; i < bits.Capacity(); ++i) {
|
||||
ASSERT_FALSE(bits.Check(i));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2019 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2019-2023, XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <thrust/copy.h>
|
||||
@@ -16,7 +16,7 @@ namespace xgboost {
|
||||
|
||||
__global__ void TestSetKernel(LBitField64 bits) {
|
||||
auto tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
if (tid < bits.Size()) {
|
||||
if (tid < bits.Capacity()) {
|
||||
bits.Set(tid);
|
||||
}
|
||||
}
|
||||
@@ -40,20 +40,16 @@ TEST(BitField, GPUSet) {
|
||||
|
||||
std::vector<LBitField64::value_type> h_storage(storage.size());
|
||||
thrust::copy(storage.begin(), storage.end(), h_storage.begin());
|
||||
|
||||
LBitField64 outputs {
|
||||
common::Span<LBitField64::value_type>{h_storage.data(),
|
||||
h_storage.data() + h_storage.size()}};
|
||||
LBitField64 outputs{
|
||||
common::Span<LBitField64::value_type>{h_storage.data(), h_storage.data() + h_storage.size()}};
|
||||
for (size_t i = 0; i < kBits; ++i) {
|
||||
ASSERT_TRUE(outputs.Check(i));
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void TestOrKernel(LBitField64 lhs, LBitField64 rhs) {
|
||||
lhs |= rhs;
|
||||
}
|
||||
|
||||
TEST(BitField, GPUAnd) {
|
||||
namespace {
|
||||
template <bool is_and, typename Op>
|
||||
void TestGPULogic(Op op) {
|
||||
uint32_t constexpr kBits = 128;
|
||||
dh::device_vector<LBitField64::value_type> lhs_storage(kBits);
|
||||
dh::device_vector<LBitField64::value_type> rhs_storage(kBits);
|
||||
@@ -61,13 +57,32 @@ TEST(BitField, GPUAnd) {
|
||||
auto rhs = LBitField64(dh::ToSpan(rhs_storage));
|
||||
thrust::fill(lhs_storage.begin(), lhs_storage.end(), 0UL);
|
||||
thrust::fill(rhs_storage.begin(), rhs_storage.end(), ~static_cast<LBitField64::value_type>(0UL));
|
||||
TestOrKernel<<<1, kBits>>>(lhs, rhs);
|
||||
dh::LaunchN(kBits, [=] __device__(auto) mutable { op(lhs, rhs); });
|
||||
|
||||
std::vector<LBitField64::value_type> h_storage(lhs_storage.size());
|
||||
thrust::copy(lhs_storage.begin(), lhs_storage.end(), h_storage.begin());
|
||||
LBitField64 outputs {{h_storage.data(), h_storage.data() + h_storage.size()}};
|
||||
for (size_t i = 0; i < kBits; ++i) {
|
||||
ASSERT_TRUE(outputs.Check(i));
|
||||
LBitField64 outputs{{h_storage.data(), h_storage.data() + h_storage.size()}};
|
||||
if (is_and) {
|
||||
for (size_t i = 0; i < kBits; ++i) {
|
||||
ASSERT_FALSE(outputs.Check(i));
|
||||
}
|
||||
} else {
|
||||
for (size_t i = 0; i < kBits; ++i) {
|
||||
ASSERT_TRUE(outputs.Check(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void TestGPUAnd() {
|
||||
TestGPULogic<true>([] XGBOOST_DEVICE(LBitField64 & lhs, LBitField64 const& rhs) { lhs &= rhs; });
|
||||
}
|
||||
|
||||
void TestGPUOr() {
|
||||
TestGPULogic<false>([] XGBOOST_DEVICE(LBitField64 & lhs, LBitField64 const& rhs) { lhs |= rhs; });
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// The body lives in a free function; the test case only forwards to it.
TEST(BitField, GPUAnd) {
  TestGPUAnd();
}
|
||||
|
||||
// The body lives in a free function; the test case only forwards to it.
TEST(BitField, GPUOr) {
  TestGPUOr();
}
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -2,15 +2,26 @@
|
||||
* Copyright 2018-2023 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/base.h> // for bst_bin_t
|
||||
#include <xgboost/context.h> // for Context
|
||||
#include <xgboost/data.h> // for BatchIterator, BatchSet, DMatrix, Met...
|
||||
|
||||
#include "../../../src/common/column_matrix.h"
|
||||
#include "../helpers.h"
|
||||
#include <cstddef> // for size_t
|
||||
#include <cstdint> // for int32_t, uint16_t, uint8_t
|
||||
#include <limits> // for numeric_limits
|
||||
#include <memory> // for shared_ptr, __shared_ptr_access, allo...
|
||||
#include <type_traits> // for remove_reference_t
|
||||
|
||||
#include "../../../src/common/column_matrix.h" // for ColumnMatrix, Column, DenseColumnIter
|
||||
#include "../../../src/common/hist_util.h" // for DispatchBinType, BinTypeSize, Index
|
||||
#include "../../../src/common/ref_resource_view.h" // for RefResourceView
|
||||
#include "../../../src/data/gradient_index.h" // for GHistIndexMatrix
|
||||
#include "../../../src/data/iterative_dmatrix.h" // for IterativeDMatrix
|
||||
#include "../../../src/tree/param.h" // for TrainParam
|
||||
#include "../helpers.h" // for RandomDataGenerator, NumpyArrayIterFo...
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
|
||||
TEST(DenseColumn, Test) {
|
||||
namespace xgboost::common {
|
||||
TEST(ColumnMatrix, Basic) {
|
||||
int32_t max_num_bins[] = {static_cast<int32_t>(std::numeric_limits<uint8_t>::max()) + 1,
|
||||
static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 1,
|
||||
static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 2};
|
||||
@@ -22,7 +33,7 @@ TEST(DenseColumn, Test) {
|
||||
GHistIndexMatrix gmat{&ctx, dmat.get(), max_num_bin, sparse_thresh, false};
|
||||
ColumnMatrix column_matrix;
|
||||
for (auto const& page : dmat->GetBatches<SparsePage>()) {
|
||||
column_matrix.InitFromSparse(page, gmat, sparse_thresh, AllThreadsForTest());
|
||||
column_matrix.InitFromSparse(page, gmat, sparse_thresh, ctx.Threads());
|
||||
}
|
||||
ASSERT_GE(column_matrix.GetTypeSize(), last);
|
||||
ASSERT_LE(column_matrix.GetTypeSize(), kUint32BinsTypeSize);
|
||||
@@ -59,7 +70,7 @@ void CheckSparseColumn(SparseColumnIter<BinIdxType>* p_col, const GHistIndexMatr
|
||||
}
|
||||
}
|
||||
|
||||
TEST(SparseColumn, Test) {
|
||||
TEST(ColumnMatrix, SparseColumn) {
|
||||
int32_t max_num_bins[] = {static_cast<int32_t>(std::numeric_limits<uint8_t>::max()) + 1,
|
||||
static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 1,
|
||||
static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 2};
|
||||
@@ -69,7 +80,7 @@ TEST(SparseColumn, Test) {
|
||||
GHistIndexMatrix gmat{&ctx, dmat.get(), max_num_bin, 0.5f, false};
|
||||
ColumnMatrix column_matrix;
|
||||
for (auto const& page : dmat->GetBatches<SparsePage>()) {
|
||||
column_matrix.InitFromSparse(page, gmat, 1.0, AllThreadsForTest());
|
||||
column_matrix.InitFromSparse(page, gmat, 1.0, ctx.Threads());
|
||||
}
|
||||
common::DispatchBinType(column_matrix.GetTypeSize(), [&](auto dtype) {
|
||||
using T = decltype(dtype);
|
||||
@@ -83,12 +94,14 @@ template <typename BinIdxType>
|
||||
void CheckColumWithMissingValue(const DenseColumnIter<BinIdxType, true>& col,
|
||||
const GHistIndexMatrix& gmat) {
|
||||
for (auto i = 0ull; i < col.Size(); i++) {
|
||||
if (col.IsMissing(i)) continue;
|
||||
if (col.IsMissing(i)) {
|
||||
continue;
|
||||
}
|
||||
EXPECT_EQ(gmat.index[gmat.row_ptr[i]], col.GetGlobalBinIdx(i));
|
||||
}
|
||||
}
|
||||
|
||||
TEST(DenseColumnWithMissing, Test) {
|
||||
TEST(ColumnMatrix, DenseColumnWithMissing) {
|
||||
int32_t max_num_bins[] = {static_cast<int32_t>(std::numeric_limits<uint8_t>::max()) + 1,
|
||||
static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 1,
|
||||
static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 2};
|
||||
@@ -98,7 +111,7 @@ TEST(DenseColumnWithMissing, Test) {
|
||||
GHistIndexMatrix gmat(&ctx, dmat.get(), max_num_bin, 0.2, false);
|
||||
ColumnMatrix column_matrix;
|
||||
for (auto const& page : dmat->GetBatches<SparsePage>()) {
|
||||
column_matrix.InitFromSparse(page, gmat, 0.2, AllThreadsForTest());
|
||||
column_matrix.InitFromSparse(page, gmat, 0.2, ctx.Threads());
|
||||
}
|
||||
ASSERT_TRUE(column_matrix.AnyMissing());
|
||||
DispatchBinType(column_matrix.GetTypeSize(), [&](auto dtype) {
|
||||
@@ -108,5 +121,29 @@ TEST(DenseColumnWithMissing, Test) {
|
||||
});
|
||||
}
|
||||
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
|
||||
// Builds an IterativeDMatrix from a batch iterator with 50% sparsity and checks, for
// every gradient-index page, that the missing-value bitfield of the transposed column
// matrix has the storage size computed for Rows() * Cols() entries, and that the first
// dense column agrees with the gradient index.
TEST(ColumnMatrix, GrowMissing) {
  Context ctx;
  bst_bin_t n_bins = 16;
  auto n_threads = 0;
  BatchParam batch{n_bins, tree::TrainParam::DftSparseThreshold()};

  float sparsity = 0.5;
  NumpyArrayIterForTest iter(sparsity);
  auto const missing_value = std::numeric_limits<float>::quiet_NaN();
  auto m = std::make_shared<data::IterativeDMatrix>(&iter, iter.Proxy(), nullptr, Reset, Next,
                                                    missing_value, n_threads, n_bins);

  for (auto const& page : m->GetBatches<GHistIndexMatrix>(&ctx, batch)) {
    auto const& column_matrix = page.Transpose();
    auto const& missing = column_matrix.Missing();
    using MissingT = std::remove_reference_t<decltype(missing)>;

    auto n_entries = NumpyArrayIterForTest::Rows() * NumpyArrayIterForTest::Cols();
    auto expected = MissingT::BitFieldT::ComputeStorageSize(n_entries);
    auto got = missing.storage.size();
    ASSERT_EQ(expected, got);

    DispatchBinType(column_matrix.GetTypeSize(), [&](auto dtype) {
      auto col = column_matrix.DenseColumn<decltype(dtype), true>(0);
      CheckColumWithMissingValue(col, page);
    });
  }
}
|
||||
} // namespace xgboost::common
|
||||
|
||||
@@ -131,11 +131,7 @@ TEST(DeviceHelpers, Reduce) {
|
||||
auto it = thrust::make_counting_iterator(0ul);
|
||||
dh::XGBCachingDeviceAllocator<char> alloc;
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
auto batched = dh::Reduce(thrust::cuda::par(alloc), it, it + kSize, 0ul, thrust::maximum<size_t>{});
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
auto batched = dh::Reduce(thrust::hip::par(alloc), it, it + kSize, 0ul, thrust::maximum<size_t>{});
|
||||
#endif
|
||||
|
||||
CHECK_EQ(batched, kSize - 1);
|
||||
}
|
||||
@@ -180,10 +176,6 @@ TEST(Allocator, OOM) {
|
||||
ASSERT_THROW({dh::caching_device_vector<char> vec(size);}, dmlc::Error);
|
||||
ASSERT_THROW({dh::device_vector<char> vec(size);}, dmlc::Error);
|
||||
// Clear last error so we don't fail subsequent tests
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
cudaGetLastError();
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
hipGetLastError();
|
||||
#endif
|
||||
}
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -36,11 +36,7 @@ struct ReadSymbolFunction {
|
||||
};
|
||||
|
||||
TEST(CompressedIterator, TestGPU) {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaSetDevice(0));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipSetDevice(0));
|
||||
#endif
|
||||
std::vector<int> test_cases = {1, 3, 426, 21, 64, 256, 100000, INT32_MAX};
|
||||
int num_elements = 1000;
|
||||
int repetitions = 1000;
|
||||
|
||||
@@ -27,8 +27,8 @@ void ParallelGHistBuilderReset() {
|
||||
|
||||
for(size_t inode = 0; inode < kNodesExtended; inode++) {
|
||||
collection.AddHistRow(inode);
|
||||
collection.AllocateData(inode);
|
||||
}
|
||||
collection.AllocateAllData();
|
||||
ParallelGHistBuilder hist_builder;
|
||||
hist_builder.Init(kBins);
|
||||
std::vector<GHistRow> target_hist(kNodes);
|
||||
@@ -83,8 +83,8 @@ void ParallelGHistBuilderReduceHist(){
|
||||
|
||||
for(size_t inode = 0; inode < kNodes; inode++) {
|
||||
collection.AddHistRow(inode);
|
||||
collection.AllocateData(inode);
|
||||
}
|
||||
collection.AllocateAllData();
|
||||
ParallelGHistBuilder hist_builder;
|
||||
hist_builder.Init(kBins);
|
||||
std::vector<GHistRow> target_hist(kNodes);
|
||||
@@ -129,7 +129,7 @@ TEST(CutsBuilder, SearchGroupInd) {
|
||||
|
||||
auto p_mat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
|
||||
|
||||
std::vector<bst_int> group(kNumGroups);
|
||||
std::vector<bst_group_t> group(kNumGroups);
|
||||
group[0] = 2;
|
||||
group[1] = 3;
|
||||
group[2] = 7;
|
||||
|
||||
@@ -3,18 +3,23 @@
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <thrust/device_vector.h>
|
||||
#include <xgboost/base.h> // for bst_bin_t
|
||||
#include <xgboost/c_api.h>
|
||||
#include <xgboost/data.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <algorithm> // for transform
|
||||
#include <cmath> // for floor
|
||||
#include <cstddef> // for size_t
|
||||
#include <limits> // for numeric_limits
|
||||
#include <string> // for string, to_string
|
||||
#include <tuple> // for tuple, make_tuple
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../../../include/xgboost/logging.h"
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
#include "../../../src/common/device_helpers.cuh"
|
||||
#include "../../../src/common/hist_util.cuh"
|
||||
#include "../../../src/common/hist_util.h"
|
||||
#include "../../../src/common/math.h"
|
||||
#include "../../../src/data/device_adapter.cuh"
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
#include "../../../src/common/device_helpers.hip.h"
|
||||
@@ -29,8 +34,7 @@
|
||||
#include "../helpers.h"
|
||||
#include "test_hist_util.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
namespace xgboost::common {
|
||||
|
||||
template <typename AdapterT>
|
||||
HistogramCuts GetHostCuts(Context const* ctx, AdapterT* adapter, int num_bins, float missing) {
|
||||
@@ -40,16 +44,17 @@ HistogramCuts GetHostCuts(Context const* ctx, AdapterT* adapter, int num_bins, f
|
||||
}
|
||||
|
||||
TEST(HistUtil, DeviceSketch) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
int num_columns = 1;
|
||||
int num_bins = 4;
|
||||
std::vector<float> x = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 7.0f, -1.0f};
|
||||
int num_rows = x.size();
|
||||
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
|
||||
|
||||
auto device_cuts = DeviceSketch(0, dmat.get(), num_bins);
|
||||
auto device_cuts = DeviceSketch(&ctx, dmat.get(), num_bins);
|
||||
|
||||
Context ctx;
|
||||
HistogramCuts host_cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins);
|
||||
Context cpu_ctx;
|
||||
HistogramCuts host_cuts = SketchOnDMatrix(&cpu_ctx, dmat.get(), num_bins);
|
||||
|
||||
EXPECT_EQ(device_cuts.Values(), host_cuts.Values());
|
||||
EXPECT_EQ(device_cuts.Ptrs(), host_cuts.Ptrs());
|
||||
@@ -64,11 +69,7 @@ TEST(HistUtil, SketchBatchNumElements) {
|
||||
size_t constexpr kCols = 10000;
|
||||
int device;
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaGetDevice(&device));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipGetDevice(&device));
|
||||
#endif
|
||||
|
||||
auto avail = static_cast<size_t>(dh::AvailableMemory(device) * 0.8);
|
||||
auto per_elem = detail::BytesPerElement(false);
|
||||
@@ -79,6 +80,7 @@ TEST(HistUtil, SketchBatchNumElements) {
|
||||
}
|
||||
|
||||
TEST(HistUtil, DeviceSketchMemory) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
int num_columns = 100;
|
||||
int num_rows = 1000;
|
||||
int num_bins = 256;
|
||||
@@ -87,7 +89,7 @@ TEST(HistUtil, DeviceSketchMemory) {
|
||||
|
||||
dh::GlobalMemoryLogger().Clear();
|
||||
ConsoleLogger::Configure({{"verbosity", "3"}});
|
||||
auto device_cuts = DeviceSketch(0, dmat.get(), num_bins);
|
||||
auto device_cuts = DeviceSketch(&ctx, dmat.get(), num_bins);
|
||||
|
||||
size_t bytes_required = detail::RequiredMemory(
|
||||
num_rows, num_columns, num_rows * num_columns, num_bins, false);
|
||||
@@ -97,6 +99,7 @@ TEST(HistUtil, DeviceSketchMemory) {
|
||||
}
|
||||
|
||||
TEST(HistUtil, DeviceSketchWeightsMemory) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
int num_columns = 100;
|
||||
int num_rows = 1000;
|
||||
int num_bins = 256;
|
||||
@@ -106,7 +109,7 @@ TEST(HistUtil, DeviceSketchWeightsMemory) {
|
||||
|
||||
dh::GlobalMemoryLogger().Clear();
|
||||
ConsoleLogger::Configure({{"verbosity", "3"}});
|
||||
auto device_cuts = DeviceSketch(0, dmat.get(), num_bins);
|
||||
auto device_cuts = DeviceSketch(&ctx, dmat.get(), num_bins);
|
||||
ConsoleLogger::Configure({{"verbosity", "0"}});
|
||||
|
||||
size_t bytes_required = detail::RequiredMemory(
|
||||
@@ -116,52 +119,56 @@ TEST(HistUtil, DeviceSketchWeightsMemory) {
|
||||
}
|
||||
|
||||
// Sketching the same DMatrix repeatedly on the device must reproduce exactly the
// same cut values and per-feature minimum values as the first (reference) sketch.
TEST(HistUtil, DeviceSketchDeterminism) {
  auto ctx = MakeCUDACtx(0);
  int num_rows = 500;
  int num_columns = 5;
  int num_bins = 256;
  auto x = GenerateRandom(num_rows, num_columns);
  auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
  // Only the Context-based call is kept; the device-ordinal call that preceded it
  // declared `reference_sketch` a second time in the same scope.
  auto reference_sketch = DeviceSketch(&ctx, dmat.get(), num_bins);
  size_t constexpr kRounds{100};
  for (size_t r = 0; r < kRounds; ++r) {
    auto new_sketch = DeviceSketch(&ctx, dmat.get(), num_bins);
    ASSERT_EQ(reference_sketch.Values(), new_sketch.Values());
    ASSERT_EQ(reference_sketch.MinValues(), new_sketch.MinValues());
  }
}
|
||||
|
||||
// Sketches single-column data produced by GenerateRandomCategoricalSingleColumn for
// several row counts and category counts, and validates the resulting cuts. No
// feature type is set on the DMatrix here, so the column is sketched with the
// default (presumably numerical) handling.
TEST(HistUtil, DeviceSketchCategoricalAsNumeric) {
  auto ctx = MakeCUDACtx(0);
  // Each of these lists is declared once; the array-typed duplicates of the same
  // names have been dropped.
  auto categorical_sizes = {2, 6, 8, 12};
  int num_bins = 256;
  auto sizes = {25, 100, 1000};
  for (auto n : sizes) {
    for (auto num_categories : categorical_sizes) {
      auto x = GenerateRandomCategoricalSingleColumn(n, num_categories);
      auto dmat = GetDMatrixFromData(x, n, 1);
      auto cuts = DeviceSketch(&ctx, dmat.get(), num_bins);
      ValidateCuts(cuts, dmat.get(), num_bins);
    }
  }
}
|
||||
|
||||
// Runs TestCategoricalSketch twice, with the boolean argument set to false and then
// true, using DeviceSketch as the sketching callback.
TEST(HistUtil, DeviceSketchCategoricalFeatures) {
  auto ctx = MakeCUDACtx(0);
  // The context is captured by copy so each callback owns the Context it points to.
  // Only the Context-based callbacks are kept; the device-ordinal variants that ran
  // the same two checks a second time have been dropped.
  TestCategoricalSketch(1000, 256, 32, false, [ctx](DMatrix* p_fmat, int32_t num_bins) {
    return DeviceSketch(&ctx, p_fmat, num_bins);
  });
  TestCategoricalSketch(1000, 256, 32, true, [ctx](DMatrix* p_fmat, int32_t num_bins) {
    return DeviceSketch(&ctx, p_fmat, num_bins);
  });
}
|
||||
|
||||
void TestMixedSketch() {
|
||||
size_t n_samples = 1000, n_features = 2, n_categories = 3;
|
||||
bst_bin_t n_bins = 64;
|
||||
|
||||
std::vector<float> data(n_samples * n_features);
|
||||
SimpleLCG gen;
|
||||
SimpleRealUniformDistribution<float> cat_d{0.0f, static_cast<float>(n_categories)};
|
||||
SimpleRealUniformDistribution<float> num_d{0.0f, 3.0f};
|
||||
for (size_t i = 0; i < n_samples * n_features; ++i) {
|
||||
// two features, row major. The first column is numeric and the second is categorical.
|
||||
if (i % 2 == 0) {
|
||||
data[i] = std::floor(cat_d(&gen));
|
||||
} else {
|
||||
@@ -173,46 +180,113 @@ void TestMixedSketch() {
|
||||
m->Info().feature_types.HostVector().push_back(FeatureType::kCategorical);
|
||||
m->Info().feature_types.HostVector().push_back(FeatureType::kNumerical);
|
||||
|
||||
auto cuts = DeviceSketch(0, m.get(), 64);
|
||||
ASSERT_EQ(cuts.Values().size(), 64 + n_categories);
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto cuts = DeviceSketch(&ctx, m.get(), n_bins);
|
||||
ASSERT_EQ(cuts.Values().size(), n_bins + n_categories);
|
||||
}
|
||||
|
||||
// Runs the mixed categorical/numerical sketching check implemented in TestMixedSketch().
TEST(HistUtil, DeviceSketchMixedFeatures) { TestMixedSketch(); }
|
||||
|
||||
// Exercises detail::RemoveDuplicatedCategories on three columns where only the
// middle one is categorical. After the call the last cut pointer must equal
// 2 * n_samples + n_categories, the weights belonging to the two numerical columns
// must be unchanged, and the weights kept for the categorical column must all
// originate from that column.
TEST(HistUtil, RemoveDuplicatedCategories) {
  bst_row_t n_samples = 512;
  bst_feature_t n_features = 3;
  bst_cat_t n_categories = 5;

  auto ctx = MakeCUDACtx(0);
  SimpleLCG rng;
  SimpleRealUniformDistribution<float> cat_d{0.0f, static_cast<float>(n_categories)};

  // Fill the entries in row-major order.
  dh::device_vector<Entry> sorted_entries(n_samples * n_features);
  for (std::size_t ridx = 0; ridx < n_samples; ++ridx) {
    for (bst_feature_t fidx = 0; fidx < n_features; ++fidx) {
      // The second column is categorical; the others simply store the row index.
      bool const is_categorical = (fidx == 1);
      float const fvalue =
          is_categorical ? std::floor(cat_d(&rng)) : static_cast<float>(ridx);
      sorted_entries[ridx * n_features + fidx] = Entry{fidx, fvalue};
    }
  }

  MetaInfo info;
  info.num_col_ = n_features;
  info.num_row_ = n_samples;
  info.feature_types.HostVector() = std::vector<FeatureType>{
      FeatureType::kNumerical, FeatureType::kCategorical, FeatureType::kNumerical};
  ASSERT_EQ(info.feature_types.Size(), n_features);

  HostDeviceVector<bst_row_t> cuts_ptr{0, n_samples, n_samples * 2, n_samples * 3};
  cuts_ptr.SetDevice(0);

  // One weight per entry; presumably dh::Iota writes 0, 1, 2, ... so that every
  // weight records the row-major position of its entry (the checks below rely on it).
  dh::device_vector<float> weight(n_samples * n_features, 0);
  dh::Iota(dh::ToSpan(weight));

  dh::caching_device_vector<bst_row_t> columns_ptr(4);
  for (std::size_t cidx = 0; cidx < columns_ptr.size(); ++cidx) {
    columns_ptr[cidx] = cidx * n_samples;
  }

  // Reorder entries (and their weights) into column-major order.
  thrust::sort_by_key(sorted_entries.begin(), sorted_entries.end(), weight.begin(),
                      detail::EntryCompareOp());

  detail::RemoveDuplicatedCategories(ctx.gpu_id, info, cuts_ptr.DeviceSpan(), &sorted_entries,
                                     &weight, &columns_ptr);

  auto const& h_cptr = cuts_ptr.ConstHostVector();
  ASSERT_EQ(h_cptr.back(), n_samples * 2 + n_categories);

  // Numerical column 0: the weights are the original positions 0, 3, 6, ...
  for (std::size_t ridx = 0; ridx < n_samples; ++ridx) {
    ASSERT_EQ(weight[ridx], ridx * 3);
  }
  // Numerical column 2 starts right after the categorical entries that were kept.
  auto beg = n_samples + n_categories;
  for (std::size_t ridx = 0; ridx < n_samples; ++ridx) {
    ASSERT_EQ(weight[ridx + beg], ridx * 3 + 2);
  }
  // Categorical column: every remaining weight must come from the second column.
  beg = n_samples;
  for (bst_cat_t cat = 0; cat < n_categories; ++cat) {
    ASSERT_EQ(static_cast<bst_feature_t>(weight[cat + beg]) % n_features, 1);
  }
}
|
||||
|
||||
// Validates device sketching of random 5-column data for several row counts and
// bin counts.
TEST(HistUtil, DeviceSketchMultipleColumns) {
  auto ctx = MakeCUDACtx(0);
  // Each list is declared once; the array-typed duplicates of the same names and
  // the device-ordinal sketch call have been dropped.
  auto bin_sizes = {2, 16, 256, 512};
  auto sizes = {100, 1000, 1500};
  int num_columns = 5;
  for (auto num_rows : sizes) {
    auto x = GenerateRandom(num_rows, num_columns);
    auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
    for (auto num_bins : bin_sizes) {
      auto cuts = DeviceSketch(&ctx, dmat.get(), num_bins);
      ValidateCuts(cuts, dmat.get(), num_bins);
    }
  }
}
|
||||
|
||||
// Same as the multi-column sketching check, but every DMatrix also carries random
// per-row weights before it is sketched.
TEST(HistUtil, DeviceSketchMultipleColumnsWeights) {
  auto ctx = MakeCUDACtx(0);
  // Each list is declared once; the array-typed duplicates of the same names and
  // the device-ordinal sketch call have been dropped.
  auto bin_sizes = {2, 16, 256, 512};
  auto sizes = {100, 1000, 1500};
  int num_columns = 5;
  for (auto num_rows : sizes) {
    auto x = GenerateRandom(num_rows, num_columns);
    auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
    dmat->Info().weights_.HostVector() = GenerateRandomWeights(num_rows);
    for (auto num_bins : bin_sizes) {
      auto cuts = DeviceSketch(&ctx, dmat.get(), num_bins);
      ValidateCuts(cuts, dmat.get(), num_bins);
    }
  }
}
|
||||
|
||||
TEST(HistUitl, DeviceSketchWeights) {
|
||||
int bin_sizes[] = {2, 16, 256, 512};
|
||||
int sizes[] = {100, 1000, 1500};
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto bin_sizes = {2, 16, 256, 512};
|
||||
auto sizes = {100, 1000, 1500};
|
||||
int num_columns = 5;
|
||||
for (auto num_rows : sizes) {
|
||||
auto x = GenerateRandom(num_rows, num_columns);
|
||||
@@ -222,8 +296,8 @@ TEST(HistUitl, DeviceSketchWeights) {
|
||||
h_weights.resize(num_rows);
|
||||
std::fill(h_weights.begin(), h_weights.end(), 1.0f);
|
||||
for (auto num_bins : bin_sizes) {
|
||||
auto cuts = DeviceSketch(0, dmat.get(), num_bins);
|
||||
auto wcuts = DeviceSketch(0, weighted_dmat.get(), num_bins);
|
||||
auto cuts = DeviceSketch(&ctx, dmat.get(), num_bins);
|
||||
auto wcuts = DeviceSketch(&ctx, weighted_dmat.get(), num_bins);
|
||||
ASSERT_EQ(cuts.MinValues(), wcuts.MinValues());
|
||||
ASSERT_EQ(cuts.Ptrs(), wcuts.Ptrs());
|
||||
ASSERT_EQ(cuts.Values(), wcuts.Values());
|
||||
@@ -234,14 +308,15 @@ TEST(HistUitl, DeviceSketchWeights) {
|
||||
}
|
||||
|
||||
TEST(HistUtil, DeviceSketchBatches) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
int num_bins = 256;
|
||||
int num_rows = 5000;
|
||||
int batch_sizes[] = {0, 100, 1500, 6000};
|
||||
auto batch_sizes = {0, 100, 1500, 6000};
|
||||
int num_columns = 5;
|
||||
for (auto batch_size : batch_sizes) {
|
||||
auto x = GenerateRandom(num_rows, num_columns);
|
||||
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
|
||||
auto cuts = DeviceSketch(0, dmat.get(), num_bins, batch_size);
|
||||
auto cuts = DeviceSketch(&ctx, dmat.get(), num_bins, batch_size);
|
||||
ValidateCuts(cuts, dmat.get(), num_bins);
|
||||
}
|
||||
|
||||
@@ -249,8 +324,8 @@ TEST(HistUtil, DeviceSketchBatches) {
|
||||
size_t batches = 16;
|
||||
auto x = GenerateRandom(num_rows * batches, num_columns);
|
||||
auto dmat = GetDMatrixFromData(x, num_rows * batches, num_columns);
|
||||
auto cuts_with_batches = DeviceSketch(0, dmat.get(), num_bins, num_rows);
|
||||
auto cuts = DeviceSketch(0, dmat.get(), num_bins, 0);
|
||||
auto cuts_with_batches = DeviceSketch(&ctx, dmat.get(), num_bins, num_rows);
|
||||
auto cuts = DeviceSketch(&ctx, dmat.get(), num_bins, 0);
|
||||
|
||||
auto const& cut_values_batched = cuts_with_batches.Values();
|
||||
auto const& cut_values = cuts.Values();
|
||||
@@ -261,15 +336,16 @@ TEST(HistUtil, DeviceSketchBatches) {
|
||||
}
|
||||
|
||||
TEST(HistUtil, DeviceSketchMultipleColumnsExternal) {
|
||||
int bin_sizes[] = {2, 16, 256, 512};
|
||||
int sizes[] = {100, 1000, 1500};
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto bin_sizes = {2, 16, 256, 512};
|
||||
auto sizes = {100, 1000, 1500};
|
||||
int num_columns =5;
|
||||
for (auto num_rows : sizes) {
|
||||
auto x = GenerateRandom(num_rows, num_columns);
|
||||
dmlc::TemporaryDirectory temp;
|
||||
auto dmat = GetExternalMemoryDMatrixFromData(x, num_rows, num_columns, temp);
|
||||
for (auto num_bins : bin_sizes) {
|
||||
auto cuts = DeviceSketch(0, dmat.get(), num_bins);
|
||||
auto cuts = DeviceSketch(&ctx, dmat.get(), num_bins);
|
||||
ValidateCuts(cuts, dmat.get(), num_bins);
|
||||
}
|
||||
}
|
||||
@@ -277,8 +353,9 @@ TEST(HistUtil, DeviceSketchMultipleColumnsExternal) {
|
||||
|
||||
// See https://github.com/dmlc/xgboost/issues/5866.
|
||||
TEST(HistUtil, DeviceSketchExternalMemoryWithWeights) {
|
||||
int bin_sizes[] = {2, 16, 256, 512};
|
||||
int sizes[] = {100, 1000, 1500};
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto bin_sizes = {2, 16, 256, 512};
|
||||
auto sizes = {100, 1000, 1500};
|
||||
int num_columns = 5;
|
||||
dmlc::TemporaryDirectory temp;
|
||||
for (auto num_rows : sizes) {
|
||||
@@ -286,7 +363,7 @@ TEST(HistUtil, DeviceSketchExternalMemoryWithWeights) {
|
||||
auto dmat = GetExternalMemoryDMatrixFromData(x, num_rows, num_columns, temp);
|
||||
dmat->Info().weights_.HostVector() = GenerateRandomWeights(num_rows);
|
||||
for (auto num_bins : bin_sizes) {
|
||||
auto cuts = DeviceSketch(0, dmat.get(), num_bins);
|
||||
auto cuts = DeviceSketch(&ctx, dmat.get(), num_bins);
|
||||
ValidateCuts(cuts, dmat.get(), num_bins);
|
||||
}
|
||||
}
|
||||
@@ -299,7 +376,7 @@ auto MakeUnweightedCutsForTest(Adapter adapter, int32_t num_bins, float missing,
|
||||
SketchContainer sketch_container(ft, num_bins, adapter.NumColumns(), adapter.NumRows(), 0);
|
||||
MetaInfo info;
|
||||
AdapterDeviceSketch(adapter.Value(), num_bins, info, missing, &sketch_container, batch_size);
|
||||
sketch_container.MakeCuts(&batched_cuts);
|
||||
sketch_container.MakeCuts(&batched_cuts, info.IsColumnSplit());
|
||||
return batched_cuts;
|
||||
}
|
||||
|
||||
@@ -367,7 +444,7 @@ TEST(HistUtil, AdapterSketchSlidingWindowMemory) {
|
||||
AdapterDeviceSketch(adapter.Value(), num_bins, info, std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_container);
|
||||
HistogramCuts cuts;
|
||||
sketch_container.MakeCuts(&cuts);
|
||||
sketch_container.MakeCuts(&cuts, info.IsColumnSplit());
|
||||
size_t bytes_required = detail::RequiredMemory(
|
||||
num_rows, num_columns, num_rows * num_columns, num_bins, false);
|
||||
EXPECT_LE(dh::GlobalMemoryLogger().PeakMemory(), bytes_required * 1.05);
|
||||
@@ -397,7 +474,7 @@ TEST(HistUtil, AdapterSketchSlidingWindowWeightedMemory) {
|
||||
&sketch_container);
|
||||
|
||||
HistogramCuts cuts;
|
||||
sketch_container.MakeCuts(&cuts);
|
||||
sketch_container.MakeCuts(&cuts, info.IsColumnSplit());
|
||||
ConsoleLogger::Configure({{"verbosity", "0"}});
|
||||
size_t bytes_required = detail::RequiredMemory(
|
||||
num_rows, num_columns, num_rows * num_columns, num_bins, true);
|
||||
@@ -430,7 +507,7 @@ void TestCategoricalSketchAdapter(size_t n, size_t num_categories,
|
||||
AdapterDeviceSketch(adapter.Value(), num_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(), &container);
|
||||
HistogramCuts cuts;
|
||||
container.MakeCuts(&cuts);
|
||||
container.MakeCuts(&cuts, info.IsColumnSplit());
|
||||
|
||||
thrust::sort(x.begin(), x.end());
|
||||
auto n_uniques = thrust::unique(x.begin(), x.end()) - x.begin();
|
||||
@@ -452,9 +529,9 @@ void TestCategoricalSketchAdapter(size_t n, size_t num_categories,
|
||||
}
|
||||
|
||||
TEST(HistUtil, AdapterDeviceSketchCategorical) {
|
||||
int categorical_sizes[] = {2, 6, 8, 12};
|
||||
auto categorical_sizes = {2, 6, 8, 12};
|
||||
int num_bins = 256;
|
||||
int sizes[] = {25, 100, 1000};
|
||||
auto sizes = {25, 100, 1000};
|
||||
for (auto n : sizes) {
|
||||
for (auto num_categories : categorical_sizes) {
|
||||
auto x = GenerateRandomCategoricalSingleColumn(n, num_categories);
|
||||
@@ -469,8 +546,8 @@ TEST(HistUtil, AdapterDeviceSketchCategorical) {
|
||||
}
|
||||
|
||||
TEST(HistUtil, AdapterDeviceSketchMultipleColumns) {
|
||||
int bin_sizes[] = {2, 16, 256, 512};
|
||||
int sizes[] = {100, 1000, 1500};
|
||||
auto bin_sizes = {2, 16, 256, 512};
|
||||
auto sizes = {100, 1000, 1500};
|
||||
int num_columns = 5;
|
||||
for (auto num_rows : sizes) {
|
||||
auto x = GenerateRandom(num_rows, num_columns);
|
||||
@@ -486,7 +563,7 @@ TEST(HistUtil, AdapterDeviceSketchMultipleColumns) {
|
||||
TEST(HistUtil, AdapterDeviceSketchBatches) {
|
||||
int num_bins = 256;
|
||||
int num_rows = 5000;
|
||||
int batch_sizes[] = {0, 100, 1500, 6000};
|
||||
auto batch_sizes = {0, 100, 1500, 6000};
|
||||
int num_columns = 5;
|
||||
for (auto batch_size : batch_sizes) {
|
||||
auto x = GenerateRandom(num_rows, num_columns);
|
||||
@@ -499,11 +576,7 @@ TEST(HistUtil, AdapterDeviceSketchBatches) {
|
||||
|
||||
namespace {
|
||||
auto MakeData(Context const* ctx, std::size_t n_samples, bst_feature_t n_features) {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaSetDevice(ctx->gpu_id));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipSetDevice(ctx->gpu_id));
|
||||
#endif
|
||||
auto n = n_samples * n_features;
|
||||
std::vector<float> x;
|
||||
x.resize(n);
|
||||
@@ -571,14 +644,15 @@ TEST(HistUtil, GetColumnSize) {
|
||||
// Check sketching from adapter or DMatrix results in the same answer
|
||||
// Consistency here is useful for testing and user experience
|
||||
TEST(HistUtil, SketchingEquivalent) {
|
||||
int bin_sizes[] = {2, 16, 256, 512};
|
||||
int sizes[] = {100, 1000, 1500};
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto bin_sizes = {2, 16, 256, 512};
|
||||
auto sizes = {100, 1000, 1500};
|
||||
int num_columns = 5;
|
||||
for (auto num_rows : sizes) {
|
||||
auto x = GenerateRandom(num_rows, num_columns);
|
||||
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
|
||||
for (auto num_bins : bin_sizes) {
|
||||
auto dmat_cuts = DeviceSketch(0, dmat.get(), num_bins);
|
||||
auto dmat_cuts = DeviceSketch(&ctx, dmat.get(), num_bins);
|
||||
auto x_device = thrust::device_vector<float>(x);
|
||||
auto adapter = AdapterFromData(x_device, num_rows, num_columns);
|
||||
common::HistogramCuts adapter_cuts = MakeUnweightedCutsForTest(
|
||||
@@ -593,21 +667,25 @@ TEST(HistUtil, SketchingEquivalent) {
|
||||
}
|
||||
|
||||
TEST(HistUtil, DeviceSketchFromGroupWeights) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
size_t constexpr kRows = 3000, kCols = 200, kBins = 256;
|
||||
size_t constexpr kGroups = 10;
|
||||
auto m = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix();
|
||||
|
||||
// sketch with group weight
|
||||
auto& h_weights = m->Info().weights_.HostVector();
|
||||
h_weights.resize(kRows);
|
||||
h_weights.resize(kGroups);
|
||||
std::fill(h_weights.begin(), h_weights.end(), 1.0f);
|
||||
std::vector<bst_group_t> groups(kGroups);
|
||||
for (size_t i = 0; i < kGroups; ++i) {
|
||||
groups[i] = kRows / kGroups;
|
||||
}
|
||||
m->SetInfo("group", groups.data(), DataType::kUInt32, kGroups);
|
||||
HistogramCuts weighted_cuts = DeviceSketch(0, m.get(), kBins, 0);
|
||||
HistogramCuts weighted_cuts = DeviceSketch(&ctx, m.get(), kBins, 0);
|
||||
|
||||
// sketch with no weight
|
||||
h_weights.clear();
|
||||
HistogramCuts cuts = DeviceSketch(0, m.get(), kBins, 0);
|
||||
HistogramCuts cuts = DeviceSketch(&ctx, m.get(), kBins, 0);
|
||||
|
||||
ASSERT_EQ(cuts.Values().size(), weighted_cuts.Values().size());
|
||||
ASSERT_EQ(cuts.MinValues().size(), weighted_cuts.MinValues().size());
|
||||
@@ -662,7 +740,7 @@ void TestAdapterSketchFromWeights(bool with_group) {
|
||||
&sketch_container);
|
||||
|
||||
common::HistogramCuts cuts;
|
||||
sketch_container.MakeCuts(&cuts);
|
||||
sketch_container.MakeCuts(&cuts, info.IsColumnSplit());
|
||||
|
||||
auto dmat = GetDMatrixFromData(storage.HostVector(), kRows, kCols);
|
||||
if (with_group) {
|
||||
@@ -675,9 +753,10 @@ void TestAdapterSketchFromWeights(bool with_group) {
|
||||
ASSERT_EQ(cuts.Ptrs().size(), kCols + 1);
|
||||
ValidateCuts(cuts, dmat.get(), kBins);
|
||||
|
||||
auto cuda_ctx = MakeCUDACtx(0);
|
||||
if (with_group) {
|
||||
dmat->Info().weights_ = decltype(dmat->Info().weights_)(); // remove weight
|
||||
HistogramCuts non_weighted = DeviceSketch(0, dmat.get(), kBins, 0);
|
||||
HistogramCuts non_weighted = DeviceSketch(&cuda_ctx, dmat.get(), kBins, 0);
|
||||
for (size_t i = 0; i < cuts.Values().size(); ++i) {
|
||||
ASSERT_EQ(cuts.Values()[i], non_weighted.Values()[i]);
|
||||
}
|
||||
@@ -703,7 +782,7 @@ void TestAdapterSketchFromWeights(bool with_group) {
|
||||
SketchContainer sketch_container(ft, kBins, kCols, kRows, 0);
|
||||
AdapterDeviceSketch(adapter.Value(), kBins, info, std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_container);
|
||||
sketch_container.MakeCuts(&weighted);
|
||||
sketch_container.MakeCuts(&weighted, info.IsColumnSplit());
|
||||
ValidateCuts(weighted, dmat.get(), kBins);
|
||||
}
|
||||
}
|
||||
@@ -712,5 +791,156 @@ TEST(HistUtil, AdapterSketchFromWeights) {
|
||||
TestAdapterSketchFromWeights(false);
|
||||
TestAdapterSketchFromWeights(true);
|
||||
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
|
||||
namespace {
|
||||
class DeviceSketchWithHessianTest
|
||||
: public ::testing::TestWithParam<std::tuple<bool, bst_row_t, bst_bin_t>> {
|
||||
bst_feature_t n_features_ = 5;
|
||||
bst_group_t n_groups_{3};
|
||||
|
||||
auto GenerateHessian(Context const* ctx, bst_row_t n_samples) const {
|
||||
HostDeviceVector<float> hessian;
|
||||
auto& h_hess = hessian.HostVector();
|
||||
h_hess = GenerateRandomWeights(n_samples);
|
||||
std::mt19937 rng(0);
|
||||
std::shuffle(h_hess.begin(), h_hess.end(), rng);
|
||||
hessian.SetDevice(ctx->Device());
|
||||
return hessian;
|
||||
}
|
||||
|
||||
void CheckReg(Context const* ctx, std::shared_ptr<DMatrix> p_fmat, bst_bin_t n_bins,
|
||||
HostDeviceVector<float> const& hessian, std::vector<float> const& w,
|
||||
std::size_t n_elements) const {
|
||||
auto const& h_hess = hessian.ConstHostVector();
|
||||
{
|
||||
auto& h_weight = p_fmat->Info().weights_.HostVector();
|
||||
h_weight = w;
|
||||
}
|
||||
|
||||
HistogramCuts cuts_hess =
|
||||
DeviceSketchWithHessian(ctx, p_fmat.get(), n_bins, hessian.ConstDeviceSpan(), n_elements);
|
||||
ValidateCuts(cuts_hess, p_fmat.get(), n_bins);
|
||||
|
||||
// merge hessian
|
||||
{
|
||||
auto& h_weight = p_fmat->Info().weights_.HostVector();
|
||||
ASSERT_EQ(h_weight.size(), h_hess.size());
|
||||
for (std::size_t i = 0; i < h_weight.size(); ++i) {
|
||||
h_weight[i] = w[i] * h_hess[i];
|
||||
}
|
||||
}
|
||||
|
||||
HistogramCuts cuts_wh = DeviceSketch(ctx, p_fmat.get(), n_bins, n_elements);
|
||||
ValidateCuts(cuts_wh, p_fmat.get(), n_bins);
|
||||
ASSERT_EQ(cuts_hess.Values().size(), cuts_wh.Values().size());
|
||||
for (std::size_t i = 0; i < cuts_hess.Values().size(); ++i) {
|
||||
ASSERT_NEAR(cuts_wh.Values()[i], cuts_hess.Values()[i], kRtEps);
|
||||
}
|
||||
|
||||
p_fmat->Info().weights_.HostVector() = w;
|
||||
}
|
||||
|
||||
protected:
|
||||
Context ctx_ = MakeCUDACtx(0);
|
||||
|
||||
void TestLTR(Context const* ctx, bst_row_t n_samples, bst_bin_t n_bins,
|
||||
std::size_t n_elements) const {
|
||||
auto x = GenerateRandom(n_samples, n_features_);
|
||||
|
||||
std::vector<bst_group_t> gptr;
|
||||
gptr.resize(n_groups_ + 1, 0);
|
||||
gptr[1] = n_samples / n_groups_;
|
||||
gptr[2] = n_samples / n_groups_ + gptr[1];
|
||||
gptr.back() = n_samples;
|
||||
|
||||
auto hessian = this->GenerateHessian(ctx, n_samples);
|
||||
auto const& h_hess = hessian.ConstHostVector();
|
||||
auto p_fmat = GetDMatrixFromData(x, n_samples, n_features_);
|
||||
p_fmat->Info().group_ptr_ = gptr;
|
||||
|
||||
// test with constant group weight
|
||||
std::vector<float> w(n_groups_, 1.0f);
|
||||
p_fmat->Info().weights_.HostVector() = w;
|
||||
HistogramCuts cuts_hess =
|
||||
DeviceSketchWithHessian(ctx, p_fmat.get(), n_bins, hessian.ConstDeviceSpan(), n_elements);
|
||||
// make validation easier by converting it into sample weight.
|
||||
p_fmat->Info().weights_.HostVector() = h_hess;
|
||||
p_fmat->Info().group_ptr_.clear();
|
||||
ValidateCuts(cuts_hess, p_fmat.get(), n_bins);
|
||||
// restore ltr properties
|
||||
p_fmat->Info().weights_.HostVector() = w;
|
||||
p_fmat->Info().group_ptr_ = gptr;
|
||||
|
||||
// test with random group weight
|
||||
w = GenerateRandomWeights(n_groups_);
|
||||
p_fmat->Info().weights_.HostVector() = w;
|
||||
cuts_hess =
|
||||
DeviceSketchWithHessian(ctx, p_fmat.get(), n_bins, hessian.ConstDeviceSpan(), n_elements);
|
||||
// make validation easier by converting it into sample weight.
|
||||
p_fmat->Info().weights_.HostVector() = h_hess;
|
||||
p_fmat->Info().group_ptr_.clear();
|
||||
ValidateCuts(cuts_hess, p_fmat.get(), n_bins);
|
||||
|
||||
// merge hessian with sample weight
|
||||
p_fmat->Info().weights_.Resize(n_samples);
|
||||
p_fmat->Info().group_ptr_.clear();
|
||||
for (std::size_t i = 0; i < h_hess.size(); ++i) {
|
||||
auto gidx = dh::SegmentId(Span{gptr.data(), gptr.size()}, i);
|
||||
p_fmat->Info().weights_.HostVector()[i] = w[gidx] * h_hess[i];
|
||||
}
|
||||
auto cuts = DeviceSketch(ctx, p_fmat.get(), n_bins, n_elements);
|
||||
ValidateCuts(cuts, p_fmat.get(), n_bins);
|
||||
ASSERT_EQ(cuts.Values().size(), cuts_hess.Values().size());
|
||||
for (std::size_t i = 0; i < cuts.Values().size(); ++i) {
|
||||
EXPECT_NEAR(cuts.Values()[i], cuts_hess.Values()[i], 1e-4f);
|
||||
}
|
||||
}
|
||||
|
||||
void TestRegression(Context const* ctx, bst_row_t n_samples, bst_bin_t n_bins,
|
||||
std::size_t n_elements) const {
|
||||
auto x = GenerateRandom(n_samples, n_features_);
|
||||
auto p_fmat = GetDMatrixFromData(x, n_samples, n_features_);
|
||||
std::vector<float> w = GenerateRandomWeights(n_samples);
|
||||
|
||||
auto hessian = this->GenerateHessian(ctx, n_samples);
|
||||
|
||||
this->CheckReg(ctx, p_fmat, n_bins, hessian, w, n_elements);
|
||||
}
|
||||
};
|
||||
|
||||
auto MakeParamsForTest() {
|
||||
std::vector<bst_row_t> sizes = {1, 2, 256, 512, 1000, 1500};
|
||||
std::vector<bst_bin_t> bin_sizes = {2, 16, 256, 512};
|
||||
std::vector<std::tuple<bool, bst_row_t, bst_bin_t>> configs;
|
||||
for (auto n_samples : sizes) {
|
||||
for (auto n_bins : bin_sizes) {
|
||||
configs.emplace_back(true, n_samples, n_bins);
|
||||
configs.emplace_back(false, n_samples, n_bins);
|
||||
}
|
||||
}
|
||||
return configs;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// Dispatches on the first tuple element: the LTR check when it is true, the
// regression check otherwise. Each check runs twice, with the trailing size
// argument equal to 0 and then 512.
TEST_P(DeviceSketchWithHessianTest, DeviceSketchWithHessian) {
  auto const [is_ltr, n_samples, n_bins] = GetParam();
  for (std::size_t n_elements : {std::size_t{0}, std::size_t{512}}) {
    if (is_ltr) {
      this->TestLTR(&ctx_, n_samples, n_bins, n_elements);
    } else {
      this->TestRegression(&ctx_, n_samples, n_bins, n_elements);
    }
  }
}
|
||||
|
||||
// Registers every configuration from MakeParamsForTest() and names each instance
// "<ltr|reg>_<n_samples>_<n_bins>".
INSTANTIATE_TEST_SUITE_P(
    HistUtil, DeviceSketchWithHessianTest, ::testing::ValuesIn(MakeParamsForTest()),
    [](::testing::TestParamInfo<DeviceSketchWithHessianTest::ParamType> const& info) {
      std::string name = std::get<0>(info.param) ? "ltr" : "reg";
      name.append("_").append(std::to_string(std::get<1>(info.param)));
      name.append("_").append(std::to_string(std::get<2>(info.param)));
      return name;
    });
|
||||
} // namespace xgboost::common
|
||||
|
||||
@@ -19,15 +19,9 @@ namespace {
|
||||
// Makes `device`, wrapped around by the number of devices reported by the runtime,
// the current device. The CUDA or HIP runtime calls are selected at compile time;
// when neither macro is defined the function does nothing.
void SetDeviceForTest(int device) {
  int device_count;

#if defined(XGBOOST_USE_CUDA)
  dh::safe_cuda(cudaGetDeviceCount(&device_count));
  device %= device_count;  // map the requested ordinal into the valid range
  dh::safe_cuda(cudaSetDevice(device));
#elif defined(XGBOOST_USE_HIP)
  dh::safe_cuda(hipGetDeviceCount(&device_count));
  device %= device_count;  // map the requested ordinal into the valid range
  dh::safe_cuda(hipSetDevice(device));
#endif
}
|
||||
} // namespace
|
||||
|
||||
|
||||
@@ -1,16 +1,16 @@
|
||||
/*!
|
||||
* Copyright (c) by XGBoost Contributors 2019
|
||||
/**
|
||||
* Copyright 2019-2023, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <fstream>
|
||||
#include <cstddef> // for size_t
|
||||
#include <fstream> // for ofstream
|
||||
|
||||
#include "../../../src/common/io.h"
|
||||
#include "../helpers.h"
|
||||
#include "../filesystem.h" // dmlc::TemporaryDirectory
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
namespace xgboost::common {
|
||||
TEST(MemoryFixSizeBuffer, Seek) {
|
||||
size_t constexpr kSize { 64 };
|
||||
std::vector<int32_t> memory( kSize );
|
||||
@@ -89,5 +89,138 @@ TEST(IO, LoadSequentialFile) {
|
||||
|
||||
ASSERT_THROW(LoadSequentialFile("non-exist", true), dmlc::Error);
|
||||
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
|
||||
// Covers the resource handlers: size/type of a malloc-backed resource, both Resize
// variants of MallocResource (old contents kept, new tail filled with zero or with
// an explicit byte), and a read-only mmap-backed resource over a small file.
TEST(IO, Resource) {
  {
    // Basic malloc resource: reports the requested size and the malloc type.
    std::size_t n_bytes = 128;
    std::shared_ptr<ResourceHandler> resource = std::make_shared<MallocResource>(n_bytes);
    ASSERT_EQ(resource->Size(), n_bytes);
    ASSERT_EQ(resource->Type(), ResourceHandler::kMalloc);
  }

  // Resizing a malloc resource; `force_malloc` selects the Resize<true> variant.
  auto test_malloc_resize = [](bool force_malloc) {
    std::size_t n = 64;
    std::shared_ptr<ResourceHandler> resource = std::make_shared<MallocResource>(n);
    auto bytes = reinterpret_cast<std::uint8_t *>(resource->Data());
    std::iota(bytes, bytes + n, 0);

    auto malloc_resource = std::dynamic_pointer_cast<MallocResource>(resource);
    ASSERT_TRUE(malloc_resource);

    // Grow to 2n with the default fill.
    if (force_malloc) {
      malloc_resource->Resize<true>(n * 2);
    } else {
      malloc_resource->Resize<false>(n * 2);
    }
    // The first n bytes keep the iota pattern, the new tail is zero.
    for (std::size_t i = 0; i < n; ++i) {
      ASSERT_EQ(malloc_resource->DataAs<std::uint8_t>()[i], i) << force_malloc;
    }
    for (std::size_t i = n; i < 2 * n; ++i) {
      ASSERT_EQ(malloc_resource->DataAs<std::uint8_t>()[i], 0);
    }

    // Overwrite everything with 7, then grow to 3n with an explicit fill byte.
    bytes = malloc_resource->DataAs<std::uint8_t>();
    std::fill_n(bytes, malloc_resource->Size(), 7);
    if (force_malloc) {
      malloc_resource->Resize<true>(n * 3, std::byte{3});
    } else {
      malloc_resource->Resize<false>(n * 3, std::byte{3});
    }
    // Old contents are kept and the new tail carries the fill byte.
    for (std::size_t i = 0; i < n * 2; ++i) {
      ASSERT_EQ(malloc_resource->DataAs<std::uint8_t>()[i], 7);
    }
    for (std::size_t i = n * 2; i < n * 3; ++i) {
      ASSERT_EQ(malloc_resource->DataAs<std::uint8_t>()[i], 3);
    }
  };
  test_malloc_resize(true);
  test_malloc_resize(false);

  {
    // mmap resource: map the first sizeof(double) bytes of a file and read them back.
    dmlc::TemporaryDirectory tmpdir;
    auto path = tmpdir.path + "/testfile";

    double val{1.0};
    std::ofstream fout(path, std::ios::binary);
    fout.write(reinterpret_cast<char const *>(&val), sizeof(val));
    // Extra text after the binary value; only the first sizeof(double) bytes are mapped.
    fout << 1.0 << std::endl;
    fout.close();

    auto resource = std::shared_ptr<MmapResource>{
        new MmapResource{path, 0, sizeof(double)}};
    ASSERT_EQ(resource->Size(), sizeof(double));
    ASSERT_EQ(resource->Type(), ResourceHandler::kMmap);
    ASSERT_EQ(resource->DataAs<double>()[0], val);
  }
}
|
||||
|
||||
// Writes a sequence of length-prefixed int32 batches to one file, then reads every
// batch back through PrivateMmapConstStream opened at that batch's offset, once via
// Read() for the length prefix and once via Consume().
TEST(IO, PrivateMmapStream) {
  dmlc::TemporaryDirectory tempdir;
  auto path = tempdir.path + "/testfile";

  // The page size on Linux is usually 4096, while the allocation granularity on the
  // Windows machine where this test was written is 65536. The batch sizes are chosen
  // to span both.
  std::size_t n_batches{64};
  std::size_t multiplier{2048};

  using T = std::int32_t;

  std::vector<std::vector<std::int32_t>> batches;
  // offset[i] is the byte position where batch i starts; offset.back() is the total size.
  std::vector<std::size_t> offset{0ul};

  {
    std::unique_ptr<dmlc::Stream> fo{dmlc::Stream::Create(path.c_str(), "w")};
    for (std::size_t b = 0; b < n_batches; ++b) {
      std::size_t n_values = (b + 1) * multiplier;
      std::vector<T> values(n_values, 0);
      std::iota(values.begin(), values.end(), b * b);

      // Layout of one batch: a 64-bit element count followed by the raw elements.
      fo->Write(static_cast<std::uint64_t>(values.size()));
      fo->Write(values.data(), values.size() * sizeof(T));

      // Accumulate sizes directly into offsets.
      std::size_t n_bytes = sizeof(std::uint64_t) + values.size() * sizeof(T);
      offset.push_back(offset.back() + n_bytes);

      batches.emplace_back(std::move(values));
    }
  }

  // Test read
  for (std::size_t b = 0; b < n_batches; ++b) {
    std::size_t beg = offset[b];
    std::size_t length = offset.at(b + 1) - offset[b];
    auto fi{std::make_unique<PrivateMmapConstStream>(path, beg, length)};

    std::uint64_t n_values{0};
    ASSERT_TRUE(fi->Read(&n_values));
    ASSERT_EQ(fi->Tell(), sizeof(n_values));

    std::vector<T> loaded;
    loaded.resize(n_values);
    ASSERT_EQ(fi->Read(loaded.data(), n_values * sizeof(T)), n_values * sizeof(T));
    ASSERT_EQ(loaded, batches[b]);
  }

  // Test consume
  for (std::size_t b = 0; b < n_batches; ++b) {
    std::size_t beg = offset[b];
    std::size_t length = offset.at(b + 1) - offset[b];
    std::unique_ptr<AlignedResourceReadStream> fi{
        std::make_unique<PrivateMmapConstStream>(path, beg, length)};

    std::uint64_t n_values{0};
    ASSERT_TRUE(fi->Consume(&n_values));
    ASSERT_EQ(fi->Tell(), sizeof(n_values));

    std::vector<T> loaded;
    loaded.resize(n_values);
    ASSERT_EQ(fi->Read(loaded.data(), n_values * sizeof(T)), sizeof(T) * n_values);
    ASSERT_EQ(loaded, batches[b]);
  }
}
|
||||
} // namespace xgboost::common
|
||||
|
||||
@@ -41,7 +41,6 @@ std::string GetModelStr() {
|
||||
"num_class": "0",
|
||||
"num_feature": "10",
|
||||
"objective": "reg:linear",
|
||||
"predictor": "gpu_predictor",
|
||||
"tree_method": "gpu_hist",
|
||||
"updater": "grow_gpu_hist"
|
||||
},
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
#include "../../../src/common/linalg_op.hip.h"
|
||||
#endif
|
||||
#include "../helpers.h"
|
||||
#include "xgboost/context.h"
|
||||
#include "xgboost/linalg.h"
|
||||
|
||||
@@ -58,8 +59,7 @@ void TestElementWiseKernel() {
|
||||
}
|
||||
|
||||
void TestSlice() {
|
||||
Context ctx;
|
||||
ctx.gpu_id = 1;
|
||||
auto ctx = MakeCUDACtx(1);
|
||||
thrust::device_vector<double> data(2 * 3 * 4);
|
||||
auto t = MakeTensorView(&ctx, dh::ToSpan(data), 2, 3, 4);
|
||||
dh::LaunchN(1, [=] __device__(size_t) {
|
||||
|
||||
@@ -1,16 +1,22 @@
|
||||
/**
|
||||
* Copyright 2020-2023, XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include "test_quantile.h"
|
||||
#include "../helpers.h"
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
#include "../../../src/collective/communicator-inl.cuh"
|
||||
#include "../../../src/common/hist_util.cuh"
|
||||
#include "../../../src/common/quantile.cuh"
|
||||
#include "../../../src/data/device_adapter.cuh" // CupyAdapter
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
#include "../../../src/collective/communicator-inl.hip.h"
|
||||
#include "../../../src/common/hist_util.hip.h"
|
||||
#include "../../../src/common/quantile.hip.h"
|
||||
#include "../../../src/data/device_adapter.hip.h" // CupyAdapter
|
||||
#endif
|
||||
|
||||
#include "../helpers.h"
|
||||
#include "test_quantile.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace {
|
||||
struct IsSorted {
|
||||
@@ -20,6 +26,9 @@ struct IsSorted {
|
||||
};
|
||||
}
|
||||
namespace common {
|
||||
|
||||
class MGPUQuantileTest : public BaseMGPUTest {};
|
||||
|
||||
TEST(GPUQuantile, Basic) {
|
||||
constexpr size_t kRows = 1000, kCols = 100, kBins = 256;
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
@@ -86,11 +95,7 @@ TEST(GPUQuantile, Unique) {
|
||||
// if with_error is true, the test tolerates floating point error
|
||||
void TestQuantileElemRank(int32_t device, Span<SketchEntry const> in,
|
||||
Span<bst_row_t const> d_columns_ptr, bool with_error = false) {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaSetDevice(device));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipSetDevice(device));
|
||||
#endif
|
||||
std::vector<SketchEntry> h_in(in.size());
|
||||
dh::CopyDeviceSpanToVector(&h_in, in);
|
||||
std::vector<bst_row_t> h_columns_ptr(d_columns_ptr.size());
|
||||
@@ -349,12 +354,11 @@ TEST(GPUQuantile, MultiMerge) {
|
||||
}
|
||||
|
||||
namespace {
|
||||
void TestAllReduceBasic(int32_t n_gpus) {
|
||||
void TestAllReduceBasic() {
|
||||
auto const world = collective::GetWorldSize();
|
||||
CHECK_EQ(world, n_gpus);
|
||||
constexpr size_t kRows = 1000, kCols = 100;
|
||||
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins, MetaInfo const& info) {
|
||||
auto const device = collective::GetRank();
|
||||
auto const device = GPUIDX;
|
||||
|
||||
// Set up single node version;
|
||||
HostDeviceVector<FeatureType> ft({}, device);
|
||||
@@ -398,7 +402,7 @@ void TestAllReduceBasic(int32_t n_gpus) {
|
||||
AdapterDeviceSketch(adapter.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_distributed);
|
||||
sketch_distributed.AllReduce();
|
||||
sketch_distributed.AllReduce(false);
|
||||
sketch_distributed.Unique();
|
||||
|
||||
ASSERT_EQ(sketch_distributed.ColumnsPtr().size(),
|
||||
@@ -427,23 +431,66 @@ void TestAllReduceBasic(int32_t n_gpus) {
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
TEST(GPUQuantile, MGPUAllReduceBasic) {
|
||||
auto const n_gpus = AllVisibleGPUs();
|
||||
if (n_gpus <= 1) {
|
||||
GTEST_SKIP() << "Skipping MGPUAllReduceBasic test with # GPUs = " << n_gpus;
|
||||
}
|
||||
RunWithInMemoryCommunicator(n_gpus, TestAllReduceBasic, n_gpus);
|
||||
TEST_F(MGPUQuantileTest, AllReduceBasic) {
|
||||
DoTest(TestAllReduceBasic);
|
||||
}
|
||||
|
||||
namespace {
|
||||
void TestSameOnAllWorkers(std::int32_t n_gpus) {
|
||||
void TestColumnSplitBasic() {
|
||||
auto const world = collective::GetWorldSize();
|
||||
auto const rank = collective::GetRank();
|
||||
std::size_t constexpr kRows = 1000, kCols = 100, kBins = 64;
|
||||
|
||||
auto m = std::unique_ptr<DMatrix>{[=]() {
|
||||
auto dmat = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix();
|
||||
return dmat->SliceCol(world, rank);
|
||||
}()};
|
||||
|
||||
// Generate cuts for distributed environment.
|
||||
auto ctx = MakeCUDACtx(GPUIDX);
|
||||
HistogramCuts distributed_cuts = common::DeviceSketch(&ctx, m.get(), kBins);
|
||||
|
||||
// Generate cuts for single node environment
|
||||
collective::Finalize();
|
||||
CHECK_EQ(collective::GetWorldSize(), 1);
|
||||
HistogramCuts single_node_cuts = common::DeviceSketch(&ctx, m.get(), kBins);
|
||||
|
||||
auto const& sptrs = single_node_cuts.Ptrs();
|
||||
auto const& dptrs = distributed_cuts.Ptrs();
|
||||
auto const& svals = single_node_cuts.Values();
|
||||
auto const& dvals = distributed_cuts.Values();
|
||||
auto const& smins = single_node_cuts.MinValues();
|
||||
auto const& dmins = distributed_cuts.MinValues();
|
||||
|
||||
EXPECT_EQ(sptrs.size(), dptrs.size());
|
||||
for (size_t i = 0; i < sptrs.size(); ++i) {
|
||||
EXPECT_EQ(sptrs[i], dptrs[i]) << "rank: " << rank << ", i: " << i;
|
||||
}
|
||||
|
||||
EXPECT_EQ(svals.size(), dvals.size());
|
||||
for (size_t i = 0; i < svals.size(); ++i) {
|
||||
EXPECT_NEAR(svals[i], dvals[i], 2e-2f) << "rank: " << rank << ", i: " << i;
|
||||
}
|
||||
|
||||
EXPECT_EQ(smins.size(), dmins.size());
|
||||
for (size_t i = 0; i < smins.size(); ++i) {
|
||||
EXPECT_FLOAT_EQ(smins[i], dmins[i]) << "rank: " << rank << ", i: " << i;
|
||||
}
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
// Runs TestColumnSplitBasic through the MGPUQuantileTest fixture's DoTest helper.
// NOTE(review): DoTest is inherited from BaseMGPUTest, which is not visible here;
// presumably it launches the callable once per worker -- confirm.
TEST_F(MGPUQuantileTest, ColumnSplitBasic) {
|
||||
DoTest(TestColumnSplitBasic);
|
||||
}
|
||||
|
||||
namespace {
|
||||
void TestSameOnAllWorkers() {
|
||||
auto world = collective::GetWorldSize();
|
||||
CHECK_EQ(world, n_gpus);
|
||||
constexpr size_t kRows = 1000, kCols = 100;
|
||||
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins,
|
||||
MetaInfo const &info) {
|
||||
auto const rank = collective::GetRank();
|
||||
auto const device = rank;
|
||||
auto const device = GPUIDX;
|
||||
HostDeviceVector<FeatureType> ft({}, device);
|
||||
SketchContainer sketch_distributed(ft, n_bins, kCols, kRows, device);
|
||||
HostDeviceVector<float> storage({}, device);
|
||||
@@ -455,7 +502,7 @@ void TestSameOnAllWorkers(std::int32_t n_gpus) {
|
||||
AdapterDeviceSketch(adapter.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_distributed);
|
||||
sketch_distributed.AllReduce();
|
||||
sketch_distributed.AllReduce(false);
|
||||
sketch_distributed.Unique();
|
||||
TestQuantileElemRank(device, sketch_distributed.Data(), sketch_distributed.ColumnsPtr(), true);
|
||||
|
||||
@@ -497,12 +544,8 @@ void TestSameOnAllWorkers(std::int32_t n_gpus) {
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
TEST(GPUQuantile, MGPUSameOnAllWorkers) {
|
||||
auto const n_gpus = AllVisibleGPUs();
|
||||
if (n_gpus <= 1) {
|
||||
GTEST_SKIP() << "Skipping MGPUSameOnAllWorkers test with # GPUs = " << n_gpus;
|
||||
}
|
||||
RunWithInMemoryCommunicator(n_gpus, TestSameOnAllWorkers, n_gpus);
|
||||
TEST_F(MGPUQuantileTest, SameOnAllWorkers) {
|
||||
DoTest(TestSameOnAllWorkers);
|
||||
}
|
||||
|
||||
TEST(GPUQuantile, Push) {
|
||||
|
||||
@@ -30,8 +30,7 @@
|
||||
|
||||
namespace xgboost::ltr {
|
||||
void TestCalcQueriesInvIDCG() {
|
||||
Context ctx;
|
||||
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
std::size_t n_groups = 5, n_samples_per_group = 32;
|
||||
|
||||
dh::device_vector<float> scores(n_samples_per_group * n_groups);
|
||||
@@ -92,20 +91,17 @@ void TestRankingCache(Context const* ctx) {
|
||||
} // namespace
|
||||
|
||||
TEST(RankingCache, InitFromGPU) {
|
||||
Context ctx;
|
||||
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestRankingCache(&ctx);
|
||||
}
|
||||
|
||||
TEST(NDCGCache, InitFromGPU) {
|
||||
Context ctx;
|
||||
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestNDCGCache(&ctx);
|
||||
}
|
||||
|
||||
TEST(MAPCache, InitFromGPU) {
|
||||
Context ctx;
|
||||
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestMAPCache(&ctx);
|
||||
}
|
||||
} // namespace xgboost::ltr
|
||||
|
||||
108
tests/cpp/common/test_ref_resource_view.cc
Normal file
108
tests/cpp/common/test_ref_resource_view.cc
Normal file
@@ -0,0 +1,108 @@
|
||||
/**
|
||||
* Copyright 2023, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <cstddef> // for size_t
|
||||
#include <memory> // for make_shared, make_unique
|
||||
#include <numeric> // for iota
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../../../src/common/ref_resource_view.h"
|
||||
#include "dmlc/filesystem.h" // for TemporaryDirectory
|
||||
|
||||
namespace xgboost::common {
|
||||
// Checks that RefResourceView keeps its backing resource alive through the shared
// pointer, reports the element count it was given, and supports fill-on-construct,
// iteration and front()/back() access.
TEST(RefResourceView, Basic) {
  std::size_t n_bytes = 1024;
  auto mem = std::make_shared<MallocResource>(n_bytes);
  {
    RefResourceView view{reinterpret_cast<float*>(mem->Data()), mem->Size() / sizeof(float), mem};

    RefResourceView kview{reinterpret_cast<float const*>(mem->Data()), mem->Size() / sizeof(float),
                          mem};
    // `mem` itself plus one reference held by each of the two views.
    ASSERT_EQ(mem.use_count(), 3);
    // The element type is float, so the expected count is n_bytes / sizeof(float).
    // The previous `sizeof(1024)` was sizeof(int) and only matched by coincidence.
    ASSERT_EQ(view.size(), n_bytes / sizeof(float));
    ASSERT_EQ(kview.size(), n_bytes / sizeof(float));
  }
  {
    // The trailing argument is the initial value for every element.
    RefResourceView view{reinterpret_cast<float*>(mem->Data()), mem->Size() / sizeof(float), mem,
                         1.5f};
    for (auto v : view) {
      ASSERT_EQ(v, 1.5f);
    }
    std::iota(view.begin(), view.end(), 0.0f);
    ASSERT_EQ(view.front(), 0.0f);
    ASSERT_EQ(view.back(), static_cast<float>(view.size() - 1));

    view.front() = 1.0f;
    view.back() = 2.0f;
    ASSERT_EQ(view.front(), 1.0f);
    ASSERT_EQ(view.back(), 2.0f);
  }
  // Both scopes have ended, so only `mem` still owns the resource.
  ASSERT_EQ(mem.use_count(), 1);
}
|
||||
|
||||
// Round-trips a vector of std::size_t through WriteVec/ReadVec using an aligned file
// writer and a private mmap reader.
TEST(RefResourceView, IO) {
  dmlc::TemporaryDirectory tmpdir;
  auto path = tmpdir.path + "/testfile";
  auto data = MakeFixedVecWithMalloc(123, std::size_t{1});

  {
    // A raw write reports exactly the payload size.
    auto writer = std::make_unique<AlignedFileWriteStream>(StringView{path}, "wb");
    ASSERT_EQ(writer->Write(data.data(), data.size_bytes()), data.size_bytes());
  }
  {
    // WriteVec additionally emits the element count ahead of the payload. The file is
    // reopened with "wb", so this replaces what the previous scope wrote.
    auto writer = std::make_unique<AlignedFileWriteStream>(StringView{path}, "wb");
    ASSERT_EQ(WriteVec(writer.get(), data),
              data.size_bytes() + sizeof(RefResourceView<std::size_t>::size_type));
  }
  {
    auto reader = std::make_unique<PrivateMmapConstStream>(
        path, 0, data.size_bytes() + sizeof(RefResourceView<std::size_t>::size_type));
    auto read = MakeFixedVecWithMalloc(123, std::size_t{1});
    ASSERT_TRUE(ReadVec(reader.get(), &read));
    for (auto it = read.begin(); it != read.end(); ++it) {
      ASSERT_EQ(*it, 1ul);
    }
  }
}
|
||||
|
||||
// Exercises WriteVec/ReadVec with a float payload: the write reports extra bytes for
// alignment, a read with the wrong element type fails, and a correctly typed read
// returns the stored values.
TEST(RefResourceView, IOAligned) {
  dmlc::TemporaryDirectory tmpdir;
  auto path = tmpdir.path + "/testfile";
  auto data = MakeFixedVecWithMalloc(123, 1.0f);

  {
    auto fo = std::make_unique<AlignedFileWriteStream>(StringView{path}, "wb");
    // + sizeof(float) for alignment
    // NOTE(review): presumably the writer pads the output to an 8-byte boundary --
    // confirm against AlignedFileWriteStream.
    ASSERT_EQ(WriteVec(fo.get(), data),
              data.size_bytes() + sizeof(RefResourceView<std::size_t>::size_type) + sizeof(float));
  }
  {
    auto fi = std::make_unique<PrivateMmapConstStream>(
        path, 0, data.size_bytes() + sizeof(RefResourceView<std::size_t>::size_type));
    // wrong type, float vs. double
    auto read = MakeFixedVecWithMalloc(123, 2.0);
    ASSERT_FALSE(ReadVec(fi.get(), &read));
  }
  {
    auto fi = std::make_unique<PrivateMmapConstStream>(
        path, 0, data.size_bytes() + sizeof(RefResourceView<std::size_t>::size_type));
    // Start from 2.0f so a successful read must overwrite every element with 1.0f.
    auto read = MakeFixedVecWithMalloc(123, 2.0f);
    ASSERT_TRUE(ReadVec(fi.get(), &read));
    for (auto v : read) {
      // Compare against a float literal; the elements are float, not unsigned long.
      ASSERT_EQ(v, 1.0f);
    }
  }
  {
    // Test std::vector
    // Named `vec` so it does not shadow the outer `data`.
    std::vector<float> vec(123);
    std::iota(vec.begin(), vec.end(), 0.0f);
    auto fo = std::make_unique<AlignedFileWriteStream>(StringView{path}, "wb");
    // + sizeof(float) for alignment
    ASSERT_EQ(WriteVec(fo.get(), vec), vec.size() * sizeof(float) +
                                           sizeof(RefResourceView<std::size_t>::size_type) +
                                           sizeof(float));
  }
}
|
||||
} // namespace xgboost::common
|
||||
@@ -25,37 +25,20 @@ struct TestStatus {
|
||||
|
||||
public:
|
||||
TestStatus () {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaMalloc(&status_, sizeof(int)));
|
||||
int h_status = 1;
|
||||
dh::safe_cuda(cudaMemcpy(status_, &h_status,
|
||||
sizeof(int), cudaMemcpyHostToDevice));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipMalloc(&status_, sizeof(int)));
|
||||
int h_status = 1;
|
||||
dh::safe_cuda(hipMemcpy(status_, &h_status,
|
||||
sizeof(int), hipMemcpyHostToDevice));
|
||||
#endif
|
||||
}
|
||||
~TestStatus() {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaFree(status_));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipFree(status_));
|
||||
#endif
|
||||
}
|
||||
|
||||
int Get() {
|
||||
int h_status;
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaMemcpy(&h_status, status_,
|
||||
sizeof(int), cudaMemcpyDeviceToHost));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipMemcpy(&h_status, status_,
|
||||
sizeof(int), hipMemcpyDeviceToHost));
|
||||
#endif
|
||||
|
||||
return h_status;
|
||||
}
|
||||
|
||||
@@ -112,22 +95,14 @@ TEST(GPUSpan, FromOther) {
|
||||
}
|
||||
|
||||
TEST(GPUSpan, Assignment) {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaSetDevice(0));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipSetDevice(0));
|
||||
#endif
|
||||
TestStatus status;
|
||||
dh::LaunchN(16, TestAssignment{status.Data()});
|
||||
ASSERT_EQ(status.Get(), 1);
|
||||
}
|
||||
|
||||
TEST(GPUSpan, TestStatus) {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaSetDevice(0));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipSetDevice(0));
|
||||
#endif
|
||||
TestStatus status;
|
||||
dh::LaunchN(16, TestTestStatus{status.Data()});
|
||||
ASSERT_EQ(status.Get(), -1);
|
||||
@@ -150,11 +125,7 @@ struct TestEqual {
|
||||
};
|
||||
|
||||
TEST(GPUSpan, WithTrust) {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaSetDevice(0));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipSetDevice(0));
|
||||
#endif
|
||||
// Not advised to initialize span with host_vector, since h_vec.data() is
|
||||
// a host function.
|
||||
thrust::host_vector<float> h_vec (16);
|
||||
@@ -191,22 +162,14 @@ TEST(GPUSpan, WithTrust) {
|
||||
}
|
||||
|
||||
TEST(GPUSpan, BeginEnd) {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaSetDevice(0));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipSetDevice(0));
|
||||
#endif
|
||||
TestStatus status;
|
||||
dh::LaunchN(16, TestBeginEnd{status.Data()});
|
||||
ASSERT_EQ(status.Get(), 1);
|
||||
}
|
||||
|
||||
TEST(GPUSpan, RBeginREnd) {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaSetDevice(0));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipSetDevice(0));
|
||||
#endif
|
||||
TestStatus status;
|
||||
dh::LaunchN(16, TestRBeginREnd{status.Data()});
|
||||
ASSERT_EQ(status.Get(), 1);
|
||||
@@ -238,22 +201,14 @@ TEST(GPUSpan, Modify) {
|
||||
}
|
||||
|
||||
TEST(GPUSpan, Observers) {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaSetDevice(0));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipSetDevice(0));
|
||||
#endif
|
||||
TestStatus status;
|
||||
dh::LaunchN(16, TestObservers{status.Data()});
|
||||
ASSERT_EQ(status.Get(), 1);
|
||||
}
|
||||
|
||||
TEST(GPUSpan, Compare) {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaSetDevice(0));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipSetDevice(0));
|
||||
#endif
|
||||
TestStatus status;
|
||||
dh::LaunchN(16, TestIterCompare{status.Data()});
|
||||
ASSERT_EQ(status.Get(), 1);
|
||||
@@ -273,11 +228,7 @@ struct TestElementAccess {
|
||||
};
|
||||
|
||||
TEST(GPUSpanDeathTest, ElementAccess) {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaSetDevice(0));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipSetDevice(0));
|
||||
#endif
|
||||
auto test_element_access = []() {
|
||||
thrust::host_vector<float> h_vec (16);
|
||||
InitializeRange(h_vec.begin(), h_vec.end());
|
||||
@@ -375,13 +326,8 @@ void TestFrontBack() {
|
||||
// make sure the termination happens inside this test.
|
||||
try {
|
||||
dh::LaunchN(1, [=] __device__(size_t) { s.front(); });
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaDeviceSynchronize());
|
||||
dh::safe_cuda(cudaGetLastError());
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipDeviceSynchronize());
|
||||
dh::safe_cuda(hipGetLastError());
|
||||
#endif
|
||||
} catch (dmlc::Error const& e) {
|
||||
std::terminate();
|
||||
}
|
||||
@@ -391,13 +337,8 @@ void TestFrontBack() {
|
||||
{
|
||||
try {
|
||||
dh::LaunchN(1, [=] __device__(size_t) { s.back(); });
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaDeviceSynchronize());
|
||||
dh::safe_cuda(cudaGetLastError());
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipDeviceSynchronize());
|
||||
dh::safe_cuda(hipGetLastError());
|
||||
#endif
|
||||
} catch (dmlc::Error const& e) {
|
||||
std::terminate();
|
||||
}
|
||||
@@ -447,66 +388,42 @@ TEST(GPUSpanDeathTest, Subspan) {
|
||||
}
|
||||
|
||||
TEST(GPUSpanIter, Construct) {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaSetDevice(0));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipSetDevice(0));
|
||||
#endif
|
||||
TestStatus status;
|
||||
dh::LaunchN(16, TestIterConstruct{status.Data()});
|
||||
ASSERT_EQ(status.Get(), 1);
|
||||
}
|
||||
|
||||
TEST(GPUSpanIter, Ref) {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaSetDevice(0));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipSetDevice(0));
|
||||
#endif
|
||||
TestStatus status;
|
||||
dh::LaunchN(16, TestIterRef{status.Data()});
|
||||
ASSERT_EQ(status.Get(), 1);
|
||||
}
|
||||
|
||||
TEST(GPUSpanIter, Calculate) {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaSetDevice(0));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipSetDevice(0));
|
||||
#endif
|
||||
TestStatus status;
|
||||
dh::LaunchN(16, TestIterCalculate{status.Data()});
|
||||
ASSERT_EQ(status.Get(), 1);
|
||||
}
|
||||
|
||||
TEST(GPUSpanIter, Compare) {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaSetDevice(0));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipSetDevice(0));
|
||||
#endif
|
||||
TestStatus status;
|
||||
dh::LaunchN(16, TestIterCompare{status.Data()});
|
||||
ASSERT_EQ(status.Get(), 1);
|
||||
}
|
||||
|
||||
TEST(GPUSpan, AsBytes) {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaSetDevice(0));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipSetDevice(0));
|
||||
#endif
|
||||
TestStatus status;
|
||||
dh::LaunchN(16, TestAsBytes{status.Data()});
|
||||
ASSERT_EQ(status.Get(), 1);
|
||||
}
|
||||
|
||||
TEST(GPUSpan, AsWritableBytes) {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaSetDevice(0));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipSetDevice(0));
|
||||
#endif
|
||||
TestStatus status;
|
||||
dh::LaunchN(16, TestAsWritableBytes{status.Data()});
|
||||
ASSERT_EQ(status.Get(), 1);
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#include "../../../src/common/stats.h"
|
||||
#include "../../../src/common/transform_iterator.h" // common::MakeIndexTransformIter
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
@@ -71,7 +72,7 @@ TEST(Stats, Median) {
|
||||
ASSERT_EQ(m, .5f);
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
ctx.gpu_id = 0;
|
||||
ctx = ctx.MakeCUDA(0);
|
||||
ASSERT_FALSE(ctx.IsCPU());
|
||||
Median(&ctx, values, weights, &out);
|
||||
m = out(0);
|
||||
@@ -80,7 +81,7 @@ TEST(Stats, Median) {
|
||||
}
|
||||
|
||||
{
|
||||
ctx.gpu_id = Context::kCpuId;
|
||||
ctx = ctx.MakeCPU();
|
||||
// 4x2 matrix
|
||||
linalg::Tensor<float, 2> values{{0.f, 0.f, 0.f, 0.f, 1.f, 1.f, 2.f, 2.f}, {4, 2}, ctx.gpu_id};
|
||||
HostDeviceVector<float> weights;
|
||||
@@ -90,7 +91,7 @@ TEST(Stats, Median) {
|
||||
ASSERT_EQ(out(1), .5f);
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
ctx.gpu_id = 0;
|
||||
ctx = ctx.MakeCUDA(0);
|
||||
Median(&ctx, values, weights, &out);
|
||||
ASSERT_EQ(out(0), .5f);
|
||||
ASSERT_EQ(out(1), .5f);
|
||||
@@ -123,8 +124,7 @@ TEST(Stats, Mean) {
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
TEST(Stats, GPUMean) {
|
||||
Context ctx;
|
||||
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestMean(&ctx);
|
||||
}
|
||||
#endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
|
||||
@@ -3,9 +3,9 @@
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <cstddef> // std::size_t
|
||||
#include <utility> // std::pair
|
||||
#include <vector> // std::vector
|
||||
#include <cstddef> // std::size_t
|
||||
#include <utility> // std::pair
|
||||
#include <vector> // std::vector
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
#include "../../../src/common/linalg_op.cuh" // ElementWiseTransformDevice
|
||||
@@ -14,6 +14,7 @@
|
||||
#include "../../../src/common/linalg_op.hip.h" // ElementWiseTransformDevice
|
||||
#include "../../../src/common/stats.hip.h"
|
||||
#endif
|
||||
#include "../helpers.h"
|
||||
#include "xgboost/base.h" // XGBOOST_DEVICE
|
||||
#include "xgboost/context.h" // Context
|
||||
#include "xgboost/host_device_vector.h" // HostDeviceVector
|
||||
@@ -38,7 +39,7 @@ class StatsGPU : public ::testing::Test {
|
||||
}
|
||||
|
||||
public:
|
||||
void SetUp() override { ctx_.gpu_id = 0; }
|
||||
void SetUp() override { ctx_ = MakeCUDACtx(0); }
|
||||
|
||||
void WeightedMulti() {
|
||||
// data for one segment
|
||||
|
||||
@@ -40,25 +40,13 @@ TEST(ArrayInterface, Stream) {
|
||||
TEST(ArrayInterface, Ptr) {
|
||||
std::vector<float> h_data(10);
|
||||
ASSERT_FALSE(ArrayInterfaceHandler::IsCudaPtr(h_data.data()));
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaGetLastError());
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipGetLastError());
|
||||
#endif
|
||||
|
||||
dh::device_vector<float> d_data(10);
|
||||
ASSERT_TRUE(ArrayInterfaceHandler::IsCudaPtr(d_data.data().get()));
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaGetLastError());
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipGetLastError());
|
||||
#endif
|
||||
|
||||
ASSERT_FALSE(ArrayInterfaceHandler::IsCudaPtr(nullptr));
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaGetLastError());
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipGetLastError());
|
||||
#endif
|
||||
}
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -51,11 +51,7 @@ void TestCudfAdapter()
|
||||
}
|
||||
});
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaDeviceSynchronize());
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipDeviceSynchronize());
|
||||
#endif
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
#include "../../../src/data/ellpack_page.hip.h"
|
||||
#endif
|
||||
#include "../../../src/data/ellpack_page.h"
|
||||
#include "../../../src/tree/param.h" // TrainParam
|
||||
#include "../helpers.h"
|
||||
#include "../histogram_helpers.h"
|
||||
@@ -233,11 +234,7 @@ TEST(EllpackPage, Compact) {
|
||||
|
||||
dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(0),
|
||||
current_row, row_d.data().get()));
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaDeviceSynchronize());
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipDeviceSynchronize());
|
||||
#endif
|
||||
thrust::copy(row_d.begin(), row_d.end(), row.begin());
|
||||
|
||||
dh::LaunchN(kCols,
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/data.h>
|
||||
|
||||
#include "../../../src/common/io.h" // for PrivateMmapConstStream, AlignedResourceReadStream...
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
#include "../../../src/data/ellpack_page.cuh"
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
@@ -14,8 +15,7 @@
|
||||
#include "../filesystem.h" // dmlc::TemporaryDirectory
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
namespace xgboost::data {
|
||||
TEST(EllpackPageRawFormat, IO) {
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
|
||||
@@ -26,15 +26,17 @@ TEST(EllpackPageRawFormat, IO) {
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
std::string path = tmpdir.path + "/ellpack.page";
|
||||
|
||||
std::size_t n_bytes{0};
|
||||
{
|
||||
std::unique_ptr<dmlc::Stream> fo{dmlc::Stream::Create(path.c_str(), "w")};
|
||||
auto fo = std::make_unique<common::AlignedFileWriteStream>(StringView{path}, "wb");
|
||||
for (auto const &ellpack : m->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
format->Write(ellpack, fo.get());
|
||||
n_bytes += format->Write(ellpack, fo.get());
|
||||
}
|
||||
}
|
||||
|
||||
EllpackPage page;
|
||||
std::unique_ptr<dmlc::SeekStream> fi{dmlc::SeekStream::CreateForRead(path.c_str())};
|
||||
std::unique_ptr<common::AlignedResourceReadStream> fi{
|
||||
std::make_unique<common::PrivateMmapConstStream>(path.c_str(), 0, n_bytes)};
|
||||
format->Read(&page, fi.get());
|
||||
|
||||
for (auto const &ellpack : m->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
@@ -48,5 +50,4 @@ TEST(EllpackPageRawFormat, IO) {
|
||||
ASSERT_EQ(loaded->gidx_buffer.HostVector(), orig->gidx_buffer.HostVector());
|
||||
}
|
||||
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::data
|
||||
|
||||
@@ -26,28 +26,32 @@
|
||||
#include "xgboost/context.h" // for Context
|
||||
#include "xgboost/host_device_vector.h" // for HostDeviceVector
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
TEST(GradientIndex, ExternalMemory) {
|
||||
namespace xgboost::data {
|
||||
TEST(GradientIndex, ExternalMemoryBaseRowID) {
|
||||
Context ctx;
|
||||
std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(10000);
|
||||
auto p_fmat = RandomDataGenerator{4096, 256, 0.5}
|
||||
.Device(ctx.gpu_id)
|
||||
.Batches(8)
|
||||
.GenerateSparsePageDMatrix("cache", true);
|
||||
|
||||
std::vector<size_t> base_rowids;
|
||||
std::vector<float> hessian(dmat->Info().num_row_, 1);
|
||||
for (auto const &page : dmat->GetBatches<GHistIndexMatrix>(&ctx, {64, hessian, true})) {
|
||||
std::vector<float> hessian(p_fmat->Info().num_row_, 1);
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(&ctx, {64, hessian, true})) {
|
||||
base_rowids.push_back(page.base_rowid);
|
||||
}
|
||||
size_t i = 0;
|
||||
for (auto const &page : dmat->GetBatches<SparsePage>()) {
|
||||
|
||||
std::size_t i = 0;
|
||||
for (auto const &page : p_fmat->GetBatches<SparsePage>()) {
|
||||
ASSERT_EQ(base_rowids[i], page.base_rowid);
|
||||
++i;
|
||||
}
|
||||
|
||||
base_rowids.clear();
|
||||
for (auto const &page : dmat->GetBatches<GHistIndexMatrix>(&ctx, {64, hessian, false})) {
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(&ctx, {64, hessian, false})) {
|
||||
base_rowids.push_back(page.base_rowid);
|
||||
}
|
||||
i = 0;
|
||||
for (auto const &page : dmat->GetBatches<SparsePage>()) {
|
||||
for (auto const &page : p_fmat->GetBatches<SparsePage>()) {
|
||||
ASSERT_EQ(base_rowids[i], page.base_rowid);
|
||||
++i;
|
||||
}
|
||||
@@ -167,11 +171,10 @@ class GHistIndexMatrixTest : public testing::TestWithParam<std::tuple<float, flo
|
||||
ASSERT_TRUE(Xy->SingleColBlock());
|
||||
bst_bin_t constexpr kBins{17};
|
||||
auto p = BatchParam{kBins, threshold};
|
||||
Context gpu_ctx;
|
||||
gpu_ctx.gpu_id = 0;
|
||||
auto gpu_ctx = MakeCUDACtx(0);
|
||||
for (auto const &page : Xy->GetBatches<EllpackPage>(
|
||||
&gpu_ctx, BatchParam{kBins, tree::TrainParam::DftSparseThreshold()})) {
|
||||
from_ellpack.reset(new GHistIndexMatrix{&ctx, Xy->Info(), page, p});
|
||||
from_ellpack = std::make_unique<GHistIndexMatrix>(&ctx, Xy->Info(), page, p);
|
||||
}
|
||||
|
||||
for (auto const &from_sparse_page : Xy->GetBatches<GHistIndexMatrix>(&ctx, p)) {
|
||||
@@ -199,13 +202,15 @@ class GHistIndexMatrixTest : public testing::TestWithParam<std::tuple<float, flo
|
||||
|
||||
std::string from_sparse_buf;
|
||||
{
|
||||
common::MemoryBufferStream fo{&from_sparse_buf};
|
||||
columns_from_sparse.Write(&fo);
|
||||
common::AlignedMemWriteStream fo{&from_sparse_buf};
|
||||
auto n_bytes = columns_from_sparse.Write(&fo);
|
||||
ASSERT_EQ(fo.Tell(), n_bytes);
|
||||
}
|
||||
std::string from_ellpack_buf;
|
||||
{
|
||||
common::MemoryBufferStream fo{&from_ellpack_buf};
|
||||
columns_from_sparse.Write(&fo);
|
||||
common::AlignedMemWriteStream fo{&from_ellpack_buf};
|
||||
auto n_bytes = columns_from_sparse.Write(&fo);
|
||||
ASSERT_EQ(fo.Tell(), n_bytes);
|
||||
}
|
||||
ASSERT_EQ(from_sparse_buf, from_ellpack_buf);
|
||||
}
|
||||
@@ -228,6 +233,5 @@ INSTANTIATE_TEST_SUITE_P(GHistIndexMatrix, GHistIndexMatrixTest,
|
||||
std::make_tuple(.5f, .6), // sparse columns
|
||||
std::make_tuple(.6f, .4))); // dense columns
|
||||
|
||||
#endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
} // namespace xgboost::data
|
||||
|
||||
@@ -2,14 +2,18 @@
|
||||
* Copyright 2021-2023, XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/context.h> // for Context
|
||||
|
||||
#include <cstddef> // for size_t
|
||||
#include <memory> // for unique_ptr
|
||||
|
||||
#include "../../../src/common/column_matrix.h"
|
||||
#include "../../../src/data/gradient_index.h"
|
||||
#include "../../../src/common/io.h" // for MmapResource, AlignedResourceReadStream...
|
||||
#include "../../../src/data/gradient_index.h" // for GHistIndexMatrix
|
||||
#include "../../../src/data/sparse_page_source.h"
|
||||
#include "../helpers.h"
|
||||
#include "../helpers.h" // for RandomDataGenerator
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
namespace xgboost::data {
|
||||
TEST(GHistIndexPageRawFormat, IO) {
|
||||
Context ctx;
|
||||
|
||||
@@ -20,15 +24,18 @@ TEST(GHistIndexPageRawFormat, IO) {
|
||||
std::string path = tmpdir.path + "/ghistindex.page";
|
||||
auto batch = BatchParam{256, 0.5};
|
||||
|
||||
std::size_t bytes{0};
|
||||
{
|
||||
std::unique_ptr<dmlc::Stream> fo{dmlc::Stream::Create(path.c_str(), "w")};
|
||||
auto fo = std::make_unique<common::AlignedFileWriteStream>(StringView{path}, "wb");
|
||||
for (auto const &index : m->GetBatches<GHistIndexMatrix>(&ctx, batch)) {
|
||||
format->Write(index, fo.get());
|
||||
bytes += format->Write(index, fo.get());
|
||||
}
|
||||
}
|
||||
|
||||
GHistIndexMatrix page;
|
||||
std::unique_ptr<dmlc::SeekStream> fi{dmlc::SeekStream::CreateForRead(path.c_str())};
|
||||
|
||||
std::unique_ptr<common::AlignedResourceReadStream> fi{
|
||||
std::make_unique<common::PrivateMmapConstStream>(path, 0, bytes)};
|
||||
format->Read(&page, fi.get());
|
||||
|
||||
for (auto const &gidx : m->GetBatches<GHistIndexMatrix>(&ctx, batch)) {
|
||||
@@ -37,6 +44,8 @@ TEST(GHistIndexPageRawFormat, IO) {
|
||||
ASSERT_EQ(loaded.cut.MinValues(), page.cut.MinValues());
|
||||
ASSERT_EQ(loaded.cut.Values(), page.cut.Values());
|
||||
ASSERT_EQ(loaded.base_rowid, page.base_rowid);
|
||||
ASSERT_EQ(loaded.row_ptr.size(), page.row_ptr.size());
|
||||
ASSERT_TRUE(std::equal(loaded.row_ptr.cbegin(), loaded.row_ptr.cend(), page.row_ptr.cbegin()));
|
||||
ASSERT_EQ(loaded.IsDense(), page.IsDense());
|
||||
ASSERT_TRUE(std::equal(loaded.index.begin(), loaded.index.end(), page.index.begin()));
|
||||
ASSERT_TRUE(std::equal(loaded.index.Offset(), loaded.index.Offset() + loaded.index.OffsetSize(),
|
||||
@@ -45,5 +54,4 @@ TEST(GHistIndexPageRawFormat, IO) {
|
||||
ASSERT_EQ(loaded.Transpose().GetTypeSize(), loaded.Transpose().GetTypeSize());
|
||||
}
|
||||
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::data
|
||||
|
||||
@@ -12,8 +12,7 @@
|
||||
#include "../helpers.h"
|
||||
#include "xgboost/data.h" // DMatrix
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
namespace xgboost::data {
|
||||
TEST(IterativeDMatrix, Ref) {
|
||||
Context ctx;
|
||||
TestRefDMatrix<GHistIndexMatrix, NumpyArrayIterForTest>(
|
||||
@@ -21,7 +20,7 @@ TEST(IterativeDMatrix, Ref) {
|
||||
}
|
||||
|
||||
TEST(IterativeDMatrix, IsDense) {
|
||||
int n_bins = 16;
|
||||
bst_bin_t n_bins = 16;
|
||||
auto test = [n_bins](float sparsity) {
|
||||
NumpyArrayIterForTest iter(sparsity);
|
||||
auto n_threads = 0;
|
||||
@@ -38,5 +37,4 @@ TEST(IterativeDMatrix, IsDense) {
|
||||
test(0.1);
|
||||
test(1.0);
|
||||
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::data
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#include "../../../src/data/device_adapter.hip.h"
|
||||
#include "../../../src/data/ellpack_page.hip.h"
|
||||
#endif
|
||||
#include "../../../src/data/ellpack_page.h"
|
||||
#include "../../../src/data/iterative_dmatrix.h"
|
||||
#include "../../../src/tree/param.h" // TrainParam
|
||||
#include "../helpers.h"
|
||||
|
||||
@@ -47,11 +47,7 @@ std::string PrepareData(std::string typestr, thrust::device_vector<T>* out, cons
|
||||
}
|
||||
|
||||
TEST(MetaInfo, FromInterface) {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
cudaSetDevice(0);
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
hipSetDevice(0);
|
||||
#endif
|
||||
|
||||
Context ctx;
|
||||
thrust::device_vector<float> d_data;
|
||||
@@ -96,11 +92,7 @@ TEST(MetaInfo, GPUStridedData) {
|
||||
}
|
||||
|
||||
TEST(MetaInfo, Group) {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
cudaSetDevice(0);
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
hipSetDevice(0);
|
||||
#endif
|
||||
|
||||
MetaInfo info;
|
||||
Context ctx;
|
||||
@@ -155,11 +147,7 @@ TEST(MetaInfo, GPUQid) {
|
||||
|
||||
|
||||
TEST(MetaInfo, DeviceExtend) {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaSetDevice(0));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipSetDevice(0));
|
||||
#endif
|
||||
|
||||
size_t const kRows = 100;
|
||||
MetaInfo lhs, rhs;
|
||||
|
||||
@@ -115,13 +115,8 @@ TEST(SimpleDMatrix, FromColumnarWithEmptyRows) {
|
||||
data.resize(kRows);
|
||||
thrust::sequence(data.begin(), data.end(), 0);
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaDeviceSynchronize());
|
||||
dh::safe_cuda(cudaGetLastError());
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipDeviceSynchronize());
|
||||
dh::safe_cuda(hipGetLastError());
|
||||
#endif
|
||||
|
||||
ASSERT_EQ(data.size(), kRows);
|
||||
|
||||
|
||||
@@ -76,9 +76,11 @@ TEST(SparsePageDMatrix, LoadFile) {
|
||||
// allow caller to retain pages so they can process multiple pages at the same time.
|
||||
template <typename Page>
|
||||
void TestRetainPage() {
|
||||
auto m = CreateSparsePageDMatrix(10000);
|
||||
std::size_t n_batches = 4;
|
||||
auto p_fmat = RandomDataGenerator{1024, 128, 0.5f}.Batches(n_batches).GenerateSparsePageDMatrix(
|
||||
"cache", true);
|
||||
Context ctx;
|
||||
auto batches = m->GetBatches<Page>(&ctx);
|
||||
auto batches = p_fmat->GetBatches<Page>(&ctx);
|
||||
auto begin = batches.begin();
|
||||
auto end = batches.end();
|
||||
|
||||
@@ -94,7 +96,7 @@ void TestRetainPage() {
|
||||
}
|
||||
ASSERT_EQ(pages.back().Size(), (*it).Size());
|
||||
}
|
||||
ASSERT_GE(iterators.size(), 2);
|
||||
ASSERT_GE(iterators.size(), n_batches);
|
||||
|
||||
for (size_t i = 0; i < iterators.size(); ++i) {
|
||||
ASSERT_EQ((*iterators[i]).Size(), pages.at(i).Size());
|
||||
@@ -102,7 +104,7 @@ void TestRetainPage() {
|
||||
}
|
||||
|
||||
// make sure it's const and the caller can not modify the content of page.
|
||||
for (auto &page : m->GetBatches<Page>({&ctx})) {
|
||||
for (auto &page : p_fmat->GetBatches<Page>({&ctx})) {
|
||||
static_assert(std::is_const<std::remove_reference_t<decltype(page)>>::value);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
#include "../../../src/data/ellpack_page.hip.h"
|
||||
#endif
|
||||
#include "../../../src/data/ellpack_page.h"
|
||||
#include "../../../src/data/sparse_page_dmatrix.h"
|
||||
#include "../../../src/tree/param.h" // TrainParam
|
||||
#include "../filesystem.h" // dmlc::TemporaryDirectory
|
||||
|
||||
@@ -2,20 +2,20 @@
|
||||
* Copyright 2021-2023, XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/data.h> // for CSCPage, SortedCSCPage, SparsePage
|
||||
#include <xgboost/data.h> // for CSCPage, SortedCSCPage, SparsePage
|
||||
|
||||
#include <memory> // for allocator, unique_ptr, __shared_ptr_ac...
|
||||
#include <string> // for char_traits, operator+, basic_string
|
||||
#include <memory> // for allocator, unique_ptr, __shared_ptr_ac...
|
||||
#include <string> // for char_traits, operator+, basic_string
|
||||
|
||||
#include "../../../src/common/io.h" // for PrivateMmapConstStream, AlignedResourceReadStream...
|
||||
#include "../../../src/data/sparse_page_writer.h" // for CreatePageFormat
|
||||
#include "../helpers.h" // for RandomDataGenerator
|
||||
#include "dmlc/filesystem.h" // for TemporaryDirectory
|
||||
#include "dmlc/io.h" // for SeekStream, Stream
|
||||
#include "dmlc/io.h" // for Stream
|
||||
#include "gtest/gtest_pred_impl.h" // for Test, AssertionResult, ASSERT_EQ, TEST
|
||||
#include "xgboost/context.h" // for Context
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
namespace xgboost::data {
|
||||
template <typename S> void TestSparsePageRawFormat() {
|
||||
std::unique_ptr<SparsePageFormat<S>> format{CreatePageFormat<S>("raw")};
|
||||
Context ctx;
|
||||
@@ -25,17 +25,19 @@ template <typename S> void TestSparsePageRawFormat() {
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
std::string path = tmpdir.path + "/sparse.page";
|
||||
S orig;
|
||||
std::size_t n_bytes{0};
|
||||
{
|
||||
// block code to flush the stream
|
||||
std::unique_ptr<dmlc::Stream> fo{dmlc::Stream::Create(path.c_str(), "w")};
|
||||
auto fo = std::make_unique<common::AlignedFileWriteStream>(StringView{path}, "wb");
|
||||
for (auto const &page : m->GetBatches<S>(&ctx)) {
|
||||
orig.Push(page);
|
||||
format->Write(page, fo.get());
|
||||
n_bytes = format->Write(page, fo.get());
|
||||
}
|
||||
}
|
||||
|
||||
S page;
|
||||
std::unique_ptr<dmlc::SeekStream> fi{dmlc::SeekStream::CreateForRead(path.c_str())};
|
||||
std::unique_ptr<common::AlignedResourceReadStream> fi{
|
||||
std::make_unique<common::PrivateMmapConstStream>(path.c_str(), 0, n_bytes)};
|
||||
format->Read(&page, fi.get());
|
||||
for (size_t i = 0; i < orig.data.Size(); ++i) {
|
||||
ASSERT_EQ(page.data.HostVector()[i].fvalue,
|
||||
@@ -59,5 +61,4 @@ TEST(SparsePageRawFormat, CSCPage) {
|
||||
TEST(SparsePageRawFormat, SortedCSCPage) {
|
||||
TestSparsePageRawFormat<SortedCSCPage>();
|
||||
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::data
|
||||
|
||||
42
tests/cpp/gbm/test_gblinear.cu
Normal file
42
tests/cpp/gbm/test_gblinear.cu
Normal file
@@ -0,0 +1,42 @@
|
||||
/**
|
||||
* Copyright 2023, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/global_config.h> // for GlobalConfigThreadLocalStore
|
||||
#include <xgboost/json.h> // for Json, Object
|
||||
#include <xgboost/learner.h> // for Learner
|
||||
|
||||
#include <algorithm> // for transform
|
||||
#include <string> // for string
|
||||
#include <utility> // for swap
|
||||
|
||||
#include "../helpers.h" // for RandomDataGenerator
|
||||
|
||||
namespace xgboost {
|
||||
/**
 * Check that gblinear reports the device it was asked to run on.
 *
 * For each requested device a 10x10 dense DMatrix is trained for three rounds with the
 * `coord_descent` updater. Whatever is written to stderr while the configuration is saved
 * must contain the upper-cased device name.
 */
TEST(GBlinear, DispatchUpdater) {
  // Raise the global verbosity to 3 for the duration of the test; the previous value is
  // kept in `verbosity` and swapped back at the end.
  auto verbosity = 3;
  std::swap(GlobalConfigThreadLocalStore::Get()->verbosity, verbosity);

  auto test = [](std::string device) {
    auto p_fmat = RandomDataGenerator{10, 10, 0.0f}.GenerateDMatrix(true);
    std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};
    learner->SetParams(
        Args{{"booster", "gblinear"}, {"updater", "coord_descent"}, {"device", device}});
    learner->Configure();
    for (std::int32_t iter = 0; iter < 3; ++iter) {
      learner->UpdateOneIter(iter, p_fmat);
    }

    // Only the output emitted by SaveConfig is captured.
    Json config{Object{}};
    ::testing::internal::CaptureStderr();
    learner->SaveConfig(&config);
    auto str = ::testing::internal::GetCapturedStderr();

    // std::toupper is only defined for values representable as unsigned char (or EOF) and
    // returns int, so go through unsigned char and narrow back explicitly.
    std::transform(device.cbegin(), device.cend(), device.begin(),
                   [](unsigned char c) { return static_cast<char>(std::toupper(c)); });
    ASSERT_NE(str.find(device), std::string::npos);
  };
  test("cpu");
  test("gpu");

  // Restore the verbosity that was active before the test.
  std::swap(GlobalConfigThreadLocalStore::Get()->verbosity, verbosity);
}
|
||||
} // namespace xgboost
|
||||
@@ -1,17 +1,22 @@
|
||||
/*!
|
||||
* Copyright 2019-2022 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2019-2023, XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/context.h>
|
||||
#include <xgboost/host_device_vector.h> // for HostDeviceVector
|
||||
#include <xgboost/json.h> // for Json, Object
|
||||
#include <xgboost/learner.h> // for Learner
|
||||
|
||||
#include "../../../src/data/adapter.h"
|
||||
#include "../../../src/data/proxy_dmatrix.h"
|
||||
#include <limits> // for numeric_limits
|
||||
#include <memory> // for shared_ptr
|
||||
#include <optional> // for optional
|
||||
#include <string> // for string
|
||||
|
||||
#include "../../../src/data/proxy_dmatrix.h" // for DMatrixProxy
|
||||
#include "../../../src/gbm/gbtree.h"
|
||||
#include "../filesystem.h" // dmlc::TemporaryDirectory
|
||||
#include "../helpers.h"
|
||||
#include "xgboost/base.h"
|
||||
#include "xgboost/host_device_vector.h"
|
||||
#include "xgboost/learner.h"
|
||||
#include "xgboost/predictor.h"
|
||||
|
||||
namespace xgboost {
|
||||
@@ -113,12 +118,11 @@ TEST(GBTree, WrongUpdater) {
|
||||
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
TEST(GBTree, ChoosePredictor) {
|
||||
// The test ensures data don't get pulled into device.
|
||||
size_t constexpr kRows = 17;
|
||||
size_t constexpr kCols = 15;
|
||||
std::size_t constexpr kRows = 17, kCols = 15;
|
||||
|
||||
auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
|
||||
|
||||
auto& data = (*(p_dmat->GetBatches<SparsePage>().begin())).data;
|
||||
auto const& data = (*(p_dmat->GetBatches<SparsePage>().begin())).data;
|
||||
p_dmat->Info().labels.Reshape(kRows);
|
||||
|
||||
auto learner = std::unique_ptr<Learner>(Learner::Create({p_dmat}));
|
||||
@@ -127,14 +131,13 @@ TEST(GBTree, ChoosePredictor) {
|
||||
learner->UpdateOneIter(i, p_dmat);
|
||||
}
|
||||
ASSERT_TRUE(data.HostCanWrite());
|
||||
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
const std::string fname = tempdir.path + "/model_param.bst";
|
||||
|
||||
{
|
||||
std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(fname.c_str(), "w"));
|
||||
learner->Save(fo.get());
|
||||
}
|
||||
|
||||
// a new learner
|
||||
learner = std::unique_ptr<Learner>(Learner::Create({p_dmat}));
|
||||
{
|
||||
@@ -146,6 +149,8 @@ TEST(GBTree, ChoosePredictor) {
|
||||
learner->UpdateOneIter(i, p_dmat);
|
||||
}
|
||||
ASSERT_TRUE(data.HostCanWrite());
|
||||
ASSERT_FALSE(data.DeviceCanWrite());
|
||||
ASSERT_FALSE(data.DeviceCanRead());
|
||||
|
||||
// pull data into device.
|
||||
data.HostVector();
|
||||
@@ -162,7 +167,139 @@ TEST(GBTree, ChoosePredictor) {
|
||||
// data is not pulled back into host
|
||||
ASSERT_FALSE(data.HostCanWrite());
|
||||
}
|
||||
#endif // XGBOOST_USE_CUDA || XGBOOST_USE_HIP
|
||||
|
||||
/**
 * Verify which tree updater gets configured for every combination of `tree_method` and
 * device specification (either the `device` parameter or the numeric `gpu_id`).
 *
 * Each combination is exercised twice: once training through UpdateOneIter and once
 * through BoostOneIter with randomly generated gradients.
 */
TEST(GBTree, ChooseTreeMethod) {
  bst_row_t n_samples{128};
  bst_feature_t n_features{64};
  auto Xy = RandomDataGenerator{n_samples, n_features, 0.5f}.GenerateDMatrix(true);

  // Create and configure a learner. A device string starting with a digit or '-' is
  // passed as `gpu_id`, anything else as `device`. Parameters without a value are left
  // unset.
  auto make_learner = [&](std::optional<std::string> const& device,
                          std::optional<std::string> const& tree_method) {
    auto learner = std::unique_ptr<Learner>(Learner::Create({Xy}));
    if (tree_method.has_value()) {
      learner->SetParam("tree_method", tree_method.value());
    }
    if (device.has_value()) {
      auto const& d = device.value();
      // std::isdigit is only defined for values representable as unsigned char.
      if (std::isdigit(static_cast<unsigned char>(d.front())) || d.front() == '-') {
        learner->SetParam("gpu_id", d);
      } else {
        learner->SetParam("device", d);
      }
    }
    learner->Configure();
    return learner;
  };

  // Train with the built-in objective and return the saved updater configuration.
  auto with_update = [&](std::optional<std::string> device,
                         std::optional<std::string> tree_method) {
    auto learner = make_learner(device, tree_method);
    for (std::int32_t i = 0; i < 3; ++i) {
      learner->UpdateOneIter(i, Xy);
    }
    Json config{Object{}};
    learner->SaveConfig(&config);
    auto updater = config["learner"]["gradient_booster"]["updater"];
    CHECK(!IsA<Null>(updater));
    return updater;
  };

  // Train with externally supplied gradients and return the saved updater configuration.
  auto with_boost = [&](std::optional<std::string> device, std::optional<std::string> tree_method) {
    auto learner = make_learner(device, tree_method);
    for (std::int32_t i = 0; i < 3; ++i) {
      HostDeviceVector<GradientPair> gpair{GenerateRandomGradients(Xy->Info().num_row_)};
      learner->BoostOneIter(i, Xy, &gpair);
    }

    Json config{Object{}};
    learner->SaveConfig(&config);
    auto updater = config["learner"]["gradient_booster"]["updater"];
    return updater;
  };

  // |        | hist    | gpu_hist | exact | NA  |
  // |--------+---------+----------+-------+-----|
  // | CUDA:0 | GPU     | GPU (w)  | Err   | GPU |
  // | CPU    | CPU     | GPU (w)  | CPU   | CPU |
  // |--------+---------+----------+-------+-----|
  // | -1     | CPU     | GPU (w)  | CPU   | CPU |
  // | 0      | GPU     | GPU (w)  | Err   | GPU |
  // |--------+---------+----------+-------+-----|
  // | NA     | CPU     | GPU (w)  | CPU   | CPU |
  //
  // - (w): warning
  // - CPU: Run on CPU.
  // - GPU: Run on CUDA.
  // - Err: Not feasible.
  // - NA: Parameter is not specified.

  // When GPU hist is specified with a CPU context, we should emit an error. However, it's
  // quite difficult to detect whether the CPU context is being used because it's the
  // default or because it's specified by the user.

  // Key: (tree_method, device). Value: comma separated updater names, or "err" when
  // running the combination is expected to throw.
  std::map<std::pair<std::optional<std::string>, std::optional<std::string>>, std::string>
      expectation{
          // hist
          {{"hist", "-1"}, "grow_quantile_histmaker"},
          {{"hist", "0"}, "grow_gpu_hist"},
          {{"hist", "cpu"}, "grow_quantile_histmaker"},
          {{"hist", "cuda"}, "grow_gpu_hist"},
          {{"hist", "cuda:0"}, "grow_gpu_hist"},
          {{"hist", std::nullopt}, "grow_quantile_histmaker"},
          // gpu_hist
          {{"gpu_hist", "-1"}, "grow_gpu_hist"},
          {{"gpu_hist", "0"}, "grow_gpu_hist"},
          {{"gpu_hist", "cpu"}, "grow_gpu_hist"},
          {{"gpu_hist", "cuda"}, "grow_gpu_hist"},
          {{"gpu_hist", "cuda:0"}, "grow_gpu_hist"},
          {{"gpu_hist", std::nullopt}, "grow_gpu_hist"},
          // exact
          {{"exact", "-1"}, "grow_colmaker,prune"},
          {{"exact", "0"}, "err"},
          {{"exact", "cpu"}, "grow_colmaker,prune"},
          {{"exact", "cuda"}, "err"},
          {{"exact", "cuda:0"}, "err"},
          {{"exact", std::nullopt}, "grow_colmaker,prune"},
          // NA
          {{std::nullopt, "-1"}, "grow_quantile_histmaker"},
          {{std::nullopt, "0"}, "grow_gpu_hist"},  // default to hist
          {{std::nullopt, "cpu"}, "grow_quantile_histmaker"},
          {{std::nullopt, "cuda"}, "grow_gpu_hist"},
          {{std::nullopt, "cuda:0"}, "grow_gpu_hist"},
          {{std::nullopt, std::nullopt}, "grow_quantile_histmaker"},
      };

  // Run `fn` for every table entry and compare the configured updater names in order.
  auto run_test = [&](auto fn) {
    for (auto const& kv : expectation) {
      auto device = kv.first.second;
      auto tm = kv.first.first;

      if (kv.second == "err") {
        ASSERT_THROW({ fn(device, tm); }, dmlc::Error)
            << " device:" << device.value_or("NA") << " tm:" << tm.value_or("NA");
        continue;
      }
      auto up = fn(device, tm);
      auto ups = get<Array const>(up);
      auto exp_names = common::Split(kv.second, ',');
      ASSERT_EQ(exp_names.size(), ups.size());
      for (std::size_t i = 0; i < exp_names.size(); ++i) {
        ASSERT_EQ(get<String const>(ups[i]["name"]), exp_names[i])
            << " device:" << device.value_or("NA") << " tm:" << tm.value_or("NA");
      }
    }
  };

  run_test(with_update);
  run_test(with_boost);
}
|
||||
#endif // XGBOOST_USE_CUDA
|
||||
|
||||
// Some other parts of test are in `Tree.JsonIO'.
|
||||
TEST(GBTree, JsonIO) {
|
||||
@@ -171,32 +308,52 @@ TEST(GBTree, JsonIO) {
|
||||
Context ctx;
|
||||
LearnerModelParam mparam{MakeMP(kCols, .5, 1)};
|
||||
|
||||
std::unique_ptr<GradientBooster> gbm {
|
||||
CreateTrainedGBM("gbtree", Args{}, kRows, kCols, &mparam, &ctx) };
|
||||
std::unique_ptr<GradientBooster> gbm{
|
||||
CreateTrainedGBM("gbtree", Args{{"tree_method", "exact"}, {"default_direction", "left"}},
|
||||
kRows, kCols, &mparam, &ctx)};
|
||||
|
||||
Json model {Object()};
|
||||
Json model{Object()};
|
||||
model["model"] = Object();
|
||||
auto& j_model = model["model"];
|
||||
auto j_model = model["model"];
|
||||
|
||||
model["config"] = Object();
|
||||
auto& j_param = model["config"];
|
||||
auto j_config = model["config"];
|
||||
|
||||
gbm->SaveModel(&j_model);
|
||||
gbm->SaveConfig(&j_param);
|
||||
gbm->SaveConfig(&j_config);
|
||||
|
||||
std::string model_str;
|
||||
Json::Dump(model, &model_str);
|
||||
|
||||
model = Json::Load({model_str.c_str(), model_str.size()});
|
||||
ASSERT_EQ(get<String>(model["model"]["name"]), "gbtree");
|
||||
j_model = model["model"];
|
||||
j_config = model["config"];
|
||||
ASSERT_EQ(get<String>(j_model["name"]), "gbtree");
|
||||
|
||||
auto const& gbtree_model = model["model"]["model"];
|
||||
auto gbtree_model = j_model["model"];
|
||||
ASSERT_EQ(get<Array>(gbtree_model["trees"]).size(), 1ul);
|
||||
ASSERT_EQ(get<Integer>(get<Object>(get<Array>(gbtree_model["trees"]).front()).at("id")), 0);
|
||||
ASSERT_EQ(get<Array>(gbtree_model["tree_info"]).size(), 1ul);
|
||||
|
||||
auto j_train_param = model["config"]["gbtree_model_param"];
|
||||
auto j_train_param = j_config["gbtree_model_param"];
|
||||
ASSERT_EQ(get<String>(j_train_param["num_parallel_tree"]), "1");
|
||||
|
||||
auto check_config = [](Json j_up_config) {
|
||||
auto colmaker = get<Array const>(j_up_config).front();
|
||||
auto pruner = get<Array const>(j_up_config).back();
|
||||
ASSERT_EQ(get<String const>(colmaker["name"]), "grow_colmaker");
|
||||
ASSERT_EQ(get<String const>(pruner["name"]), "prune");
|
||||
ASSERT_EQ(get<String const>(colmaker["colmaker_train_param"]["default_direction"]), "left");
|
||||
};
|
||||
check_config(j_config["updater"]);
|
||||
|
||||
std::unique_ptr<GradientBooster> loaded(gbm::GBTree::Create("gbtree", &ctx, &mparam));
|
||||
loaded->LoadModel(j_model);
|
||||
loaded->LoadConfig(j_config);
|
||||
|
||||
// roundtrip test
|
||||
Json j_config_rt{Object{}};
|
||||
loaded->SaveConfig(&j_config_rt);
|
||||
check_config(j_config_rt["updater"]);
|
||||
}
|
||||
|
||||
TEST(Dart, JsonIO) {
|
||||
@@ -232,14 +389,15 @@ TEST(Dart, JsonIO) {
|
||||
namespace {
|
||||
class Dart : public testing::TestWithParam<char const*> {
|
||||
public:
|
||||
void Run(std::string predictor) {
|
||||
void Run(std::string device) {
|
||||
size_t constexpr kRows = 16, kCols = 10;
|
||||
|
||||
HostDeviceVector<float> data;
|
||||
auto rng = RandomDataGenerator(kRows, kCols, 0);
|
||||
if (predictor == "gpu_predictor") {
|
||||
rng.Device(0);
|
||||
Context ctx;
|
||||
if (device == "GPU") {
|
||||
ctx = MakeCUDACtx(0);
|
||||
}
|
||||
auto rng = RandomDataGenerator(kRows, kCols, 0).Device(ctx.gpu_id);
|
||||
auto array_str = rng.GenerateArrayInterface(&data);
|
||||
auto p_mat = GetDMatrixFromData(data.HostVector(), kRows, kCols);
|
||||
|
||||
@@ -257,15 +415,14 @@ class Dart : public testing::TestWithParam<char const*> {
|
||||
for (size_t i = 0; i < 16; ++i) {
|
||||
learner->UpdateOneIter(i, p_mat);
|
||||
}
|
||||
|
||||
learner->SetParam("predictor", predictor);
|
||||
learner->SetParam("device", ctx.DeviceName());
|
||||
|
||||
HostDeviceVector<float> predts_training;
|
||||
learner->Predict(p_mat, false, &predts_training, 0, 0, true);
|
||||
|
||||
HostDeviceVector<float>* inplace_predts;
|
||||
std::shared_ptr<data::DMatrixProxy> x{new data::DMatrixProxy{}};
|
||||
if (predictor == "gpu_predictor") {
|
||||
if (ctx.IsCUDA()) {
|
||||
x->SetCUDAArray(array_str.c_str());
|
||||
} else {
|
||||
x->SetArrayData(array_str.c_str());
|
||||
@@ -295,11 +452,10 @@ class Dart : public testing::TestWithParam<char const*> {
|
||||
TEST_P(Dart, Prediction) { this->Run(GetParam()); }
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
INSTANTIATE_TEST_SUITE_P(PredictorTypes, Dart,
|
||||
testing::Values("auto", "cpu_predictor", "gpu_predictor"));
|
||||
INSTANTIATE_TEST_SUITE_P(PredictorTypes, Dart, testing::Values("CPU", "GPU"));
|
||||
#else
|
||||
INSTANTIATE_TEST_SUITE_P(PredictorTypes, Dart, testing::Values("auto", "cpu_predictor"));
|
||||
#endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
INSTANTIATE_TEST_SUITE_P(PredictorTypes, Dart, testing::Values("CPU"));
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
|
||||
|
||||
std::pair<Json, Json> TestModelSlice(std::string booster) {
|
||||
@@ -511,4 +667,85 @@ TEST(GBTree, PredictRange) {
|
||||
dmlc::Error);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * In-place prediction must raise dmlc::Error when it is handed an external-memory
 * (sparse page) DMatrix or a quantile/device DMatrix, for both the gbtree and dart
 * boosters. The CPU context is always tested; a CUDA context is added when built with
 * XGBOOST_USE_CUDA.
 */
TEST(GBTree, InplacePredictionError) {
  std::size_t n_samples{2048}, n_features{32};

  // Train a learner with `params` for three rounds on `p_fmat`, then require that
  // in-place prediction on that same DMatrix throws.
  auto train_and_expect_error = [](std::shared_ptr<DMatrix> p_fmat, Args const& params) {
    std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};
    learner->SetParams(params);
    learner->Configure();
    for (std::int32_t i = 0; i < 3; ++i) {
      learner->UpdateOneIter(i, p_fmat);
    }
    // Output parameter; initialised so it never holds an indeterminate value.
    HostDeviceVector<float>* out_predt{nullptr};
    ASSERT_THROW(
        {
          learner->InplacePredict(p_fmat, PredictionType::kValue,
                                  std::numeric_limits<float>::quiet_NaN(), &out_predt, 0, 0);
        },
        dmlc::Error);
  };

  // External memory DMatrix with two batches.
  auto test_ext_err = [&](std::string booster, Context const* ctx) {
    std::shared_ptr<DMatrix> p_fmat =
        RandomDataGenerator{n_samples, n_features, 0.5f}.Batches(2).GenerateSparsePageDMatrix(
            "cache", true);
    train_and_expect_error(p_fmat, Args{{"booster", booster}, {"device", ctx->DeviceName()}});
  };

  // Quantile DMatrix on CPU, device DMatrix otherwise.
  auto test_qdm_err = [&](std::string booster, Context const* ctx) {
    std::shared_ptr<DMatrix> p_fmat;
    bst_bin_t max_bins = 16;
    auto rng = RandomDataGenerator{n_samples, n_features, 0.5f}.Device(ctx->gpu_id).Bins(max_bins);
    if (ctx->IsCPU()) {
      p_fmat = rng.GenerateQuantileDMatrix(true);
    } else {
#if defined(XGBOOST_USE_CUDA)
      p_fmat = rng.GenerateDeviceDMatrix(true);
#else
      // A non-CPU context without CUDA support: p_fmat is still null, so this check fails.
      CHECK(p_fmat);
#endif  // defined(XGBOOST_USE_CUDA)
    }
    train_and_expect_error(p_fmat, Args{{"booster", booster},
                                        {"max_bin", std::to_string(max_bins)},
                                        {"device", ctx->DeviceName()}});
  };

  // Run `test` for gbtree and dart on the CPU context, then on CUDA device 0 if available.
  auto run_on_all_devices = [](auto const& test) {
    {
      Context ctx;
      test("gbtree", &ctx);
      test("dart", &ctx);
    }

#if defined(XGBOOST_USE_CUDA)
    {
      auto ctx = MakeCUDACtx(0);
      test("gbtree", &ctx);
      test("dart", &ctx);
    }
#endif  // defined(XGBOOST_USE_CUDA)
  };

  run_on_all_devices(test_ext_err);
  run_on_all_devices(test_qdm_err);
}
|
||||
} // namespace xgboost
|
||||
|
||||
86
tests/cpp/gbm/test_gbtree.cu
Normal file
86
tests/cpp/gbm/test_gbtree.cu
Normal file
@@ -0,0 +1,86 @@
|
||||
/**
|
||||
* Copyright 2023, XGBoost contributors
|
||||
*/
|
||||
#include <xgboost/context.h> // for Context
|
||||
#include <xgboost/learner.h> // for Learner
|
||||
#include <xgboost/string_view.h> // for StringView
|
||||
|
||||
#include <limits> // for numeric_limits
|
||||
#include <memory> // for shared_ptr
|
||||
#include <string> // for string
|
||||
#include <thread> // for thread
|
||||
|
||||
#include "../../../src/data/adapter.h" // for ArrayAdapter
|
||||
#include "../../../src/data/device_adapter.cuh" // for CupyAdapter
|
||||
#include "../../../src/data/proxy_dmatrix.h" // for DMatrixProxy
|
||||
#include "../helpers.h" // for RandomDataGenerator
|
||||
|
||||
namespace xgboost {
|
||||
/**
 * Exercise in-place prediction when the input data lives on a different device than the
 * learner, then again after the learner is moved to the data's device.
 *
 * @param ctx Context naming the device the learner is initially configured with.
 */
void TestInplaceFallback(Context const* ctx) {
  bst_row_t n_samples{1024};
  bst_feature_t n_features{32};
  auto const nan = std::numeric_limits<float>::quiet_NaN();

  // Place the features on the "other" device: ordinal 0 when the learner is on CPU,
  // -1 otherwise.
  std::int32_t data_ordinal = ctx->IsCPU() ? 0 : -1;
  bool const data_on_cpu = (data_ordinal == Context::kCpuId);

  HostDeviceVector<float> X_storage;
  auto X = RandomDataGenerator{n_samples, n_features, 0.0}
               .Device(data_ordinal)
               .GenerateArrayInterface(&X_storage);
  HostDeviceVector<float> y_storage;
  auto y = RandomDataGenerator{n_samples, 1u, 0.0}.GenerateArrayInterface(&y_storage);

  // Build the training matrix with the adapter that matches where the data lives.
  std::shared_ptr<DMatrix> Xy;
  if (data_on_cpu) {
    auto X_adapter = data::ArrayAdapter{StringView{X}};
    Xy.reset(DMatrix::Create(&X_adapter, nan, ctx->Threads()));
  } else {
    auto X_adapter = data::CupyAdapter{StringView{X}};
    Xy.reset(DMatrix::Create(&X_adapter, nan, ctx->Threads()));
  }
  Xy->SetInfo("label", y);

  // The learner runs on the device named by ctx.
  std::unique_ptr<Learner> learner{Learner::Create({Xy})};
  learner->SetParam("device", ctx->DeviceName());
  for (std::int32_t iter = 0; iter < 3; ++iter) {
    learner->UpdateOneIter(iter, Xy);
  }

  // Wrap the same array interface in a proxy DMatrix for in-place prediction.
  auto proxy = std::make_shared<data::DMatrixProxy>();
  std::shared_ptr<DMatrix> p_m = proxy;
  if (data_on_cpu) {
    proxy->SetArrayData(StringView{X});
  } else {
    proxy->SetCUDAArray(X.c_str());
  }

  // First prediction: learner and data are on different devices.
  HostDeviceVector<float>* out_predt{nullptr};
  ConsoleLogger::Configure(Args{{"verbosity", "1"}});
  learner->InplacePredict(p_m, PredictionType::kValue, nan, &out_predt, 0, 0);

  // Second prediction: move the learner to the proxy's context so both agree.
  Context new_ctx = *proxy->Ctx();
  ASSERT_NE(new_ctx.gpu_id, ctx->gpu_id);
  learner->SetParam("device", new_ctx.DeviceName());

  HostDeviceVector<float>* out_predt_1{nullptr};
  // Nothing may be written to stderr this time.
  ::testing::internal::CaptureStderr();
  learner->InplacePredict(p_m, PredictionType::kValue, nan, &out_predt_1, 0, 0);
  std::string output = testing::internal::GetCapturedStderr();
  ASSERT_TRUE(output.empty());

  // Both runs must produce identical predictions.
  ASSERT_EQ(out_predt->ConstHostVector(), out_predt_1->ConstHostVector());
}
|
||||
|
||||
// Run the in-place prediction fallback check with the learner configured for CUDA device 0.
TEST(GBTree, InplacePredictFallback) {
  auto const cuda_ctx = MakeCUDACtx(0);
  TestInplaceFallback(&cuda_ctx);
}
|
||||
} // namespace xgboost
|
||||
@@ -210,6 +210,16 @@ SimpleLCG::StateType SimpleLCG::Max() const { return max(); }
|
||||
// Make sure it's compile time constant.
|
||||
static_assert(SimpleLCG::max() - SimpleLCG::min());
|
||||
|
||||
void RandomDataGenerator::GenerateLabels(std::shared_ptr<DMatrix> p_fmat) const {
|
||||
RandomDataGenerator{p_fmat->Info().num_row_, this->n_targets_, 0.0f}.GenerateDense(
|
||||
p_fmat->Info().labels.Data());
|
||||
CHECK_EQ(p_fmat->Info().labels.Size(), this->rows_ * this->n_targets_);
|
||||
p_fmat->Info().labels.Reshape(this->rows_, this->n_targets_);
|
||||
if (device_ != Context::kCpuId) {
|
||||
p_fmat->Info().labels.SetDevice(device_);
|
||||
}
|
||||
}
|
||||
|
||||
void RandomDataGenerator::GenerateDense(HostDeviceVector<float> *out) const {
|
||||
xgboost::SimpleRealUniformDistribution<bst_float> dist(lower_, upper_);
|
||||
CHECK(out);
|
||||
@@ -363,8 +373,9 @@ void RandomDataGenerator::GenerateCSR(
|
||||
CHECK_EQ(columns->Size(), value->Size());
|
||||
}
|
||||
|
||||
std::shared_ptr<DMatrix> RandomDataGenerator::GenerateDMatrix(bool with_label, bool float_label,
|
||||
size_t classes) const {
|
||||
[[nodiscard]] std::shared_ptr<DMatrix> RandomDataGenerator::GenerateDMatrix(bool with_label,
|
||||
bool float_label,
|
||||
size_t classes) const {
|
||||
HostDeviceVector<float> data;
|
||||
HostDeviceVector<bst_row_t> rptrs;
|
||||
HostDeviceVector<bst_feature_t> columns;
|
||||
@@ -395,6 +406,9 @@ std::shared_ptr<DMatrix> RandomDataGenerator::GenerateDMatrix(bool with_label, b
|
||||
for (auto const& page : out->GetBatches<SparsePage>()) {
|
||||
page.data.SetDevice(device_);
|
||||
page.offset.SetDevice(device_);
|
||||
// pull to device
|
||||
page.data.ConstDeviceSpan();
|
||||
page.offset.ConstDeviceSpan();
|
||||
}
|
||||
}
|
||||
if (!ft_.empty()) {
|
||||
@@ -403,13 +417,73 @@ std::shared_ptr<DMatrix> RandomDataGenerator::GenerateDMatrix(bool with_label, b
|
||||
return out;
|
||||
}
|
||||
|
||||
std::shared_ptr<DMatrix> RandomDataGenerator::GenerateQuantileDMatrix() {
|
||||
[[nodiscard]] std::shared_ptr<DMatrix> RandomDataGenerator::GenerateSparsePageDMatrix(
|
||||
std::string prefix, bool with_label) const {
|
||||
CHECK_GE(this->rows_, this->n_batches_);
|
||||
CHECK_GE(this->n_batches_, 1)
|
||||
<< "Must set the n_batches before generating an external memory DMatrix.";
|
||||
std::unique_ptr<ArrayIterForTest> iter;
|
||||
if (device_ == Context::kCpuId) {
|
||||
iter = std::make_unique<NumpyArrayIterForTest>(this->sparsity_, rows_, cols_, n_batches_);
|
||||
} else {
|
||||
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
iter = std::make_unique<CudaArrayIterForTest>(this->sparsity_, rows_, cols_, n_batches_);
|
||||
#else
|
||||
CHECK(iter);
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
}
|
||||
|
||||
std::unique_ptr<DMatrix> dmat{
|
||||
DMatrix::Create(static_cast<DataIterHandle>(iter.get()), iter->Proxy(), Reset, Next,
|
||||
std::numeric_limits<float>::quiet_NaN(), Context{}.Threads(), prefix)};
|
||||
|
||||
auto row_page_path =
|
||||
data::MakeId(prefix, dynamic_cast<data::SparsePageDMatrix*>(dmat.get())) + ".row.page";
|
||||
EXPECT_TRUE(FileExists(row_page_path)) << row_page_path;
|
||||
|
||||
// Loop over the batches and count the number of pages
|
||||
std::size_t batch_count = 0;
|
||||
bst_row_t row_count = 0;
|
||||
for (const auto& batch : dmat->GetBatches<xgboost::SparsePage>()) {
|
||||
batch_count++;
|
||||
row_count += batch.Size();
|
||||
CHECK_NE(batch.data.Size(), 0);
|
||||
}
|
||||
|
||||
EXPECT_EQ(batch_count, n_batches_);
|
||||
EXPECT_EQ(row_count, dmat->Info().num_row_);
|
||||
|
||||
if (with_label) {
|
||||
RandomDataGenerator{dmat->Info().num_row_, this->n_targets_, 0.0f}.GenerateDense(
|
||||
dmat->Info().labels.Data());
|
||||
CHECK_EQ(dmat->Info().labels.Size(), this->rows_ * this->n_targets_);
|
||||
dmat->Info().labels.Reshape(this->rows_, this->n_targets_);
|
||||
}
|
||||
return dmat;
|
||||
}
|
||||
|
||||
std::shared_ptr<DMatrix> RandomDataGenerator::GenerateQuantileDMatrix(bool with_label) {
|
||||
NumpyArrayIterForTest iter{this->sparsity_, this->rows_, this->cols_, 1};
|
||||
auto m = std::make_shared<data::IterativeDMatrix>(
|
||||
&iter, iter.Proxy(), nullptr, Reset, Next, std::numeric_limits<float>::quiet_NaN(), 0, bins_);
|
||||
if (with_label) {
|
||||
this->GenerateLabels(m);
|
||||
}
|
||||
return m;
|
||||
}
|
||||
|
||||
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
|
||||
CudaArrayIterForTest::CudaArrayIterForTest(float sparsity, size_t rows, size_t cols, size_t batches)
|
||||
: ArrayIterForTest{sparsity, rows, cols, batches} {
|
||||
common::AssertGPUSupport();
|
||||
}
|
||||
|
||||
int CudaArrayIterForTest::Next() {
|
||||
common::AssertGPUSupport();
|
||||
return 0;
|
||||
}
|
||||
#endif // !defined(XGBOOST_USE_CUDA)
|
||||
|
||||
NumpyArrayIterForTest::NumpyArrayIterForTest(float sparsity, size_t rows, size_t cols,
|
||||
size_t batches)
|
||||
: ArrayIterForTest{sparsity, rows, cols, batches} {
|
||||
@@ -588,7 +662,7 @@ std::unique_ptr<GradientBooster> CreateTrainedGBM(std::string name, Args kwargs,
|
||||
ArrayIterForTest::ArrayIterForTest(float sparsity, size_t rows, size_t cols, size_t batches)
|
||||
: rows_{rows}, cols_{cols}, n_batches_{batches} {
|
||||
XGProxyDMatrixCreate(&proxy_);
|
||||
rng_.reset(new RandomDataGenerator{rows_, cols_, sparsity});
|
||||
rng_ = std::make_unique<RandomDataGenerator>(rows_, cols_, sparsity);
|
||||
std::tie(batches_, interface_) = rng_->GenerateArrayInterfaceBatch(&data_, n_batches_);
|
||||
}
|
||||
|
||||
|
||||
@@ -28,10 +28,13 @@ int CudaArrayIterForTest::Next() {
|
||||
return 1;
|
||||
}
|
||||
|
||||
std::shared_ptr<DMatrix> RandomDataGenerator::GenerateDeviceDMatrix() {
|
||||
std::shared_ptr<DMatrix> RandomDataGenerator::GenerateDeviceDMatrix(bool with_label) {
|
||||
CudaArrayIterForTest iter{this->sparsity_, this->rows_, this->cols_, 1};
|
||||
auto m = std::make_shared<data::IterativeDMatrix>(
|
||||
&iter, iter.Proxy(), nullptr, Reset, Next, std::numeric_limits<float>::quiet_NaN(), 0, bins_);
|
||||
if (with_label) {
|
||||
this->GenerateLabels(m);
|
||||
}
|
||||
return m;
|
||||
}
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -35,9 +35,9 @@
|
||||
#endif
|
||||
|
||||
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
||||
#define GPUIDX 0
|
||||
#define GPUIDX (common::AllVisibleGPUs() == 1 ? 0 : collective::GetRank())
|
||||
#else
|
||||
#define GPUIDX -1
|
||||
#define GPUIDX (-1)
|
||||
#endif
|
||||
|
||||
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
||||
@@ -46,12 +46,6 @@
|
||||
#define DeclareUnifiedDistributedTest(name) name
|
||||
#endif
|
||||
|
||||
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
||||
#define WORLD_SIZE_FOR_TEST (xgboost::common::AllVisibleGPUs())
|
||||
#else
|
||||
#define WORLD_SIZE_FOR_TEST (3)
|
||||
#endif
|
||||
|
||||
namespace xgboost {
|
||||
class ObjFunction;
|
||||
class Metric;
|
||||
@@ -183,7 +177,7 @@ class SimpleRealUniformDistribution {
|
||||
|
||||
for (size_t k = m; k != 0; --k) {
|
||||
sum_value += static_cast<ResultT>((*rng)() - rng->Min()) * r_k;
|
||||
r_k *= r;
|
||||
r_k *= static_cast<ResultT>(r);
|
||||
}
|
||||
|
||||
ResultT res = sum_value / r_k;
|
||||
@@ -238,15 +232,18 @@ class RandomDataGenerator {
|
||||
bst_target_t n_targets_{1};
|
||||
|
||||
std::int32_t device_{Context::kCpuId};
|
||||
std::size_t n_batches_{0};
|
||||
std::uint64_t seed_{0};
|
||||
SimpleLCG lcg_;
|
||||
|
||||
std::size_t bins_{0};
|
||||
bst_bin_t bins_{0};
|
||||
std::vector<FeatureType> ft_;
|
||||
bst_cat_t max_cat_;
|
||||
|
||||
Json ArrayInterfaceImpl(HostDeviceVector<float>* storage, size_t rows, size_t cols) const;
|
||||
|
||||
void GenerateLabels(std::shared_ptr<DMatrix> p_fmat) const;
|
||||
|
||||
public:
|
||||
RandomDataGenerator(bst_row_t rows, size_t cols, float sparsity)
|
||||
: rows_{rows}, cols_{cols}, sparsity_{sparsity}, lcg_{seed_} {}
|
||||
@@ -263,12 +260,16 @@ class RandomDataGenerator {
|
||||
device_ = d;
|
||||
return *this;
|
||||
}
|
||||
RandomDataGenerator& Batches(std::size_t n_batches) {
|
||||
n_batches_ = n_batches;
|
||||
return *this;
|
||||
}
|
||||
RandomDataGenerator& Seed(uint64_t s) {
|
||||
seed_ = s;
|
||||
lcg_.Seed(seed_);
|
||||
return *this;
|
||||
}
|
||||
RandomDataGenerator& Bins(size_t b) {
|
||||
RandomDataGenerator& Bins(bst_bin_t b) {
|
||||
bins_ = b;
|
||||
return *this;
|
||||
}
|
||||
@@ -309,12 +310,17 @@ class RandomDataGenerator {
|
||||
void GenerateCSR(HostDeviceVector<float>* value, HostDeviceVector<bst_row_t>* row_ptr,
|
||||
HostDeviceVector<bst_feature_t>* columns) const;
|
||||
|
||||
std::shared_ptr<DMatrix> GenerateDMatrix(bool with_label = false, bool float_label = true,
|
||||
size_t classes = 1) const;
|
||||
[[nodiscard]] std::shared_ptr<DMatrix> GenerateDMatrix(bool with_label = false,
|
||||
bool float_label = true,
|
||||
size_t classes = 1) const;
|
||||
|
||||
[[nodiscard]] std::shared_ptr<DMatrix> GenerateSparsePageDMatrix(std::string prefix,
|
||||
bool with_label) const;
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
std::shared_ptr<DMatrix> GenerateDeviceDMatrix();
|
||||
std::shared_ptr<DMatrix> GenerateDeviceDMatrix(bool with_label);
|
||||
#endif
|
||||
std::shared_ptr<DMatrix> GenerateQuantileDMatrix();
|
||||
std::shared_ptr<DMatrix> GenerateQuantileDMatrix(bool with_label);
|
||||
};
|
||||
|
||||
// Generate an empty DMatrix, mostly for its meta info.
|
||||
@@ -322,15 +328,14 @@ inline std::shared_ptr<DMatrix> EmptyDMatrix() {
|
||||
return RandomDataGenerator{0, 0, 0.0}.GenerateDMatrix();
|
||||
}
|
||||
|
||||
/**
 * @brief Generate one column of random category codes.
 *
 * Each of the `n` entries is drawn uniformly from [0, num_categories - 1]
 * using a Mersenne Twister seeded with 0, so the output is reproducible.
 * The leading entries are then overwritten with 0, 1, 2, ... so that every
 * category appears at least once when the column is long enough.
 *
 * @param n              Number of entries in the column.
 * @param num_categories Number of distinct categories.
 * @return The category codes stored as floats.
 */
inline std::vector<float> GenerateRandomCategoricalSingleColumn(int n, size_t num_categories) {
  std::vector<float> x(n);
  std::mt19937 rng(0);
  std::uniform_int_distribution<size_t> dist(0, num_categories - 1);
  std::generate(x.begin(), x.end(), [&]() { return static_cast<float>(dist(rng)); });

  // Make sure each category is present. The bound is capped at the column
  // length so a column shorter than the category count is never written
  // past its end.
  auto const n_forced = std::min<size_t>(num_categories, x.size());
  for (size_t i = 0; i < n_forced; i++) {
    x[i] = static_cast<decltype(x)::value_type>(i);
  }
  return x;
}
|
||||
@@ -444,11 +449,11 @@ class ArrayIterForTest {
|
||||
size_t static constexpr Cols() { return 13; }
|
||||
|
||||
public:
|
||||
std::string AsArray() const { return interface_; }
|
||||
[[nodiscard]] std::string AsArray() const { return interface_; }
|
||||
|
||||
virtual int Next() = 0;
|
||||
virtual void Reset() { iter_ = 0; }
|
||||
size_t Iter() const { return iter_; }
|
||||
[[nodiscard]] std::size_t Iter() const { return iter_; }
|
||||
auto Proxy() -> decltype(proxy_) { return proxy_; }
|
||||
|
||||
explicit ArrayIterForTest(float sparsity, size_t rows, size_t cols, size_t batches);
|
||||
@@ -511,11 +516,15 @@ inline LearnerModelParam MakeMP(bst_feature_t n_features, float base_score, uint
|
||||
|
||||
inline std::int32_t AllThreadsForTest() { return Context{}.Threads(); }
|
||||
|
||||
template <typename Function, typename... Args>
|
||||
template <bool use_nccl = false, typename Function, typename... Args>
|
||||
void RunWithInMemoryCommunicator(int32_t world_size, Function&& function, Args&&... args) {
|
||||
auto run = [&](auto rank) {
|
||||
Json config{JsonObject()};
|
||||
config["xgboost_communicator"] = String("in-memory");
|
||||
if constexpr (use_nccl) {
|
||||
config["xgboost_communicator"] = String("in-memory-nccl");
|
||||
} else {
|
||||
config["xgboost_communicator"] = String("in-memory");
|
||||
}
|
||||
config["in_memory_world_size"] = world_size;
|
||||
config["in_memory_rank"] = rank;
|
||||
xgboost::collective::Init(config);
|
||||
@@ -537,16 +546,35 @@ void RunWithInMemoryCommunicator(int32_t world_size, Function&& function, Args&&
|
||||
#endif
|
||||
}
|
||||
|
||||
class DeclareUnifiedDistributedTest(MetricTest) : public ::testing::Test {
|
||||
class BaseMGPUTest : public ::testing::Test {
|
||||
protected:
|
||||
int world_size_;
|
||||
bool use_nccl_{false};
|
||||
|
||||
void SetUp() override {
|
||||
world_size_ = WORLD_SIZE_FOR_TEST;
|
||||
if (world_size_ <= 1) {
|
||||
GTEST_SKIP() << "Skipping MGPU test with # GPUs = " << world_size_;
|
||||
auto const n_gpus = common::AllVisibleGPUs();
|
||||
if (n_gpus <= 1) {
|
||||
// Use a single GPU to simulate distributed environment.
|
||||
world_size_ = 3;
|
||||
// NCCL doesn't like sharing a single GPU, so we use the adapter instead.
|
||||
use_nccl_ = false;
|
||||
} else {
|
||||
// Use multiple GPUs for real.
|
||||
world_size_ = n_gpus;
|
||||
use_nccl_ = true;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Function, typename... Args>
|
||||
void DoTest(Function&& function, Args&&... args) {
|
||||
if (use_nccl_) {
|
||||
RunWithInMemoryCommunicator<true>(world_size_, function, args...);
|
||||
} else {
|
||||
RunWithInMemoryCommunicator<false>(world_size_, function, args...);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class DeclareUnifiedDistributedTest(MetricTest) : public BaseMGPUTest{};
|
||||
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -2,6 +2,10 @@
|
||||
#include "../../src/data/ellpack_page.cuh"
|
||||
#endif
|
||||
|
||||
#include <xgboost/data.h> // for SparsePage
|
||||
|
||||
#include "./helpers.h" // for RandomDataGenerator
|
||||
|
||||
namespace xgboost {
|
||||
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
||||
namespace {
|
||||
|
||||
@@ -18,51 +18,51 @@ TEST(Metric, DeclareUnifiedTest(MultiClassPRAUC)) { VerifyMultiClassPRAUC(); }
|
||||
TEST(Metric, DeclareUnifiedTest(RankingPRAUC)) { VerifyRankingPRAUC(); }
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), BinaryAUCRowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyBinaryAUC, DataSplitMode::kRow);
|
||||
DoTest(VerifyBinaryAUC, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), BinaryAUCColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyBinaryAUC, DataSplitMode::kCol);
|
||||
DoTest(VerifyBinaryAUC, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassAUCRowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassAUC, DataSplitMode::kRow);
|
||||
DoTest(VerifyMultiClassAUC, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassAUCColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassAUC, DataSplitMode::kCol);
|
||||
DoTest(VerifyMultiClassAUC, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), RankingAUCRowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyRankingAUC, DataSplitMode::kRow);
|
||||
DoTest(VerifyRankingAUC, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), RankingAUCColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyRankingAUC, DataSplitMode::kCol);
|
||||
DoTest(VerifyRankingAUC, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), PRAUCRowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyPRAUC, DataSplitMode::kRow);
|
||||
DoTest(VerifyPRAUC, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), PRAUCColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyPRAUC, DataSplitMode::kCol);
|
||||
DoTest(VerifyPRAUC, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassPRAUCRowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassPRAUC, DataSplitMode::kRow);
|
||||
DoTest(VerifyMultiClassPRAUC, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassPRAUCColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassPRAUC, DataSplitMode::kCol);
|
||||
DoTest(VerifyMultiClassPRAUC, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), RankingPRAUCRowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyRankingPRAUC, DataSplitMode::kRow);
|
||||
DoTest(VerifyRankingPRAUC, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), RankingPRAUCColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyRankingPRAUC, DataSplitMode::kCol);
|
||||
DoTest(VerifyRankingPRAUC, DataSplitMode::kCol);
|
||||
}
|
||||
} // namespace metric
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -26,83 +26,83 @@ TEST(Metric, DeclareUnifiedTest(MultiRMSE)) { VerifyMultiRMSE(); }
|
||||
TEST(Metric, DeclareUnifiedTest(Quantile)) { VerifyQuantile(); }
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), RMSERowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyRMSE, DataSplitMode::kRow);
|
||||
DoTest(VerifyRMSE, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), RMSEColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyRMSE, DataSplitMode::kCol);
|
||||
DoTest(VerifyRMSE, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), RMSLERowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyRMSLE, DataSplitMode::kRow);
|
||||
DoTest(VerifyRMSLE, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), RMSLEColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyRMSLE, DataSplitMode::kCol);
|
||||
DoTest(VerifyRMSLE, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MAERowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyMAE, DataSplitMode::kRow);
|
||||
DoTest(VerifyMAE, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MAEColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyMAE, DataSplitMode::kCol);
|
||||
DoTest(VerifyMAE, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MAPERowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyMAPE, DataSplitMode::kRow);
|
||||
DoTest(VerifyMAPE, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MAPEColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyMAPE, DataSplitMode::kCol);
|
||||
DoTest(VerifyMAPE, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MPHERowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyMPHE, DataSplitMode::kRow);
|
||||
DoTest(VerifyMPHE, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MPHEColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyMPHE, DataSplitMode::kCol);
|
||||
DoTest(VerifyMPHE, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), LogLossRowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyLogLoss, DataSplitMode::kRow);
|
||||
DoTest(VerifyLogLoss, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), LogLossColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyLogLoss, DataSplitMode::kCol);
|
||||
DoTest(VerifyLogLoss, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), ErrorRowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyError, DataSplitMode::kRow);
|
||||
DoTest(VerifyError, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), ErrorColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyError, DataSplitMode::kCol);
|
||||
DoTest(VerifyError, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), PoissonNegLogLikRowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyPoissonNegLogLik, DataSplitMode::kRow);
|
||||
DoTest(VerifyPoissonNegLogLik, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), PoissonNegLogLikColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyPoissonNegLogLik, DataSplitMode::kCol);
|
||||
DoTest(VerifyPoissonNegLogLik, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiRMSERowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyMultiRMSE, DataSplitMode::kRow);
|
||||
DoTest(VerifyMultiRMSE, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiRMSEColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyMultiRMSE, DataSplitMode::kCol);
|
||||
DoTest(VerifyMultiRMSE, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), QuantileRowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyQuantile, DataSplitMode::kRow);
|
||||
DoTest(VerifyQuantile, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), QuantileColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyQuantile, DataSplitMode::kCol);
|
||||
DoTest(VerifyQuantile, DataSplitMode::kCol);
|
||||
}
|
||||
} // namespace metric
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -8,14 +8,10 @@ TEST(Metric, UnknownMetric) {
|
||||
xgboost::Metric* metric = nullptr;
|
||||
EXPECT_ANY_THROW(metric = xgboost::Metric::Create("unknown_name", &ctx));
|
||||
EXPECT_NO_THROW(metric = xgboost::Metric::Create("rmse", &ctx));
|
||||
if (metric) {
|
||||
delete metric;
|
||||
}
|
||||
delete metric;
|
||||
metric = nullptr;
|
||||
EXPECT_ANY_THROW(metric = xgboost::Metric::Create("unknown_name@1", &ctx));
|
||||
EXPECT_NO_THROW(metric = xgboost::Metric::Create("error@0.5f", &ctx));
|
||||
if (metric) {
|
||||
delete metric;
|
||||
}
|
||||
delete metric;
|
||||
}
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -11,19 +11,19 @@ TEST(Metric, DeclareUnifiedTest(MultiClassError)) { VerifyMultiClassError(); }
|
||||
TEST(Metric, DeclareUnifiedTest(MultiClassLogLoss)) { VerifyMultiClassLogLoss(); }
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassErrorRowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassError, DataSplitMode::kRow);
|
||||
DoTest(VerifyMultiClassError, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassErrorColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassError, DataSplitMode::kCol);
|
||||
DoTest(VerifyMultiClassError, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassLogLossRowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassLogLoss, DataSplitMode::kRow);
|
||||
DoTest(VerifyMultiClassLogLoss, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassLogLossColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassLogLoss, DataSplitMode::kCol);
|
||||
DoTest(VerifyMultiClassLogLoss, DataSplitMode::kCol);
|
||||
}
|
||||
} // namespace metric
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -46,7 +46,6 @@ inline void CheckDeterministicMetricMultiClass(StringView name, int32_t device)
|
||||
|
||||
inline void TestMultiClassError(int device, DataSplitMode data_split_mode) {
|
||||
auto ctx = MakeCUDACtx(device);
|
||||
ctx.gpu_id = device;
|
||||
xgboost::Metric * metric = xgboost::Metric::Create("merror", &ctx);
|
||||
metric->Configure({});
|
||||
ASSERT_STREQ(metric->Name(), "merror");
|
||||
@@ -67,7 +66,6 @@ inline void VerifyMultiClassError(DataSplitMode data_split_mode = DataSplitMode:
|
||||
|
||||
inline void TestMultiClassLogLoss(int device, DataSplitMode data_split_mode) {
|
||||
auto ctx = MakeCUDACtx(device);
|
||||
ctx.gpu_id = device;
|
||||
xgboost::Metric * metric = xgboost::Metric::Create("mlogloss", &ctx);
|
||||
metric->Configure({});
|
||||
ASSERT_STREQ(metric->Name(), "mlogloss");
|
||||
|
||||
@@ -50,35 +50,35 @@ TEST(Metric, DeclareUnifiedTest(MAP)) { VerifyMAP(); }
|
||||
TEST(Metric, DeclareUnifiedTest(NDCGExpGain)) { VerifyNDCGExpGain(); }
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), PrecisionRowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyPrecision, DataSplitMode::kRow);
|
||||
DoTest(VerifyPrecision, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), PrecisionColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyPrecision, DataSplitMode::kCol);
|
||||
DoTest(VerifyPrecision, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), NDCGRowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyNDCG, DataSplitMode::kRow);
|
||||
DoTest(VerifyNDCG, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), NDCGColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyNDCG, DataSplitMode::kCol);
|
||||
DoTest(VerifyNDCG, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MAPRowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyMAP, DataSplitMode::kRow);
|
||||
DoTest(VerifyMAP, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MAPColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyMAP, DataSplitMode::kCol);
|
||||
DoTest(VerifyMAP, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), NDCGExpGainRowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyNDCGExpGain, DataSplitMode::kRow);
|
||||
DoTest(VerifyNDCGExpGain, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), NDCGExpGainColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyNDCGExpGain, DataSplitMode::kCol);
|
||||
DoTest(VerifyNDCGExpGain, DataSplitMode::kCol);
|
||||
}
|
||||
} // namespace metric
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -12,21 +12,21 @@ namespace common {
|
||||
TEST(Metric, DeclareUnifiedTest(AFTNegLogLik)) { VerifyAFTNegLogLik(); }
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), AFTNegLogLikRowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyAFTNegLogLik, DataSplitMode::kRow);
|
||||
DoTest(VerifyAFTNegLogLik, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), AFTNegLogLikColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyAFTNegLogLik, DataSplitMode::kCol);
|
||||
DoTest(VerifyAFTNegLogLik, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(IntervalRegressionAccuracy)) { VerifyIntervalRegressionAccuracy(); }
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), IntervalRegressionAccuracyRowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyIntervalRegressionAccuracy, DataSplitMode::kRow);
|
||||
DoTest(VerifyIntervalRegressionAccuracy, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), IntervalRegressionAccuracyColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyIntervalRegressionAccuracy, DataSplitMode::kCol);
|
||||
DoTest(VerifyIntervalRegressionAccuracy, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
// Test configuration of AFT metric
|
||||
|
||||
@@ -18,26 +18,22 @@
|
||||
|
||||
namespace xgboost::obj {
|
||||
TEST(LambdaRank, GPUNDCGJsonIO) {
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestNDCGJsonIO(&ctx);
|
||||
}
|
||||
|
||||
TEST(LambdaRank, GPUMAPStat) {
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestMAPStat(&ctx);
|
||||
}
|
||||
|
||||
TEST(LambdaRank, GPUNDCGGPair) {
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestNDCGGPair(&ctx);
|
||||
}
|
||||
|
||||
void TestGPUMakePair() {
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
|
||||
MetaInfo info;
|
||||
HostDeviceVector<float> predt;
|
||||
@@ -131,8 +127,7 @@ void TestGPUMakePair() {
|
||||
TEST(LambdaRank, GPUMakePair) { TestGPUMakePair(); }
|
||||
|
||||
TEST(LambdaRank, GPUUnbiasedNDCG) {
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestUnbiasedNDCG(&ctx);
|
||||
}
|
||||
|
||||
@@ -166,8 +161,7 @@ TEST(LambdaRank, RankItemCountOnRight) {
|
||||
}
|
||||
|
||||
TEST(LambdaRank, GPUMAPGPair) {
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestMAPGPair(&ctx);
|
||||
}
|
||||
} // namespace xgboost::obj
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
#include <xgboost/objective.h>
|
||||
|
||||
#include "../helpers.h"
|
||||
#include "../objective_helpers.h"
|
||||
|
||||
TEST(Objective, UnknownFunction) {
|
||||
xgboost::ObjFunction* obj = nullptr;
|
||||
@@ -43,4 +44,61 @@ TEST(Objective, PredTransform) {
|
||||
ASSERT_TRUE(predts.HostCanWrite());
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Parameterized fixture: for a given objective name, train one iteration and
 * check that the saved learner configuration names the objective and that the
 * objective's default metric configuration survives a load/save round trip.
 */
class TestDefaultObjConfig : public ::testing::TestWithParam<std::string> {
  Context ctx_;

 public:
  void Run(std::string objective) {
    auto Xy = MakeFmatForObjTest(objective);
    std::unique_ptr<Learner> learner{Learner::Create({Xy})};
    std::unique_ptr<ObjFunction> objfn{ObjFunction::Create(objective, &ctx_)};

    // Extra parameters that some objective families need; empty for the rest.
    Args extra;
    auto const has = [&objective](char const* token) {
      return objective.find(token) != std::string::npos;
    };
    if (has("multi")) {
      extra = Args{{"num_class", "3"}};
    } else if (has("quantile")) {
      extra = Args{{"quantile_alpha", "0.5"}};
    }

    learner->SetParam("objective", objective);
    for (auto const& [key, value] : extra) {
      learner->SetParam(key, value);
    }
    objfn->Configure(extra);

    // One round of training and evaluation on the same data.
    learner->Configure();
    learner->UpdateOneIter(0, Xy);
    learner->EvalOneIter(0, {Xy}, {"train"});

    Json config{Object{}};
    learner->SaveConfig(&config);
    auto const& jobj = get<Object const>(config["learner"]["objective"]);
    ASSERT_TRUE(jobj.find("name") != jobj.cend());

    // FIXME(jiamingy): We should have the following check, but some legacy parameter like
    // "pos_weight", "delta_step" in objectives are not in metrics.

    // if (jobj.size() > 1) {
    //   ASSERT_FALSE(IsA<Null>(objfn->DefaultMetricConfig()));
    // }
    auto mconfig = objfn->DefaultMetricConfig();
    if (IsA<Null>(mconfig)) {
      return;
    }

    // Make sure the metric can load the configuration and write it back unchanged.
    std::unique_ptr<Metric> metricfn{Metric::Create(get<String const>(mconfig["name"]), &ctx_)};
    metricfn->LoadConfig(mconfig);
    Json loaded(Object{});
    metricfn->SaveConfig(&loaded);
    metricfn->Configure(Args{});
    ASSERT_EQ(mconfig, loaded);
  }
};
|
||||
|
||||
// Run the default-configuration check for the objective name supplied as the test parameter.
TEST_P(TestDefaultObjConfig, Objective) { this->Run(GetParam()); }
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(Objective, TestDefaultObjConfig,
|
||||
::testing::ValuesIn(MakeObjNamesForTest()),
|
||||
[](const ::testing::TestParamInfo<TestDefaultObjConfig::ParamType>& info) {
|
||||
return ObjTestNameGenerator(info);
|
||||
});
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -305,12 +305,12 @@ TEST(Objective, CPU_vs_CUDA) {
|
||||
|
||||
{
|
||||
// CPU
|
||||
ctx.gpu_id = -1;
|
||||
ctx = ctx.MakeCPU();
|
||||
obj->GetGradient(preds, info, 0, &cpu_out_preds);
|
||||
}
|
||||
{
|
||||
// CUDA
|
||||
ctx.gpu_id = 0;
|
||||
ctx = ctx.MakeCUDA(0);
|
||||
obj->GetGradient(preds, info, 0, &cuda_out_preds);
|
||||
}
|
||||
|
||||
|
||||
31
tests/cpp/objective_helpers.cc
Normal file
31
tests/cpp/objective_helpers.cc
Normal file
@@ -0,0 +1,31 @@
|
||||
/**
|
||||
* Copyright (c) 2023, XGBoost contributors
|
||||
*/
|
||||
#include "objective_helpers.h"
|
||||
|
||||
#include "../../src/common/linalg_op.h" // for begin, end
|
||||
#include "helpers.h" // for RandomDataGenerator
|
||||
|
||||
namespace xgboost {
|
||||
std::shared_ptr<DMatrix> MakeFmatForObjTest(std::string const& obj) {
|
||||
auto constexpr kRows = 10, kCols = 10;
|
||||
auto p_fmat = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true);
|
||||
auto& h_upper = p_fmat->Info().labels_upper_bound_.HostVector();
|
||||
auto& h_lower = p_fmat->Info().labels_lower_bound_.HostVector();
|
||||
h_lower.resize(kRows);
|
||||
h_upper.resize(kRows);
|
||||
for (size_t i = 0; i < kRows; ++i) {
|
||||
h_lower[i] = 1;
|
||||
h_upper[i] = 10;
|
||||
}
|
||||
if (obj.find("rank:") != std::string::npos) {
|
||||
auto h_label = p_fmat->Info().labels.HostView();
|
||||
std::size_t k = 0;
|
||||
for (auto& v : h_label) {
|
||||
v = k % 2 == 0;
|
||||
++k;
|
||||
}
|
||||
}
|
||||
return p_fmat;
|
||||
};
|
||||
} // namespace xgboost
|
||||
@@ -1,6 +1,8 @@
|
||||
/**
|
||||
* Copyright (c) 2023, XGBoost contributors
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <dmlc/registry.h> // for Registry
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/objective.h> // for ObjFunctionReg
|
||||
@@ -29,4 +31,6 @@ inline std::string ObjTestNameGenerator(const ::testing::TestParamInfo<ParamType
|
||||
}
|
||||
return name;
|
||||
};
|
||||
|
||||
std::shared_ptr<DMatrix> MakeFmatForObjTest(std::string const& obj);
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -37,7 +37,14 @@ class ServerForTest {
|
||||
}
|
||||
|
||||
~ServerForTest() {
|
||||
using namespace std::chrono_literals;
|
||||
while (!server_) {
|
||||
std::this_thread::sleep_for(100ms);
|
||||
}
|
||||
server_->Shutdown();
|
||||
while (!server_thread_) {
|
||||
std::this_thread::sleep_for(100ms);
|
||||
}
|
||||
server_thread_->join();
|
||||
}
|
||||
|
||||
@@ -56,7 +63,7 @@ class BaseFederatedTest : public ::testing::Test {
|
||||
|
||||
void TearDown() override { server_.reset(nullptr); }
|
||||
|
||||
static int constexpr kWorldSize{3};
|
||||
static int constexpr kWorldSize{2};
|
||||
std::unique_ptr<ServerForTest> server_;
|
||||
};
|
||||
|
||||
|
||||
@@ -9,75 +9,68 @@
|
||||
#include <thread>
|
||||
|
||||
#include "../../../plugin/federated/federated_communicator.h"
|
||||
#include "../../../src/collective/communicator-inl.cuh"
|
||||
#include "../../../src/collective/device_communicator_adapter.cuh"
|
||||
#include "./helpers.h"
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost::collective {
|
||||
|
||||
class FederatedAdapterTest : public BaseFederatedTest {};
|
||||
|
||||
TEST(FederatedAdapterSimpleTest, ThrowOnInvalidDeviceOrdinal) {
|
||||
auto construct = []() { DeviceCommunicatorAdapter adapter{-1, nullptr}; };
|
||||
auto construct = []() { DeviceCommunicatorAdapter adapter{-1}; };
|
||||
EXPECT_THROW(construct(), dmlc::Error);
|
||||
}
|
||||
|
||||
TEST(FederatedAdapterSimpleTest, ThrowOnInvalidCommunicator) {
|
||||
auto construct = []() { DeviceCommunicatorAdapter adapter{0, nullptr}; };
|
||||
EXPECT_THROW(construct(), dmlc::Error);
|
||||
}
|
||||
|
||||
TEST_F(FederatedAdapterTest, DeviceAllReduceSum) {
|
||||
std::vector<std::thread> threads;
|
||||
for (auto rank = 0; rank < kWorldSize; rank++) {
|
||||
threads.emplace_back([rank, server_address = server_->Address()] {
|
||||
FederatedCommunicator comm{kWorldSize, rank, server_address};
|
||||
// Assign device 0 to all workers, since we run gtest in a single-GPU machine
|
||||
DeviceCommunicatorAdapter adapter{0, &comm};
|
||||
int count = 3;
|
||||
thrust::device_vector<double> buffer(count, 0);
|
||||
thrust::sequence(buffer.begin(), buffer.end());
|
||||
adapter.AllReduce(buffer.data().get(), count, DataType::kDouble, Operation::kSum);
|
||||
thrust::host_vector<double> host_buffer = buffer;
|
||||
EXPECT_EQ(host_buffer.size(), count);
|
||||
for (auto i = 0; i < count; i++) {
|
||||
EXPECT_EQ(host_buffer[i], i * kWorldSize);
|
||||
}
|
||||
});
|
||||
}
|
||||
for (auto& thread : threads) {
|
||||
thread.join();
|
||||
namespace {
|
||||
void VerifyAllReduceSum() {
|
||||
auto const world_size = collective::GetWorldSize();
|
||||
auto const rank = collective::GetRank();
|
||||
auto const device = GPUIDX;
|
||||
int count = 3;
|
||||
common::SetDevice(device);
|
||||
thrust::device_vector<double> buffer(count, 0);
|
||||
thrust::sequence(buffer.begin(), buffer.end());
|
||||
collective::AllReduce<collective::Operation::kSum>(device, buffer.data().get(), count);
|
||||
thrust::host_vector<double> host_buffer = buffer;
|
||||
EXPECT_EQ(host_buffer.size(), count);
|
||||
for (auto i = 0; i < count; i++) {
|
||||
EXPECT_EQ(host_buffer[i], i * world_size);
|
||||
}
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
TEST_F(FederatedAdapterTest, DeviceAllGatherV) {
|
||||
std::vector<std::thread> threads;
|
||||
for (auto rank = 0; rank < kWorldSize; rank++) {
|
||||
threads.emplace_back([rank, server_address = server_->Address()] {
|
||||
FederatedCommunicator comm{kWorldSize, rank, server_address};
|
||||
// Assign device 0 to all workers, since we run gtest in a single-GPU machine
|
||||
DeviceCommunicatorAdapter adapter{0, &comm};
|
||||
|
||||
int const count = rank + 2;
|
||||
thrust::device_vector<char> buffer(count, 0);
|
||||
thrust::sequence(buffer.begin(), buffer.end());
|
||||
std::vector<std::size_t> segments(kWorldSize);
|
||||
dh::caching_device_vector<char> receive_buffer{};
|
||||
|
||||
adapter.AllGatherV(buffer.data().get(), count, &segments, &receive_buffer);
|
||||
|
||||
EXPECT_EQ(segments[0], 2);
|
||||
EXPECT_EQ(segments[1], 3);
|
||||
thrust::host_vector<char> host_buffer = receive_buffer;
|
||||
EXPECT_EQ(host_buffer.size(), 9);
|
||||
int expected[] = {0, 1, 0, 1, 2, 0, 1, 2, 3};
|
||||
for (auto i = 0; i < 9; i++) {
|
||||
EXPECT_EQ(host_buffer[i], expected[i]);
|
||||
}
|
||||
});
|
||||
}
|
||||
for (auto& thread : threads) {
|
||||
thread.join();
|
||||
}
|
||||
TEST_F(FederatedAdapterTest, MGPUAllReduceSum) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyAllReduceSum);
|
||||
}
|
||||
|
||||
namespace {
|
||||
void VerifyAllGatherV() {
|
||||
auto const world_size = collective::GetWorldSize();
|
||||
auto const rank = collective::GetRank();
|
||||
auto const device = GPUIDX;
|
||||
int const count = rank + 2;
|
||||
common::SetDevice(device);
|
||||
thrust::device_vector<char> buffer(count, 0);
|
||||
thrust::sequence(buffer.begin(), buffer.end());
|
||||
std::vector<std::size_t> segments(world_size);
|
||||
dh::caching_device_vector<char> receive_buffer{};
|
||||
|
||||
collective::AllGatherV(device, buffer.data().get(), count, &segments, &receive_buffer);
|
||||
|
||||
EXPECT_EQ(segments[0], 2);
|
||||
EXPECT_EQ(segments[1], 3);
|
||||
thrust::host_vector<char> host_buffer = receive_buffer;
|
||||
EXPECT_EQ(host_buffer.size(), 5);
|
||||
int expected[] = {0, 1, 0, 1, 2};
|
||||
for (auto i = 0; i < 5; i++) {
|
||||
EXPECT_EQ(host_buffer[i], expected[i]);
|
||||
}
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
TEST_F(FederatedAdapterTest, MGPUAllGatherV) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyAllGatherV);
|
||||
}
|
||||
} // namespace xgboost::collective
|
||||
|
||||
@@ -31,7 +31,7 @@ class FederatedCommunicatorTest : public BaseFederatedTest {
|
||||
|
||||
protected:
|
||||
static void CheckAllgather(FederatedCommunicator &comm, int rank) {
|
||||
int buffer[kWorldSize] = {0, 0, 0};
|
||||
int buffer[kWorldSize] = {0, 0};
|
||||
buffer[rank] = rank;
|
||||
comm.AllGather(buffer, sizeof(buffer));
|
||||
for (auto i = 0; i < kWorldSize; i++) {
|
||||
@@ -42,7 +42,7 @@ class FederatedCommunicatorTest : public BaseFederatedTest {
|
||||
static void CheckAllreduce(FederatedCommunicator &comm) {
|
||||
int buffer[] = {1, 2, 3, 4, 5};
|
||||
comm.AllReduce(buffer, sizeof(buffer) / sizeof(buffer[0]), DataType::kInt32, Operation::kSum);
|
||||
int expected[] = {3, 6, 9, 12, 15};
|
||||
int expected[] = {2, 4, 6, 8, 10};
|
||||
for (auto i = 0; i < 5; i++) {
|
||||
EXPECT_EQ(buffer[i], expected[i]);
|
||||
}
|
||||
|
||||
@@ -30,7 +30,7 @@ void VerifyLoadUri() {
|
||||
std::string uri = path + "?format=csv";
|
||||
dmat.reset(DMatrix::Load(uri, false, DataSplitMode::kCol));
|
||||
|
||||
ASSERT_EQ(dmat->Info().num_col_, 8 * collective::GetWorldSize() + 3);
|
||||
ASSERT_EQ(dmat->Info().num_col_, 8 * collective::GetWorldSize() + 1);
|
||||
ASSERT_EQ(dmat->Info().num_row_, kRows);
|
||||
|
||||
for (auto const& page : dmat->GetBatches<SparsePage>()) {
|
||||
|
||||
@@ -39,7 +39,7 @@ class FederatedServerTest : public BaseFederatedTest {
|
||||
|
||||
protected:
|
||||
static void CheckAllgather(federated::FederatedClient& client, int rank) {
|
||||
int data[kWorldSize] = {0, 0, 0};
|
||||
int data[kWorldSize] = {0, 0};
|
||||
data[rank] = rank;
|
||||
std::string send_buffer(reinterpret_cast<char const*>(data), sizeof(data));
|
||||
auto reply = client.Allgather(send_buffer);
|
||||
@@ -54,7 +54,7 @@ class FederatedServerTest : public BaseFederatedTest {
|
||||
std::string send_buffer(reinterpret_cast<char const*>(data), sizeof(data));
|
||||
auto reply = client.Allreduce(send_buffer, federated::INT32, federated::SUM);
|
||||
auto const* result = reinterpret_cast<int const*>(reply.data());
|
||||
int expected[] = {3, 6, 9, 12, 15};
|
||||
int expected[] = {2, 4, 6, 8, 10};
|
||||
for (auto i = 0; i < 5; i++) {
|
||||
EXPECT_EQ(result[i], expected[i]);
|
||||
}
|
||||
|
||||
@@ -148,7 +148,7 @@ TEST(Plugin, CPUvsOneAPI) {
|
||||
|
||||
{
|
||||
// CPU
|
||||
ctx.gpu_id = -1;
|
||||
ctx = ctx.MakeCPU();
|
||||
obj_cpu->GetGradient(preds, info, 0, &cpu_out_preds);
|
||||
}
|
||||
{
|
||||
|
||||
@@ -122,11 +122,13 @@ TEST(CpuPredictor, BasicColumnSplit) {
|
||||
}
|
||||
|
||||
TEST(CpuPredictor, IterationRange) {
|
||||
TestIterationRange("cpu_predictor");
|
||||
Context ctx;
|
||||
TestIterationRange(&ctx);
|
||||
}
|
||||
|
||||
TEST(CpuPredictor, IterationRangeColmnSplit) {
|
||||
TestIterationRangeColumnSplit("cpu_predictor");
|
||||
Context ctx;
|
||||
TestIterationRangeColumnSplit(&ctx);
|
||||
}
|
||||
|
||||
TEST(CpuPredictor, ExternalMemory) {
|
||||
@@ -139,7 +141,8 @@ TEST(CpuPredictor, ExternalMemory) {
|
||||
TEST(CpuPredictor, InplacePredict) {
|
||||
bst_row_t constexpr kRows{128};
|
||||
bst_feature_t constexpr kCols{64};
|
||||
auto gen = RandomDataGenerator{kRows, kCols, 0.5}.Device(-1);
|
||||
Context ctx;
|
||||
auto gen = RandomDataGenerator{kRows, kCols, 0.5}.Device(ctx.gpu_id);
|
||||
{
|
||||
HostDeviceVector<float> data;
|
||||
gen.GenerateDense(&data);
|
||||
@@ -149,7 +152,7 @@ TEST(CpuPredictor, InplacePredict) {
|
||||
std::string arr_str;
|
||||
Json::Dump(array_interface, &arr_str);
|
||||
x->SetArrayData(arr_str.data());
|
||||
TestInplacePrediction(x, "cpu_predictor", kRows, kCols, Context::kCpuId);
|
||||
TestInplacePrediction(&ctx, x, kRows, kCols);
|
||||
}
|
||||
|
||||
{
|
||||
@@ -166,76 +169,81 @@ TEST(CpuPredictor, InplacePredict) {
|
||||
Json::Dump(col_interface, &col_str);
|
||||
std::shared_ptr<data::DMatrixProxy> x{new data::DMatrixProxy};
|
||||
x->SetCSRData(rptr_str.data(), col_str.data(), data_str.data(), kCols, true);
|
||||
TestInplacePrediction(x, "cpu_predictor", kRows, kCols, Context::kCpuId);
|
||||
TestInplacePrediction(&ctx, x, kRows, kCols);
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
void TestUpdatePredictionCache(bool use_subsampling) {
|
||||
size_t constexpr kRows = 64, kCols = 16, kClasses = 4;
|
||||
std::size_t constexpr kRows = 64, kCols = 16, kClasses = 4;
|
||||
LearnerModelParam mparam{MakeMP(kCols, .0, kClasses)};
|
||||
Context ctx;
|
||||
|
||||
std::unique_ptr<gbm::GBTree> gbm;
|
||||
gbm.reset(static_cast<gbm::GBTree*>(GradientBooster::Create("gbtree", &ctx, &mparam)));
|
||||
std::map<std::string, std::string> cfg;
|
||||
cfg["tree_method"] = "hist";
|
||||
cfg["predictor"] = "cpu_predictor";
|
||||
Args args{{"tree_method", "hist"}};
|
||||
if (use_subsampling) {
|
||||
cfg["subsample"] = "0.5";
|
||||
args.emplace_back("subsample", "0.5");
|
||||
}
|
||||
Args args = {cfg.cbegin(), cfg.cend()};
|
||||
gbm->Configure(args);
|
||||
|
||||
auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix(true, true, kClasses);
|
||||
|
||||
HostDeviceVector<GradientPair> gpair;
|
||||
auto& h_gpair = gpair.HostVector();
|
||||
h_gpair.resize(kRows*kClasses);
|
||||
for (size_t i = 0; i < kRows*kClasses; ++i) {
|
||||
h_gpair.resize(kRows * kClasses);
|
||||
for (size_t i = 0; i < kRows * kClasses; ++i) {
|
||||
h_gpair[i] = {static_cast<float>(i), 1};
|
||||
}
|
||||
|
||||
PredictionCacheEntry predtion_cache;
|
||||
predtion_cache.predictions.Resize(kRows*kClasses, 0);
|
||||
// after one training iteration predtion_cache is filled with cached in QuantileHistMaker::Builder prediction values
|
||||
predtion_cache.predictions.Resize(kRows * kClasses, 0);
|
||||
// after one training iteration predtion_cache is filled with cached in QuantileHistMaker
|
||||
// prediction values
|
||||
gbm->DoBoost(dmat.get(), &gpair, &predtion_cache, nullptr);
|
||||
|
||||
PredictionCacheEntry out_predictions;
|
||||
// perform fair prediction on the same input data, should be equal to cached result
|
||||
// perform prediction from scratch on the same input data, should be equal to cached result
|
||||
gbm->PredictBatch(dmat.get(), &out_predictions, false, 0, 0);
|
||||
|
||||
std::vector<float> &out_predictions_h = out_predictions.predictions.HostVector();
|
||||
std::vector<float> &predtion_cache_from_train = predtion_cache.predictions.HostVector();
|
||||
std::vector<float>& out_predictions_h = out_predictions.predictions.HostVector();
|
||||
std::vector<float>& predtion_cache_from_train = predtion_cache.predictions.HostVector();
|
||||
for (size_t i = 0; i < out_predictions_h.size(); ++i) {
|
||||
ASSERT_NEAR(out_predictions_h[i], predtion_cache_from_train[i], kRtEps);
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
TEST(CPUPredictor, GHistIndex) {
|
||||
TEST(CPUPredictor, GHistIndexTraining) {
|
||||
size_t constexpr kRows{128}, kCols{16}, kBins{64};
|
||||
auto p_hist = RandomDataGenerator{kRows, kCols, 0.0}.Bins(kBins).GenerateQuantileDMatrix();
|
||||
Context ctx;
|
||||
auto p_hist = RandomDataGenerator{kRows, kCols, 0.0}.Bins(kBins).GenerateQuantileDMatrix(false);
|
||||
HostDeviceVector<float> storage(kRows * kCols);
|
||||
auto columnar = RandomDataGenerator{kRows, kCols, 0.0}.GenerateArrayInterface(&storage);
|
||||
auto adapter = data::ArrayAdapter(columnar.c_str());
|
||||
std::shared_ptr<DMatrix> p_full{
|
||||
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)};
|
||||
TestTrainingPrediction(kRows, kBins, "hist", p_full, p_hist);
|
||||
TestTrainingPrediction(&ctx, kRows, kBins, p_full, p_hist);
|
||||
}
|
||||
|
||||
TEST(CPUPredictor, CategoricalPrediction) {
|
||||
TestCategoricalPrediction("cpu_predictor");
|
||||
Context ctx;
|
||||
TestCategoricalPrediction(&ctx, false);
|
||||
}
|
||||
|
||||
TEST(CPUPredictor, CategoricalPredictionColumnSplit) {
|
||||
TestCategoricalPredictionColumnSplit("cpu_predictor");
|
||||
Context ctx;
|
||||
TestCategoricalPredictionColumnSplit(&ctx);
|
||||
}
|
||||
|
||||
TEST(CPUPredictor, CategoricalPredictLeaf) {
|
||||
TestCategoricalPredictLeaf(StringView{"cpu_predictor"});
|
||||
Context ctx;
|
||||
TestCategoricalPredictLeaf(&ctx, false);
|
||||
}
|
||||
|
||||
TEST(CPUPredictor, CategoricalPredictLeafColumnSplit) {
|
||||
TestCategoricalPredictLeafColumnSplit(StringView{"cpu_predictor"});
|
||||
Context ctx;
|
||||
TestCategoricalPredictLeafColumnSplit(&ctx);
|
||||
}
|
||||
|
||||
TEST(CpuPredictor, UpdatePredictionCache) {
|
||||
@@ -244,21 +252,25 @@ TEST(CpuPredictor, UpdatePredictionCache) {
|
||||
}
|
||||
|
||||
TEST(CpuPredictor, LesserFeatures) {
|
||||
TestPredictionWithLesserFeatures("cpu_predictor");
|
||||
Context ctx;
|
||||
TestPredictionWithLesserFeatures(&ctx);
|
||||
}
|
||||
|
||||
TEST(CpuPredictor, LesserFeaturesColumnSplit) {
|
||||
TestPredictionWithLesserFeaturesColumnSplit("cpu_predictor");
|
||||
Context ctx;
|
||||
TestPredictionWithLesserFeaturesColumnSplit(&ctx);
|
||||
}
|
||||
|
||||
TEST(CpuPredictor, Sparse) {
|
||||
TestSparsePrediction(0.2, "cpu_predictor");
|
||||
TestSparsePrediction(0.8, "cpu_predictor");
|
||||
Context ctx;
|
||||
TestSparsePrediction(&ctx, 0.2);
|
||||
TestSparsePrediction(&ctx, 0.8);
|
||||
}
|
||||
|
||||
TEST(CpuPredictor, SparseColumnSplit) {
|
||||
TestSparsePredictionColumnSplit(0.2, "cpu_predictor");
|
||||
TestSparsePredictionColumnSplit(0.8, "cpu_predictor");
|
||||
Context ctx;
|
||||
TestSparsePredictionColumnSplit(&ctx, 0.2);
|
||||
TestSparsePredictionColumnSplit(&ctx, 0.8);
|
||||
}
|
||||
|
||||
TEST(CpuPredictor, Multi) {
|
||||
@@ -266,4 +278,6 @@ TEST(CpuPredictor, Multi) {
|
||||
ctx.nthread = 1;
|
||||
TestVectorLeafPrediction(&ctx);
|
||||
}
|
||||
|
||||
TEST(CpuPredictor, Access) { TestPredictionDeviceAccess(); }
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -19,8 +19,7 @@
|
||||
#include "../helpers.h"
|
||||
#include "test_predictor.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace predictor {
|
||||
namespace xgboost::predictor {
|
||||
|
||||
TEST(GPUPredictor, Basic) {
|
||||
auto cpu_lparam = MakeCUDACtx(-1);
|
||||
@@ -38,9 +37,8 @@ TEST(GPUPredictor, Basic) {
|
||||
int n_row = i, n_col = i;
|
||||
auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatrix();
|
||||
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.gpu_id)};
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Ordinal())};
|
||||
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
|
||||
|
||||
// Test predict batch
|
||||
@@ -61,30 +59,92 @@ TEST(GPUPredictor, Basic) {
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
void VerifyBasicColumnSplit(std::array<std::vector<float>, 32> const& expected_result) {
|
||||
auto const world_size = collective::GetWorldSize();
|
||||
auto const rank = collective::GetRank();
|
||||
|
||||
auto ctx = MakeCUDACtx(GPUIDX);
|
||||
std::unique_ptr<Predictor> predictor =
|
||||
std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &ctx));
|
||||
predictor->Configure({});
|
||||
|
||||
for (size_t i = 1; i < 33; i *= 2) {
|
||||
size_t n_row = i, n_col = i;
|
||||
auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatrix();
|
||||
std::unique_ptr<DMatrix> sliced{dmat->SliceCol(world_size, rank)};
|
||||
|
||||
LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Ordinal())};
|
||||
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
|
||||
|
||||
// Test predict batch
|
||||
PredictionCacheEntry out_predictions;
|
||||
|
||||
predictor->InitOutPredictions(sliced->Info(), &out_predictions.predictions, model);
|
||||
predictor->PredictBatch(sliced.get(), &out_predictions, model, 0);
|
||||
|
||||
std::vector<float>& out_predictions_h = out_predictions.predictions.HostVector();
|
||||
EXPECT_EQ(out_predictions_h, expected_result[i - 1]);
|
||||
}
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
class MGPUPredictorTest : public BaseMGPUTest {};
|
||||
|
||||
TEST_F(MGPUPredictorTest, BasicColumnSplit) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
std::unique_ptr<Predictor> predictor =
|
||||
std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &ctx));
|
||||
predictor->Configure({});
|
||||
|
||||
std::array<std::vector<float>, 32> result{};
|
||||
for (size_t i = 1; i < 33; i *= 2) {
|
||||
size_t n_row = i, n_col = i;
|
||||
auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatrix();
|
||||
|
||||
LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Ordinal())};
|
||||
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
|
||||
|
||||
// Test predict batch
|
||||
PredictionCacheEntry out_predictions;
|
||||
|
||||
predictor->InitOutPredictions(dmat->Info(), &out_predictions.predictions, model);
|
||||
predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);
|
||||
|
||||
std::vector<float>& out_predictions_h = out_predictions.predictions.HostVector();
|
||||
result[i - 1] = out_predictions_h;
|
||||
}
|
||||
|
||||
DoTest(VerifyBasicColumnSplit, result);
|
||||
}
|
||||
|
||||
TEST(GPUPredictor, EllpackBasic) {
|
||||
size_t constexpr kCols {8};
|
||||
size_t constexpr kCols{8};
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
for (size_t bins = 2; bins < 258; bins += 16) {
|
||||
size_t rows = bins * 16;
|
||||
auto p_m = RandomDataGenerator{rows, kCols, 0.0}.Bins(bins).Device(0).GenerateDeviceDMatrix();
|
||||
auto p_m =
|
||||
RandomDataGenerator{rows, kCols, 0.0}.Bins(bins).Device(0).GenerateDeviceDMatrix(false);
|
||||
ASSERT_FALSE(p_m->PageExists<SparsePage>());
|
||||
TestPredictionFromGradientIndex<EllpackPage>("gpu_predictor", rows, kCols, p_m);
|
||||
TestPredictionFromGradientIndex<EllpackPage>("gpu_predictor", bins, kCols, p_m);
|
||||
TestPredictionFromGradientIndex<EllpackPage>(&ctx, rows, kCols, p_m);
|
||||
TestPredictionFromGradientIndex<EllpackPage>(&ctx, bins, kCols, p_m);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(GPUPredictor, EllpackTraining) {
|
||||
size_t constexpr kRows { 128 }, kCols { 16 }, kBins { 64 };
|
||||
auto p_ellpack =
|
||||
RandomDataGenerator{kRows, kCols, 0.0}.Bins(kBins).Device(0).GenerateDeviceDMatrix();
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
size_t constexpr kRows{128}, kCols{16}, kBins{64};
|
||||
auto p_ellpack = RandomDataGenerator{kRows, kCols, 0.0}
|
||||
.Bins(kBins)
|
||||
.Device(ctx.Ordinal())
|
||||
.GenerateDeviceDMatrix(false);
|
||||
HostDeviceVector<float> storage(kRows * kCols);
|
||||
auto columnar = RandomDataGenerator{kRows, kCols, 0.0}
|
||||
.Device(0)
|
||||
.GenerateArrayInterface(&storage);
|
||||
auto columnar =
|
||||
RandomDataGenerator{kRows, kCols, 0.0}.Device(ctx.Ordinal()).GenerateArrayInterface(&storage);
|
||||
auto adapter = data::CupyAdapter(columnar);
|
||||
std::shared_ptr<DMatrix> p_full {
|
||||
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)
|
||||
};
|
||||
TestTrainingPrediction(kRows, kBins, "gpu_hist", p_full, p_ellpack);
|
||||
std::shared_ptr<DMatrix> p_full{
|
||||
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)};
|
||||
TestTrainingPrediction(&ctx, kRows, kBins, p_full, p_ellpack);
|
||||
}
|
||||
|
||||
TEST(GPUPredictor, ExternalMemoryTest) {
|
||||
@@ -94,9 +154,8 @@ TEST(GPUPredictor, ExternalMemoryTest) {
|
||||
gpu_predictor->Configure({});
|
||||
|
||||
const int n_classes = 3;
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
LearnerModelParam mparam{MakeMP(5, .5, n_classes, ctx.gpu_id)};
|
||||
Context ctx = MakeCUDACtx(0);
|
||||
LearnerModelParam mparam{MakeMP(5, .5, n_classes, ctx.Ordinal())};
|
||||
|
||||
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx, n_classes);
|
||||
std::vector<std::unique_ptr<DMatrix>> dmats;
|
||||
@@ -123,46 +182,44 @@ TEST(GPUPredictor, ExternalMemoryTest) {
|
||||
}
|
||||
|
||||
TEST(GPUPredictor, InplacePredictCupy) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
size_t constexpr kRows{128}, kCols{64};
|
||||
RandomDataGenerator gen(kRows, kCols, 0.5);
|
||||
gen.Device(0);
|
||||
gen.Device(ctx.Ordinal());
|
||||
HostDeviceVector<float> data;
|
||||
std::string interface_str = gen.GenerateArrayInterface(&data);
|
||||
std::shared_ptr<DMatrix> p_fmat{new data::DMatrixProxy};
|
||||
dynamic_cast<data::DMatrixProxy*>(p_fmat.get())->SetCUDAArray(interface_str.c_str());
|
||||
TestInplacePrediction(p_fmat, "gpu_predictor", kRows, kCols, 0);
|
||||
TestInplacePrediction(&ctx, p_fmat, kRows, kCols);
|
||||
}
|
||||
|
||||
TEST(GPUPredictor, InplacePredictCuDF) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
size_t constexpr kRows{128}, kCols{64};
|
||||
RandomDataGenerator gen(kRows, kCols, 0.5);
|
||||
gen.Device(0);
|
||||
gen.Device(ctx.Ordinal());
|
||||
std::vector<HostDeviceVector<float>> storage(kCols);
|
||||
auto interface_str = gen.GenerateColumnarArrayInterface(&storage);
|
||||
std::shared_ptr<DMatrix> p_fmat{new data::DMatrixProxy};
|
||||
dynamic_cast<data::DMatrixProxy*>(p_fmat.get())->SetCUDAArray(interface_str.c_str());
|
||||
TestInplacePrediction(p_fmat, "gpu_predictor", kRows, kCols, 0);
|
||||
TestInplacePrediction(&ctx, p_fmat, kRows, kCols);
|
||||
}
|
||||
|
||||
TEST(GpuPredictor, LesserFeatures) {
|
||||
TestPredictionWithLesserFeatures("gpu_predictor");
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestPredictionWithLesserFeatures(&ctx);
|
||||
}
|
||||
|
||||
// Very basic test of empty model
|
||||
TEST(GPUPredictor, ShapStump) {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
cudaSetDevice(0);
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
hipSetDevice(0);
|
||||
#endif
|
||||
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.gpu_id)};
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.Ordinal())};
|
||||
gbm::GBTreeModel model(&mparam, &ctx);
|
||||
|
||||
std::vector<std::unique_ptr<RegTree>> trees;
|
||||
trees.push_back(std::unique_ptr<RegTree>(new RegTree));
|
||||
trees.push_back(std::make_unique<RegTree>());
|
||||
model.CommitModelGroup(std::move(trees), 0);
|
||||
|
||||
auto gpu_lparam = MakeCUDACtx(0);
|
||||
@@ -183,13 +240,12 @@ TEST(GPUPredictor, ShapStump) {
|
||||
}
|
||||
|
||||
TEST(GPUPredictor, Shap) {
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.gpu_id)};
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.Ordinal())};
|
||||
gbm::GBTreeModel model(&mparam, &ctx);
|
||||
|
||||
std::vector<std::unique_ptr<RegTree>> trees;
|
||||
trees.push_back(std::unique_ptr<RegTree>(new RegTree));
|
||||
trees.push_back(std::make_unique<RegTree>());
|
||||
trees[0]->ExpandNode(0, 0, 0.5, true, 1.0, -1.0, 1.0, 0.0, 5.0, 2.0, 3.0);
|
||||
model.CommitModelGroup(std::move(trees), 0);
|
||||
|
||||
@@ -214,15 +270,18 @@ TEST(GPUPredictor, Shap) {
|
||||
}
|
||||
|
||||
TEST(GPUPredictor, IterationRange) {
|
||||
TestIterationRange("gpu_predictor");
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestIterationRange(&ctx);
|
||||
}
|
||||
|
||||
TEST(GPUPredictor, CategoricalPrediction) {
|
||||
TestCategoricalPrediction("gpu_predictor");
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestCategoricalPrediction(&ctx, false);
|
||||
}
|
||||
|
||||
TEST(GPUPredictor, CategoricalPredictLeaf) {
|
||||
TestCategoricalPredictLeaf(StringView{"gpu_predictor"});
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestCategoricalPredictLeaf(&ctx, false);
|
||||
}
|
||||
|
||||
TEST(GPUPredictor, PredictLeafBasic) {
|
||||
@@ -246,8 +305,8 @@ TEST(GPUPredictor, PredictLeafBasic) {
|
||||
}
|
||||
|
||||
TEST(GPUPredictor, Sparse) {
|
||||
TestSparsePrediction(0.2, "gpu_predictor");
|
||||
TestSparsePrediction(0.8, "gpu_predictor");
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestSparsePrediction(&ctx, 0.2);
|
||||
TestSparsePrediction(&ctx, 0.8);
|
||||
}
|
||||
} // namespace predictor
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::predictor
|
||||
|
||||
@@ -8,9 +8,11 @@
|
||||
#include <xgboost/data.h> // for DMatrix, BatchIterator, BatchSet, MetaInfo
|
||||
#include <xgboost/host_device_vector.h> // for HostDeviceVector
|
||||
#include <xgboost/predictor.h> // for PredictionCacheEntry, Predictor, Predic...
|
||||
#include <xgboost/string_view.h> // for StringView
|
||||
|
||||
#include <algorithm> // for max
|
||||
#include <limits> // for numeric_limits
|
||||
#include <memory> // for shared_ptr
|
||||
#include <unordered_map> // for unordered_map
|
||||
|
||||
#include "../../../src/common/bitfield.h" // for LBitField32
|
||||
@@ -42,67 +44,56 @@ TEST(Predictor, PredictionCache) {
|
||||
EXPECT_ANY_THROW(container.Entry(m));
|
||||
}
|
||||
|
||||
void TestTrainingPrediction(size_t rows, size_t bins,
|
||||
std::string tree_method,
|
||||
std::shared_ptr<DMatrix> p_full,
|
||||
std::shared_ptr<DMatrix> p_hist) {
|
||||
void TestTrainingPrediction(Context const *ctx, size_t rows, size_t bins,
|
||||
std::shared_ptr<DMatrix> p_full, std::shared_ptr<DMatrix> p_hist) {
|
||||
size_t constexpr kCols = 16;
|
||||
size_t constexpr kClasses = 3;
|
||||
size_t constexpr kIters = 3;
|
||||
|
||||
std::unique_ptr<Learner> learner;
|
||||
auto train = [&](std::string predictor) {
|
||||
p_hist->Info().labels.Reshape(rows, 1);
|
||||
auto &h_label = p_hist->Info().labels.Data()->HostVector();
|
||||
|
||||
for (size_t i = 0; i < rows; ++i) {
|
||||
h_label[i] = i % kClasses;
|
||||
}
|
||||
p_hist->Info().labels.Reshape(rows, 1);
|
||||
auto &h_label = p_hist->Info().labels.Data()->HostVector();
|
||||
|
||||
learner.reset(Learner::Create({}));
|
||||
learner->SetParam("tree_method", tree_method);
|
||||
learner->SetParam("objective", "multi:softprob");
|
||||
learner->SetParam("num_feature", std::to_string(kCols));
|
||||
learner->SetParam("num_class", std::to_string(kClasses));
|
||||
learner->SetParam("max_bin", std::to_string(bins));
|
||||
learner->SetParam("predictor", predictor);
|
||||
learner->Configure();
|
||||
for (size_t i = 0; i < rows; ++i) {
|
||||
h_label[i] = i % kClasses;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < kIters; ++i) {
|
||||
learner->UpdateOneIter(i, p_hist);
|
||||
}
|
||||
learner.reset(Learner::Create({}));
|
||||
learner->SetParams(Args{{"objective", "multi:softprob"},
|
||||
{"num_feature", std::to_string(kCols)},
|
||||
{"num_class", std::to_string(kClasses)},
|
||||
{"max_bin", std::to_string(bins)},
|
||||
{"device", ctx->DeviceName()}});
|
||||
learner->Configure();
|
||||
|
||||
Json model{Object{}};
|
||||
learner->SaveModel(&model);
|
||||
for (size_t i = 0; i < kIters; ++i) {
|
||||
learner->UpdateOneIter(i, p_hist);
|
||||
}
|
||||
|
||||
learner.reset(Learner::Create({}));
|
||||
learner->LoadModel(model);
|
||||
learner->SetParam("predictor", predictor);
|
||||
learner->Configure();
|
||||
Json model{Object{}};
|
||||
learner->SaveModel(&model);
|
||||
|
||||
HostDeviceVector<float> from_full;
|
||||
learner->Predict(p_full, false, &from_full, 0, 0);
|
||||
learner.reset(Learner::Create({}));
|
||||
learner->LoadModel(model);
|
||||
learner->SetParam("device", ctx->DeviceName());
|
||||
learner->Configure();
|
||||
|
||||
HostDeviceVector<float> from_hist;
|
||||
learner->Predict(p_hist, false, &from_hist, 0, 0);
|
||||
HostDeviceVector<float> from_full;
|
||||
learner->Predict(p_full, false, &from_full, 0, 0);
|
||||
|
||||
for (size_t i = 0; i < rows; ++i) {
|
||||
EXPECT_NEAR(from_hist.ConstHostVector()[i],
|
||||
from_full.ConstHostVector()[i], kRtEps);
|
||||
}
|
||||
};
|
||||
HostDeviceVector<float> from_hist;
|
||||
learner->Predict(p_hist, false, &from_hist, 0, 0);
|
||||
|
||||
if (tree_method == "gpu_hist") {
|
||||
train("gpu_predictor");
|
||||
} else {
|
||||
train("cpu_predictor");
|
||||
for (size_t i = 0; i < rows; ++i) {
|
||||
EXPECT_NEAR(from_hist.ConstHostVector()[i], from_full.ConstHostVector()[i], kRtEps);
|
||||
}
|
||||
}
|
||||
|
||||
void TestInplacePrediction(std::shared_ptr<DMatrix> x, std::string predictor, bst_row_t rows,
|
||||
bst_feature_t cols, int32_t device) {
|
||||
size_t constexpr kClasses { 4 };
|
||||
auto gen = RandomDataGenerator{rows, cols, 0.5}.Device(device);
|
||||
void TestInplacePrediction(Context const *ctx, std::shared_ptr<DMatrix> x, bst_row_t rows,
|
||||
bst_feature_t cols) {
|
||||
std::size_t constexpr kClasses { 4 };
|
||||
auto gen = RandomDataGenerator{rows, cols, 0.5}.Device(ctx->gpu_id);
|
||||
std::shared_ptr<DMatrix> m = gen.GenerateDMatrix(true, false, kClasses);
|
||||
|
||||
std::unique_ptr<Learner> learner {
|
||||
@@ -113,12 +104,14 @@ void TestInplacePrediction(std::shared_ptr<DMatrix> x, std::string predictor, bs
|
||||
learner->SetParam("num_class", std::to_string(kClasses));
|
||||
learner->SetParam("seed", "0");
|
||||
learner->SetParam("subsample", "0.5");
|
||||
learner->SetParam("gpu_id", std::to_string(device));
|
||||
learner->SetParam("predictor", predictor);
|
||||
learner->SetParam("tree_method", "hist");
|
||||
for (int32_t it = 0; it < 4; ++it) {
|
||||
learner->UpdateOneIter(it, m);
|
||||
}
|
||||
|
||||
learner->SetParam("device", ctx->DeviceName());
|
||||
learner->Configure();
|
||||
|
||||
HostDeviceVector<float> *p_out_predictions_0{nullptr};
|
||||
learner->InplacePredict(x, PredictionType::kMargin, std::numeric_limits<float>::quiet_NaN(),
|
||||
&p_out_predictions_0, 0, 2);
|
||||
@@ -149,67 +142,37 @@ void TestInplacePrediction(std::shared_ptr<DMatrix> x, std::string predictor, bs
|
||||
ASSERT_NEAR(h_pred[i], h_pred_0[i] + h_pred_1[i] - 0.5f, kRtEps);
|
||||
}
|
||||
|
||||
learner->SetParam("gpu_id", "-1");
|
||||
learner->SetParam("device", "cpu");
|
||||
learner->Configure();
|
||||
}
|
||||
|
||||
namespace {
|
||||
std::unique_ptr<Learner> LearnerForTest(std::shared_ptr<DMatrix> dmat, size_t iters,
|
||||
size_t forest = 1) {
|
||||
std::unique_ptr<Learner> LearnerForTest(Context const *ctx, std::shared_ptr<DMatrix> dmat,
|
||||
size_t iters, size_t forest = 1) {
|
||||
std::unique_ptr<Learner> learner{Learner::Create({dmat})};
|
||||
learner->SetParams(Args{{"num_parallel_tree", std::to_string(forest)}});
|
||||
learner->SetParams(
|
||||
Args{{"num_parallel_tree", std::to_string(forest)}, {"device", ctx->DeviceName()}});
|
||||
for (size_t i = 0; i < iters; ++i) {
|
||||
learner->UpdateOneIter(i, dmat);
|
||||
}
|
||||
|
||||
return learner;
|
||||
}
|
||||
|
||||
void VerifyPredictionWithLesserFeatures(Learner *learner, std::string const &predictor_name,
|
||||
size_t rows, std::shared_ptr<DMatrix> const &m_test,
|
||||
std::shared_ptr<DMatrix> const &m_invalid) {
|
||||
void VerifyPredictionWithLesserFeatures(Learner *learner, bst_row_t kRows,
|
||||
std::shared_ptr<DMatrix> m_test,
|
||||
std::shared_ptr<DMatrix> m_invalid) {
|
||||
HostDeviceVector<float> prediction;
|
||||
learner->SetParam("predictor", predictor_name);
|
||||
learner->Configure();
|
||||
Json config{Object()};
|
||||
learner->SaveConfig(&config);
|
||||
ASSERT_EQ(get<String>(config["learner"]["gradient_booster"]["gbtree_train_param"]["predictor"]),
|
||||
predictor_name);
|
||||
|
||||
learner->Predict(m_test, false, &prediction, 0, 0);
|
||||
ASSERT_EQ(prediction.Size(), rows);
|
||||
ASSERT_EQ(prediction.Size(), kRows);
|
||||
|
||||
ASSERT_THROW({ learner->Predict(m_invalid, false, &prediction, 0, 0); }, dmlc::Error);
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
HostDeviceVector<float> from_cpu;
|
||||
learner->SetParam("predictor", "cpu_predictor");
|
||||
learner->Predict(m_test, false, &from_cpu, 0, 0);
|
||||
|
||||
HostDeviceVector<float> from_cuda;
|
||||
learner->SetParam("predictor", "gpu_predictor");
|
||||
learner->Predict(m_test, false, &from_cuda, 0, 0);
|
||||
|
||||
auto const &h_cpu = from_cpu.ConstHostVector();
|
||||
auto const &h_gpu = from_cuda.ConstHostVector();
|
||||
for (size_t i = 0; i < h_cpu.size(); ++i) {
|
||||
ASSERT_NEAR(h_cpu[i], h_gpu[i], kRtEps);
|
||||
}
|
||||
#endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
void TestPredictionWithLesserFeatures(std::string predictor_name) {
|
||||
size_t constexpr kRows = 256, kTrainCols = 256, kTestCols = 4, kIters = 4;
|
||||
auto m_train = RandomDataGenerator(kRows, kTrainCols, 0.5).GenerateDMatrix(true);
|
||||
auto learner = LearnerForTest(m_train, kIters);
|
||||
auto m_test = RandomDataGenerator(kRows, kTestCols, 0.5).GenerateDMatrix(false);
|
||||
auto m_invalid = RandomDataGenerator(kRows, kTrainCols + 1, 0.5).GenerateDMatrix(false);
|
||||
VerifyPredictionWithLesserFeatures(learner.get(), predictor_name, kRows, m_test, m_invalid);
|
||||
}
|
||||
|
||||
namespace {
|
||||
void VerifyPredictionWithLesserFeaturesColumnSplit(Learner *learner,
|
||||
std::string const &predictor_name, size_t rows,
|
||||
void VerifyPredictionWithLesserFeaturesColumnSplit(Learner *learner, size_t rows,
|
||||
std::shared_ptr<DMatrix> m_test,
|
||||
std::shared_ptr<DMatrix> m_invalid) {
|
||||
auto const world_size = collective::GetWorldSize();
|
||||
@@ -217,20 +180,65 @@ void VerifyPredictionWithLesserFeaturesColumnSplit(Learner *learner,
|
||||
std::shared_ptr<DMatrix> sliced_test{m_test->SliceCol(world_size, rank)};
|
||||
std::shared_ptr<DMatrix> sliced_invalid{m_invalid->SliceCol(world_size, rank)};
|
||||
|
||||
VerifyPredictionWithLesserFeatures(learner, predictor_name, rows, sliced_test, sliced_invalid);
|
||||
VerifyPredictionWithLesserFeatures(learner, rows, sliced_test, sliced_invalid);
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
void TestPredictionWithLesserFeaturesColumnSplit(std::string predictor_name) {
|
||||
void TestPredictionWithLesserFeatures(Context const *ctx) {
|
||||
size_t constexpr kRows = 256, kTrainCols = 256, kTestCols = 4, kIters = 4;
|
||||
auto m_train = RandomDataGenerator(kRows, kTrainCols, 0.5).GenerateDMatrix(true);
|
||||
auto learner = LearnerForTest(m_train, kIters);
|
||||
auto learner = LearnerForTest(ctx, m_train, kIters);
|
||||
auto m_test = RandomDataGenerator(kRows, kTestCols, 0.5).GenerateDMatrix(false);
|
||||
auto m_invalid = RandomDataGenerator(kRows, kTrainCols + 1, 0.5).GenerateDMatrix(false);
|
||||
VerifyPredictionWithLesserFeatures(learner.get(), kRows, m_test, m_invalid);
|
||||
}
|
||||
|
||||
void TestPredictionDeviceAccess() {
|
||||
Context ctx;
|
||||
size_t constexpr kRows = 256, kTrainCols = 256, kTestCols = 4, kIters = 4;
|
||||
auto m_train = RandomDataGenerator(kRows, kTrainCols, 0.5).GenerateDMatrix(true);
|
||||
auto m_test = RandomDataGenerator(kRows, kTestCols, 0.5).GenerateDMatrix(false);
|
||||
auto learner = LearnerForTest(&ctx, m_train, kIters);
|
||||
|
||||
HostDeviceVector<float> from_cpu;
|
||||
{
|
||||
ASSERT_EQ(from_cpu.DeviceIdx(), Context::kCpuId);
|
||||
Context cpu_ctx;
|
||||
learner->SetParam("device", cpu_ctx.DeviceName());
|
||||
learner->Predict(m_test, false, &from_cpu, 0, 0);
|
||||
ASSERT_TRUE(from_cpu.HostCanWrite());
|
||||
ASSERT_FALSE(from_cpu.DeviceCanRead());
|
||||
}
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
HostDeviceVector<float> from_cuda;
|
||||
{
|
||||
Context cuda_ctx = MakeCUDACtx(0);
|
||||
learner->SetParam("device", cuda_ctx.DeviceName());
|
||||
learner->Predict(m_test, false, &from_cuda, 0, 0);
|
||||
ASSERT_EQ(from_cuda.DeviceIdx(), 0);
|
||||
ASSERT_TRUE(from_cuda.DeviceCanWrite());
|
||||
ASSERT_FALSE(from_cuda.HostCanRead());
|
||||
}
|
||||
|
||||
auto const &h_cpu = from_cpu.ConstHostVector();
|
||||
auto const &h_gpu = from_cuda.ConstHostVector();
|
||||
for (size_t i = 0; i < h_cpu.size(); ++i) {
|
||||
ASSERT_NEAR(h_cpu[i], h_gpu[i], kRtEps);
|
||||
}
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
}
|
||||
|
||||
void TestPredictionWithLesserFeaturesColumnSplit(Context const *ctx) {
|
||||
size_t constexpr kRows = 256, kTrainCols = 256, kTestCols = 4, kIters = 4;
|
||||
auto m_train = RandomDataGenerator(kRows, kTrainCols, 0.5).GenerateDMatrix(true);
|
||||
auto learner = LearnerForTest(ctx, m_train, kIters);
|
||||
auto m_test = RandomDataGenerator(kRows, kTestCols, 0.5).GenerateDMatrix(false);
|
||||
auto m_invalid = RandomDataGenerator(kRows, kTrainCols + 1, 0.5).GenerateDMatrix(false);
|
||||
|
||||
auto constexpr kWorldSize = 2;
|
||||
RunWithInMemoryCommunicator(kWorldSize, VerifyPredictionWithLesserFeaturesColumnSplit,
|
||||
learner.get(), predictor_name, kRows, m_test, m_invalid);
|
||||
learner.get(), kRows, m_test, m_invalid);
|
||||
}
|
||||
|
||||
void GBTreeModelForTest(gbm::GBTreeModel *model, uint32_t split_ind,
|
||||
@@ -252,7 +260,7 @@ void GBTreeModelForTest(gbm::GBTreeModel *model, uint32_t split_ind,
|
||||
model->CommitModelGroup(std::move(trees), 0);
|
||||
}
|
||||
|
||||
void TestCategoricalPrediction(std::string name, bool is_column_split) {
|
||||
void TestCategoricalPrediction(Context const* ctx, bool is_column_split) {
|
||||
size_t constexpr kCols = 10;
|
||||
PredictionCacheEntry out_predictions;
|
||||
|
||||
@@ -262,13 +270,10 @@ void TestCategoricalPrediction(std::string name, bool is_column_split) {
|
||||
float left_weight = 1.3f;
|
||||
float right_weight = 1.7f;
|
||||
|
||||
Context ctx;
|
||||
ctx.UpdateAllowUnknown(Args{});
|
||||
gbm::GBTreeModel model(&mparam, &ctx);
|
||||
gbm::GBTreeModel model(&mparam, ctx);
|
||||
GBTreeModelForTest(&model, split_ind, split_cat, left_weight, right_weight);
|
||||
|
||||
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
|
||||
std::unique_ptr<Predictor> predictor{Predictor::Create(name.c_str(), &ctx)};
|
||||
std::unique_ptr<Predictor> predictor{CreatePredictorForTest(ctx)};
|
||||
|
||||
std::vector<float> row(kCols);
|
||||
row[split_ind] = split_cat;
|
||||
@@ -298,12 +303,12 @@ void TestCategoricalPrediction(std::string name, bool is_column_split) {
|
||||
ASSERT_EQ(out_predictions.predictions.HostVector()[0], left_weight + score);
|
||||
}
|
||||
|
||||
void TestCategoricalPredictionColumnSplit(std::string name) {
|
||||
void TestCategoricalPredictionColumnSplit(Context const *ctx) {
|
||||
auto constexpr kWorldSize = 2;
|
||||
RunWithInMemoryCommunicator(kWorldSize, TestCategoricalPrediction, name, true);
|
||||
RunWithInMemoryCommunicator(kWorldSize, TestCategoricalPrediction, ctx, true);
|
||||
}
|
||||
|
||||
void TestCategoricalPredictLeaf(StringView name, bool is_column_split) {
|
||||
void TestCategoricalPredictLeaf(Context const *ctx, bool is_column_split) {
|
||||
size_t constexpr kCols = 10;
|
||||
PredictionCacheEntry out_predictions;
|
||||
|
||||
@@ -314,14 +319,10 @@ void TestCategoricalPredictLeaf(StringView name, bool is_column_split) {
|
||||
float left_weight = 1.3f;
|
||||
float right_weight = 1.7f;
|
||||
|
||||
Context ctx;
|
||||
ctx.UpdateAllowUnknown(Args{});
|
||||
|
||||
gbm::GBTreeModel model(&mparam, &ctx);
|
||||
gbm::GBTreeModel model(&mparam, ctx);
|
||||
GBTreeModelForTest(&model, split_ind, split_cat, left_weight, right_weight);
|
||||
|
||||
ctx.gpu_id = 0;
|
||||
std::unique_ptr<Predictor> predictor{Predictor::Create(name.c_str(), &ctx)};
|
||||
std::unique_ptr<Predictor> predictor{CreatePredictorForTest(ctx)};
|
||||
|
||||
std::vector<float> row(kCols);
|
||||
row[split_ind] = split_cat;
|
||||
@@ -346,19 +347,21 @@ void TestCategoricalPredictLeaf(StringView name, bool is_column_split) {
|
||||
ASSERT_EQ(out_predictions.predictions.HostVector()[0], 1);
|
||||
}
|
||||
|
||||
void TestCategoricalPredictLeafColumnSplit(StringView name) {
|
||||
void TestCategoricalPredictLeafColumnSplit(Context const *ctx) {
|
||||
auto constexpr kWorldSize = 2;
|
||||
RunWithInMemoryCommunicator(kWorldSize, TestCategoricalPredictLeaf, name, true);
|
||||
RunWithInMemoryCommunicator(kWorldSize, TestCategoricalPredictLeaf, ctx, true);
|
||||
}
|
||||
|
||||
void TestIterationRange(std::string name) {
|
||||
void TestIterationRange(Context const* ctx) {
|
||||
size_t constexpr kRows = 1000, kCols = 20, kClasses = 4, kForest = 3, kIters = 10;
|
||||
auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix(true, true, kClasses);
|
||||
auto learner = LearnerForTest(dmat, kIters, kForest);
|
||||
learner->SetParams(Args{{"predictor", name}});
|
||||
auto dmat = RandomDataGenerator(kRows, kCols, 0)
|
||||
.Device(ctx->gpu_id)
|
||||
.GenerateDMatrix(true, true, kClasses);
|
||||
auto learner = LearnerForTest(ctx, dmat, kIters, kForest);
|
||||
|
||||
bool bound = false;
|
||||
std::unique_ptr<Learner> sliced {learner->Slice(0, 3, 1, &bound)};
|
||||
bst_layer_t lend{3};
|
||||
std::unique_ptr<Learner> sliced{learner->Slice(0, lend, 1, &bound)};
|
||||
ASSERT_FALSE(bound);
|
||||
|
||||
HostDeviceVector<float> out_predt_sliced;
|
||||
@@ -366,11 +369,8 @@ void TestIterationRange(std::string name) {
|
||||
|
||||
// margin
|
||||
{
|
||||
sliced->Predict(dmat, true, &out_predt_sliced, 0, 0, false, false, false,
|
||||
false, false);
|
||||
|
||||
learner->Predict(dmat, true, &out_predt_ranged, 0, 3, false, false, false,
|
||||
false, false);
|
||||
sliced->Predict(dmat, true, &out_predt_sliced, 0, 0, false, false, false, false, false);
|
||||
learner->Predict(dmat, true, &out_predt_ranged, 0, lend, false, false, false, false, false);
|
||||
|
||||
auto const &h_sliced = out_predt_sliced.HostVector();
|
||||
auto const &h_range = out_predt_ranged.HostVector();
|
||||
@@ -380,11 +380,8 @@ void TestIterationRange(std::string name) {
|
||||
|
||||
// SHAP
|
||||
{
|
||||
sliced->Predict(dmat, false, &out_predt_sliced, 0, 0, false, false,
|
||||
true, false, false);
|
||||
|
||||
learner->Predict(dmat, false, &out_predt_ranged, 0, 3, false, false, true,
|
||||
false, false);
|
||||
sliced->Predict(dmat, false, &out_predt_sliced, 0, 0, false, false, true, false, false);
|
||||
learner->Predict(dmat, false, &out_predt_ranged, 0, lend, false, false, true, false, false);
|
||||
|
||||
auto const &h_sliced = out_predt_sliced.HostVector();
|
||||
auto const &h_range = out_predt_ranged.HostVector();
|
||||
@@ -394,10 +391,8 @@ void TestIterationRange(std::string name) {
|
||||
|
||||
// SHAP interaction
|
||||
{
|
||||
sliced->Predict(dmat, false, &out_predt_sliced, 0, 0, false, false,
|
||||
false, false, true);
|
||||
learner->Predict(dmat, false, &out_predt_ranged, 0, 3, false, false, false,
|
||||
false, true);
|
||||
sliced->Predict(dmat, false, &out_predt_sliced, 0, 0, false, false, false, false, true);
|
||||
learner->Predict(dmat, false, &out_predt_ranged, 0, lend, false, false, false, false, true);
|
||||
auto const &h_sliced = out_predt_sliced.HostVector();
|
||||
auto const &h_range = out_predt_ranged.HostVector();
|
||||
ASSERT_EQ(h_sliced.size(), h_range.size());
|
||||
@@ -406,10 +401,8 @@ void TestIterationRange(std::string name) {
|
||||
|
||||
// Leaf
|
||||
{
|
||||
sliced->Predict(dmat, false, &out_predt_sliced, 0, 0, false, true,
|
||||
false, false, false);
|
||||
learner->Predict(dmat, false, &out_predt_ranged, 0, 3, false, true, false,
|
||||
false, false);
|
||||
sliced->Predict(dmat, false, &out_predt_sliced, 0, 0, false, true, false, false, false);
|
||||
learner->Predict(dmat, false, &out_predt_ranged, 0, lend, false, true, false, false, false);
|
||||
auto const &h_sliced = out_predt_sliced.HostVector();
|
||||
auto const &h_range = out_predt_ranged.HostVector();
|
||||
ASSERT_EQ(h_sliced.size(), h_range.size());
|
||||
@@ -456,11 +449,12 @@ void VerifyIterationRangeColumnSplit(DMatrix *dmat, Learner *learner, Learner *s
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
void TestIterationRangeColumnSplit(std::string name) {
|
||||
void TestIterationRangeColumnSplit(Context const* ctx) {
|
||||
size_t constexpr kRows = 1000, kCols = 20, kClasses = 4, kForest = 3, kIters = 10;
|
||||
auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix(true, true, kClasses);
|
||||
auto learner = LearnerForTest(dmat, kIters, kForest);
|
||||
learner->SetParams(Args{{"predictor", name}});
|
||||
auto learner = LearnerForTest(ctx, dmat, kIters, kForest);
|
||||
|
||||
learner->SetParam("device", ctx->DeviceName());
|
||||
|
||||
bool bound = false;
|
||||
std::unique_ptr<Learner> sliced{learner->Slice(0, 3, 1, &bound)};
|
||||
@@ -488,10 +482,10 @@ void TestIterationRangeColumnSplit(std::string name) {
|
||||
leaf_ranged, leaf_sliced);
|
||||
}
|
||||
|
||||
void TestSparsePrediction(float sparsity, std::string predictor) {
|
||||
void TestSparsePrediction(Context const *ctx, float sparsity) {
|
||||
size_t constexpr kRows = 512, kCols = 128, kIters = 4;
|
||||
auto Xy = RandomDataGenerator(kRows, kCols, sparsity).GenerateDMatrix(true);
|
||||
auto learner = LearnerForTest(Xy, kIters);
|
||||
auto learner = LearnerForTest(ctx, Xy, kIters);
|
||||
|
||||
HostDeviceVector<float> sparse_predt;
|
||||
|
||||
@@ -501,11 +495,14 @@ void TestSparsePrediction(float sparsity, std::string predictor) {
|
||||
learner.reset(Learner::Create({Xy}));
|
||||
learner->LoadModel(model);
|
||||
|
||||
learner->SetParam("predictor", predictor);
|
||||
if (ctx->IsCUDA()) {
|
||||
learner->SetParam("tree_method", "gpu_hist");
|
||||
learner->SetParam("gpu_id", std::to_string(ctx->gpu_id));
|
||||
}
|
||||
learner->Predict(Xy, false, &sparse_predt, 0, 0);
|
||||
|
||||
HostDeviceVector<float> with_nan(kRows * kCols, std::numeric_limits<float>::quiet_NaN());
|
||||
auto& h_with_nan = with_nan.HostVector();
|
||||
auto &h_with_nan = with_nan.HostVector();
|
||||
for (auto const &page : Xy->GetBatches<SparsePage>()) {
|
||||
auto batch = page.GetView();
|
||||
for (size_t i = 0; i < batch.Size(); ++i) {
|
||||
@@ -516,7 +513,8 @@ void TestSparsePrediction(float sparsity, std::string predictor) {
|
||||
}
|
||||
}
|
||||
|
||||
learner->SetParam("predictor", "cpu_predictor");
|
||||
learner->SetParam("tree_method", "hist");
|
||||
learner->SetParam("gpu_id", "-1");
|
||||
// Xcode_12.4 doesn't compile with `std::make_shared`.
|
||||
auto dense = std::shared_ptr<DMatrix>(new data::DMatrixProxy{});
|
||||
auto array_interface = GetArrayInterface(&with_nan, kRows, kCols);
|
||||
@@ -527,8 +525,8 @@ void TestSparsePrediction(float sparsity, std::string predictor) {
|
||||
learner->InplacePredict(dense, PredictionType::kValue, std::numeric_limits<float>::quiet_NaN(),
|
||||
&p_dense_predt, 0, 0);
|
||||
|
||||
auto const& dense_predt = *p_dense_predt;
|
||||
if (predictor == "cpu_predictor") {
|
||||
auto const &dense_predt = *p_dense_predt;
|
||||
if (ctx->IsCPU()) {
|
||||
ASSERT_EQ(dense_predt.HostVector(), sparse_predt.HostVector());
|
||||
} else {
|
||||
auto const &h_dense = dense_predt.HostVector();
|
||||
@@ -556,10 +554,10 @@ void VerifySparsePredictionColumnSplit(DMatrix *dmat, Learner *learner,
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
void TestSparsePredictionColumnSplit(float sparsity, std::string predictor) {
|
||||
void TestSparsePredictionColumnSplit(Context const* ctx, float sparsity) {
|
||||
size_t constexpr kRows = 512, kCols = 128, kIters = 4;
|
||||
auto Xy = RandomDataGenerator(kRows, kCols, sparsity).GenerateDMatrix(true);
|
||||
auto learner = LearnerForTest(Xy, kIters);
|
||||
auto learner = LearnerForTest(ctx, Xy, kIters);
|
||||
|
||||
HostDeviceVector<float> sparse_predt;
|
||||
|
||||
@@ -569,7 +567,7 @@ void TestSparsePredictionColumnSplit(float sparsity, std::string predictor) {
|
||||
learner.reset(Learner::Create({Xy}));
|
||||
learner->LoadModel(model);
|
||||
|
||||
learner->SetParam("predictor", predictor);
|
||||
learner->SetParam("device", ctx->DeviceName());
|
||||
learner->Predict(Xy, false, &sparse_predt, 0, 0);
|
||||
|
||||
auto constexpr kWorldSize = 2;
|
||||
|
||||
@@ -31,8 +31,17 @@ inline gbm::GBTreeModel CreateTestModel(LearnerModelParam const* param, Context
|
||||
return model;
|
||||
}
|
||||
|
||||
/**
 * \brief Create the predictor that matches the device of the given context.
 *
 * \param ctx Context used both to pick the predictor and to construct it.
 * \return The result of Predictor::Create for "cpu_predictor" when the context is
 *         on CPU, otherwise for "gpu_predictor".
 */
inline auto CreatePredictorForTest(Context const* ctx) {
  return Predictor::Create(ctx->IsCPU() ? "cpu_predictor" : "gpu_predictor", ctx);
}
|
||||
|
||||
// fixme: cpu test
|
||||
template <typename Page>
|
||||
void TestPredictionFromGradientIndex(std::string name, size_t rows, size_t cols,
|
||||
void TestPredictionFromGradientIndex(Context const* ctx, size_t rows, size_t cols,
|
||||
std::shared_ptr<DMatrix> p_hist) {
|
||||
constexpr size_t kClasses { 3 };
|
||||
|
||||
@@ -40,12 +49,10 @@ void TestPredictionFromGradientIndex(std::string name, size_t rows, size_t cols,
|
||||
auto cuda_ctx = MakeCUDACtx(0);
|
||||
|
||||
std::unique_ptr<Predictor> predictor =
|
||||
std::unique_ptr<Predictor>(Predictor::Create(name, &cuda_ctx));
|
||||
std::unique_ptr<Predictor>(CreatePredictorForTest(&cuda_ctx));
|
||||
predictor->Configure({});
|
||||
|
||||
Context ctx;
|
||||
ctx.UpdateAllowUnknown(Args{});
|
||||
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx, kClasses);
|
||||
gbm::GBTreeModel model = CreateTestModel(&mparam, ctx, kClasses);
|
||||
|
||||
{
|
||||
auto p_precise = RandomDataGenerator(rows, cols, 0).GenerateDMatrix();
|
||||
@@ -77,32 +84,33 @@ void TestPredictionFromGradientIndex(std::string name, size_t rows, size_t cols,
|
||||
}
|
||||
|
||||
// p_full and p_hist should come from the same data set.
|
||||
void TestTrainingPrediction(size_t rows, size_t bins, std::string tree_method,
|
||||
std::shared_ptr<DMatrix> p_full,
|
||||
std::shared_ptr<DMatrix> p_hist);
|
||||
void TestTrainingPrediction(Context const* ctx, size_t rows, size_t bins,
|
||||
std::shared_ptr<DMatrix> p_full, std::shared_ptr<DMatrix> p_hist);
|
||||
|
||||
void TestInplacePrediction(std::shared_ptr<DMatrix> x, std::string predictor, bst_row_t rows,
|
||||
bst_feature_t cols, int32_t device = -1);
|
||||
void TestInplacePrediction(Context const* ctx, std::shared_ptr<DMatrix> x, bst_row_t rows,
|
||||
bst_feature_t cols);
|
||||
|
||||
void TestPredictionWithLesserFeatures(std::string preditor_name);
|
||||
void TestPredictionWithLesserFeatures(Context const* ctx);
|
||||
|
||||
void TestPredictionWithLesserFeaturesColumnSplit(std::string preditor_name);
|
||||
void TestPredictionDeviceAccess();
|
||||
|
||||
void TestCategoricalPrediction(std::string name, bool is_column_split = false);
|
||||
void TestCategoricalPrediction(Context const* ctx, bool is_column_split);
|
||||
|
||||
void TestCategoricalPredictionColumnSplit(std::string name);
|
||||
void TestCategoricalPredictionColumnSplit(Context const* ctx);
|
||||
|
||||
void TestCategoricalPredictLeaf(StringView name, bool is_column_split = false);
|
||||
void TestPredictionWithLesserFeaturesColumnSplit(Context const* ctx);
|
||||
|
||||
void TestCategoricalPredictLeafColumnSplit(StringView name);
|
||||
void TestCategoricalPredictLeaf(Context const* ctx, bool is_column_split);
|
||||
|
||||
void TestIterationRange(std::string name);
|
||||
void TestCategoricalPredictLeafColumnSplit(Context const* ctx);
|
||||
|
||||
void TestIterationRangeColumnSplit(std::string name);
|
||||
void TestIterationRange(Context const* ctx);
|
||||
|
||||
void TestSparsePrediction(float sparsity, std::string predictor);
|
||||
void TestIterationRangeColumnSplit(Context const* ctx);
|
||||
|
||||
void TestSparsePredictionColumnSplit(float sparsity, std::string predictor);
|
||||
void TestSparsePrediction(Context const* ctx, float sparsity);
|
||||
|
||||
void TestSparsePredictionColumnSplit(Context const* ctx, float sparsity);
|
||||
|
||||
void TestVectorLeafPrediction(Context const* ctx);
|
||||
} // namespace xgboost
|
||||
|
||||
31
tests/cpp/test_context.cc
Normal file
31
tests/cpp/test_context.cc
Normal file
@@ -0,0 +1,31 @@
|
||||
/**
|
||||
* Copyright 2023, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/base.h>
|
||||
#include <xgboost/context.h>
|
||||
|
||||
namespace xgboost {
|
||||
// A default-constructed context is on CPU, dispatches to the first (CPU) callback and
// rejects every malformed "device" specification listed below.
TEST(Context, CPU) {
  Context ctx;
  ASSERT_EQ(ctx.Device(), DeviceOrd::CPU());
  ASSERT_EQ(ctx.Ordinal(), Context::kCpuId);

  std::int32_t dispatched{0};
  ctx.DispatchDevice([&dispatched] { dispatched = -1; }, [&dispatched] { dispatched = 1; });
  ASSERT_EQ(dispatched, -1);

  // Each of these specifications must be refused with a dmlc::Error.
  for (char const* spec : {"oops", "-1", "CPU", "CUDA", "CPU:0", "gpu:+0", "gpu:0-", "gpu:", ":",
                           ":gpu", ":0", ""}) {
    ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", spec}}), dmlc::Error);
  }
}
|
||||
} // namespace xgboost
|
||||
99
tests/cpp/test_context.cu
Normal file
99
tests/cpp/test_context.cu
Normal file
@@ -0,0 +1,99 @@
|
||||
/**
|
||||
* Copyright 2023, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/base.h> // for Args
|
||||
#include <xgboost/context.h>
|
||||
#include <xgboost/json.h> // for FromJson, ToJson
|
||||
|
||||
#include <string> // for string, to_string
|
||||
|
||||
#include "../../src/common/common.h" // for AllVisibleGPUs
|
||||
|
||||
namespace xgboost {
|
||||
namespace {
|
||||
/**
 * \brief Assert that a context refers to the CUDA device with the given ordinal.
 *
 * Checks every accessor that exposes the device (gpu_id, Device(), DeviceName(),
 * Ordinal(), IsCUDA(), IsCPU()) and that a JSON round trip yields the same device.
 *
 * \param ctx Context under test.
 * \param ord Expected device ordinal.
 */
void TestCUDA(Context const& ctx, bst_d_ordinal_t ord) {
  auto const expected_name = "cuda:" + std::to_string(ord);

  ASSERT_EQ(ctx.gpu_id, ord);
  ASSERT_EQ(ctx.Device().ordinal, ord);
  ASSERT_EQ(ctx.DeviceName(), expected_name);
  ASSERT_EQ(ctx.Ordinal(), ord);
  ASSERT_TRUE(ctx.IsCUDA());
  ASSERT_FALSE(ctx.IsCPU());
  ASSERT_EQ(ctx.Device(), DeviceOrd::CUDA(ord));

  // Serialise to JSON and load into a fresh context; the device must be preserved.
  Json serialized{ToJson(ctx)};
  Context restored;
  FromJson(serialized, &restored);
  ASSERT_EQ(restored.Device(), ctx.Device());
  ASSERT_EQ(restored.gpu_id, ctx.gpu_id);
}
|
||||
} // namespace
|
||||
|
||||
// Exercises the "device" parameter: explicit ordinals, an ordinal beyond the visible
// device count, copies and CPU/CUDA conversions, strict validation and the "gpu" alias.
TEST(Context, DeviceOrdinal) {
  Context ctx;
  auto const n_visible = common::AllVisibleGPUs();

  // The last visible device, addressed by its ordinal.
  auto ordinal = n_visible - 1;
  ctx.UpdateAllowUnknown(Args{{"device", "cuda:" + std::to_string(ordinal)}});
  TestCUDA(ctx, ordinal);

  // An ordinal past the visible devices is expected modulo the visible count.
  ctx.UpdateAllowUnknown(Args{{"device", "cuda:" + std::to_string(1001)}});
  ordinal = 1001 % n_visible;
  TestCUDA(ctx, ordinal);

  // On a CUDA context the second callback is the one that runs.
  std::int32_t dispatched{0};
  ctx.DispatchDevice([&dispatched] { dispatched = -1; }, [&dispatched] { dispatched = 1; });
  ASSERT_EQ(dispatched, 1);

  // A copy keeps the device.
  Context copied = ctx;
  TestCUDA(copied, ctx.Ordinal());

  // Conversion to CPU and back to CUDA.
  auto cpu_ctx = ctx.MakeCPU();
  ASSERT_TRUE(cpu_ctx.IsCPU());
  ASSERT_EQ(cpu_ctx.Ordinal(), Context::kCpuId);
  ASSERT_EQ(cpu_ctx.Device(), DeviceOrd::CPU());

  auto cuda_ctx = cpu_ctx.MakeCUDA(ctx.Ordinal());
  TestCUDA(cuda_ctx, ctx.Ordinal());

  // With strict validation enabled, an invalid ordinal throws.
  cuda_ctx.UpdateAllowUnknown(Args{{"fail_on_invalid_gpu_id", "true"}});
  ASSERT_THROW(cuda_ctx.UpdateAllowUnknown(Args{{"device", "cuda:9999"}}), dmlc::Error);
  cuda_ctx.UpdateAllowUnknown(Args{{"device", "cuda:00"}});
  ASSERT_EQ(cuda_ctx.Ordinal(), 0);

  ctx.UpdateAllowUnknown(Args{{"device", "cpu"}});
  // Test alias
  ctx.UpdateAllowUnknown(Args{{"device", "gpu:0"}});
  TestCUDA(ctx, 0);
  ctx.UpdateAllowUnknown(Args{{"device", "gpu"}});
  TestCUDA(ctx, 0);

  // Test the thread local memory in dmlc is not linking different instances together.
  cpu_ctx.UpdateAllowUnknown(Args{{"device", "cpu"}});
  TestCUDA(ctx, 0);
  ctx.UpdateAllowUnknown(Args{});
  TestCUDA(ctx, 0);
}
|
||||
|
||||
// Exercises the "gpu_id" parameter, including its interaction with "device" and the
// switch back to CPU via "-1".
TEST(Context, GPUId) {
  Context ctx;
  ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
  TestCUDA(ctx, 0);

  // Select the last visible device through gpu_id.
  auto const n_visible = common::AllVisibleGPUs();
  auto ordinal = n_visible - 1;
  ctx.UpdateAllowUnknown(Args{{"gpu_id", std::to_string(ordinal)}});
  TestCUDA(ctx, ordinal);

  // An ordinal past the visible devices is expected modulo the visible count.
  auto const spec = "cuda:" + std::to_string(1001);
  ctx.UpdateAllowUnknown(Args{{"device", spec}});
  ordinal = 1001 % n_visible;
  TestCUDA(ctx, ordinal);

  // gpu_id = -1 moves the context back to CPU.
  ctx.UpdateAllowUnknown(Args{{"gpu_id", "-1"}});
  ASSERT_EQ(ctx.Device(), DeviceOrd::CPU());
}
|
||||
} // namespace xgboost
|
||||
@@ -27,7 +27,6 @@
|
||||
#include "../../src/common/io.h" // for LoadSequentialFile
|
||||
#include "../../src/common/linalg_op.h" // for ElementWiseTransformHost, begin, end
|
||||
#include "../../src/common/random.h" // for GlobalRandom
|
||||
#include "../../src/common/transform_iterator.h" // for IndexTransformIter
|
||||
#include "dmlc/io.h" // for Stream
|
||||
#include "dmlc/omp.h" // for omp_get_max_threads
|
||||
#include "dmlc/registry.h" // for Registry
|
||||
@@ -35,14 +34,13 @@
|
||||
#include "helpers.h" // for GetBaseScore, RandomDataGenerator
|
||||
#include "objective_helpers.h" // for MakeObjNamesForTest, ObjTestNameGenerator
|
||||
#include "xgboost/base.h" // for bst_float, Args, bst_feature_t, bst_int
|
||||
#include "xgboost/context.h" // for Context
|
||||
#include "xgboost/context.h" // for Context, DeviceOrd
|
||||
#include "xgboost/data.h" // for DMatrix, MetaInfo, DataType
|
||||
#include "xgboost/host_device_vector.h" // for HostDeviceVector
|
||||
#include "xgboost/json.h" // for Json, Object, get, String, IsA, opera...
|
||||
#include "xgboost/linalg.h" // for Tensor, TensorView
|
||||
#include "xgboost/logging.h" // for ConsoleLogger
|
||||
#include "xgboost/predictor.h" // for PredictionCacheEntry
|
||||
#include "xgboost/span.h" // for Span, operator!=, SpanIterator
|
||||
#include "xgboost/string_view.h" // for StringView
|
||||
|
||||
namespace xgboost {
|
||||
@@ -58,9 +56,9 @@ TEST(Learner, Basic) {
|
||||
auto minor = XGBOOST_VER_MINOR;
|
||||
auto patch = XGBOOST_VER_PATCH;
|
||||
|
||||
static_assert(std::is_integral<decltype(major)>::value, "Wrong major version type");
|
||||
static_assert(std::is_integral<decltype(minor)>::value, "Wrong minor version type");
|
||||
static_assert(std::is_integral<decltype(patch)>::value, "Wrong patch version type");
|
||||
static_assert(std::is_integral_v<decltype(major)>, "Wrong major version type");
|
||||
static_assert(std::is_integral_v<decltype(minor)>, "Wrong minor version type");
|
||||
static_assert(std::is_integral_v<decltype(patch)>, "Wrong patch version type");
|
||||
}
|
||||
|
||||
TEST(Learner, ParameterValidation) {
|
||||
@@ -92,10 +90,9 @@ TEST(Learner, CheckGroup) {
|
||||
size_t constexpr kNumRows = 17;
|
||||
bst_feature_t constexpr kNumCols = 15;
|
||||
|
||||
std::shared_ptr<DMatrix> p_mat{
|
||||
RandomDataGenerator{kNumRows, kNumCols, 0.0f}.GenerateDMatrix()};
|
||||
std::shared_ptr<DMatrix> p_mat{RandomDataGenerator{kNumRows, kNumCols, 0.0f}.GenerateDMatrix()};
|
||||
std::vector<bst_float> weight(kNumGroups, 1);
|
||||
std::vector<bst_int> group(kNumGroups);
|
||||
std::vector<bst_group_t> group(kNumGroups);
|
||||
group[0] = 2;
|
||||
group[1] = 3;
|
||||
group[2] = 7;
|
||||
@@ -218,6 +215,34 @@ TEST(Learner, JsonModelIO) {
|
||||
}
|
||||
}
|
||||
|
||||
// Saving a learner and loading it into a fresh one must reproduce the same evaluation
// output for the configured metrics.
TEST(Learner, ConfigIO) {
  bst_row_t n_samples = 128;
  bst_feature_t n_features = 12;
  std::shared_ptr<DMatrix> p_fmat{
      RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true, false, 2)};

  std::string buffer;
  std::string eval_before;
  std::string eval_after;

  // Train for one iteration, record the evaluation string, then serialise.
  {
    std::unique_ptr<Learner> trained{Learner::Create({p_fmat})};
    trained->SetParams(Args{{"eval_metric", "ndcg"}, {"eval_metric", "map"}});
    trained->Configure();
    trained->UpdateOneIter(0, p_fmat);
    eval_before = trained->EvalOneIter(0, {p_fmat}, {"Train"});
    common::MemoryBufferStream out_stream(&buffer);
    trained->Save(&out_stream);
  }

  // Load into a brand-new learner and evaluate again.
  {
    common::MemoryBufferStream in_stream(&buffer);
    std::unique_ptr<Learner> restored{Learner::Create({p_fmat})};
    restored->Load(&in_stream);
    eval_after = restored->EvalOneIter(0, {p_fmat}, {"Train"});
  }

  ASSERT_EQ(eval_before, eval_after);
}
|
||||
|
||||
// Crashes the test runner if there are race conditions.
|
||||
//
|
||||
// Build with additional cmake flags to enable thread sanitizer
|
||||
@@ -312,45 +337,36 @@ TEST(Learner, GPUConfiguration) {
|
||||
learner->SetParams({Arg{"booster", "gblinear"},
|
||||
Arg{"updater", "gpu_coord_descent"}});
|
||||
learner->UpdateOneIter(0, p_dmat);
|
||||
ASSERT_EQ(learner->Ctx()->gpu_id, 0);
|
||||
ASSERT_EQ(learner->Ctx()->Device(), DeviceOrd::CUDA(0));
|
||||
}
|
||||
{
|
||||
std::unique_ptr<Learner> learner {Learner::Create(mat)};
|
||||
std::unique_ptr<Learner> learner{Learner::Create(mat)};
|
||||
learner->SetParams({Arg{"tree_method", "gpu_hist"}});
|
||||
learner->Configure();
|
||||
ASSERT_EQ(learner->Ctx()->Device(), DeviceOrd::CUDA(0));
|
||||
learner->UpdateOneIter(0, p_dmat);
|
||||
ASSERT_EQ(learner->Ctx()->gpu_id, 0);
|
||||
ASSERT_EQ(learner->Ctx()->Device(), DeviceOrd::CUDA(0));
|
||||
}
|
||||
{
|
||||
std::unique_ptr<Learner> learner {Learner::Create(mat)};
|
||||
learner->SetParams({Arg{"tree_method", "gpu_hist"},
|
||||
Arg{"gpu_id", "-1"}});
|
||||
learner->UpdateOneIter(0, p_dmat);
|
||||
ASSERT_EQ(learner->Ctx()->gpu_id, 0);
|
||||
ASSERT_EQ(learner->Ctx()->Device(), DeviceOrd::CUDA(0));
|
||||
}
|
||||
{
|
||||
// with CPU algorithm
|
||||
std::unique_ptr<Learner> learner {Learner::Create(mat)};
|
||||
learner->SetParams({Arg{"tree_method", "hist"}});
|
||||
learner->UpdateOneIter(0, p_dmat);
|
||||
ASSERT_EQ(learner->Ctx()->gpu_id, -1);
|
||||
ASSERT_EQ(learner->Ctx()->Device(), DeviceOrd::CPU());
|
||||
}
|
||||
{
|
||||
// with CPU algorithm, but `gpu_id` takes priority
|
||||
std::unique_ptr<Learner> learner {Learner::Create(mat)};
|
||||
learner->SetParams({Arg{"tree_method", "hist"},
|
||||
Arg{"gpu_id", "0"}});
|
||||
learner->SetParams({Arg{"tree_method", "hist"}, Arg{"gpu_id", "0"}});
|
||||
learner->UpdateOneIter(0, p_dmat);
|
||||
ASSERT_EQ(learner->Ctx()->gpu_id, 0);
|
||||
}
|
||||
{
|
||||
// With CPU algorithm but GPU Predictor, this is to simulate when
|
||||
// XGBoost is only used for prediction, so tree method is not
|
||||
// specified.
|
||||
std::unique_ptr<Learner> learner {Learner::Create(mat)};
|
||||
learner->SetParams({Arg{"tree_method", "hist"},
|
||||
Arg{"predictor", "gpu_predictor"}});
|
||||
learner->UpdateOneIter(0, p_dmat);
|
||||
ASSERT_EQ(learner->Ctx()->gpu_id, 0);
|
||||
ASSERT_EQ(learner->Ctx()->Device(), DeviceOrd::CUDA(0));
|
||||
}
|
||||
}
|
||||
#endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
@@ -379,6 +395,8 @@ TEST(Learner, Seed) {
|
||||
TEST(Learner, ConstantSeed) {
|
||||
auto m = RandomDataGenerator{10, 10, 0}.GenerateDMatrix(true);
|
||||
std::unique_ptr<Learner> learner{Learner::Create({m})};
|
||||
// Use exact as it doesn't initialize column sampler at construction, which alters the rng.
|
||||
learner->SetParam("tree_method", "exact");
|
||||
learner->Configure(); // seed the global random
|
||||
|
||||
std::uniform_real_distribution<float> dist;
|
||||
@@ -637,33 +655,11 @@ TEST_F(InitBaseScore, InitWithPredict) { this->TestInitWithPredt(); }
|
||||
TEST_F(InitBaseScore, UpdateProcess) { this->TestUpdateProcess(); }
|
||||
|
||||
class TestColumnSplit : public ::testing::TestWithParam<std::string> {
|
||||
static auto MakeFmat(std::string const& obj) {
|
||||
auto constexpr kRows = 10, kCols = 10;
|
||||
auto p_fmat = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true);
|
||||
auto& h_upper = p_fmat->Info().labels_upper_bound_.HostVector();
|
||||
auto& h_lower = p_fmat->Info().labels_lower_bound_.HostVector();
|
||||
h_lower.resize(kRows);
|
||||
h_upper.resize(kRows);
|
||||
for (size_t i = 0; i < kRows; ++i) {
|
||||
h_lower[i] = 1;
|
||||
h_upper[i] = 10;
|
||||
}
|
||||
if (obj.find("rank:") != std::string::npos) {
|
||||
auto h_label = p_fmat->Info().labels.HostView();
|
||||
std::size_t k = 0;
|
||||
for (auto& v : h_label) {
|
||||
v = k % 2 == 0;
|
||||
++k;
|
||||
}
|
||||
}
|
||||
return p_fmat;
|
||||
};
|
||||
|
||||
void TestBaseScore(std::string objective, float expected_base_score, Json expected_model) {
|
||||
auto const world_size = collective::GetWorldSize();
|
||||
auto const rank = collective::GetRank();
|
||||
|
||||
auto p_fmat = MakeFmat(objective);
|
||||
auto p_fmat = MakeFmatForObjTest(objective);
|
||||
std::shared_ptr<DMatrix> sliced{p_fmat->SliceCol(world_size, rank)};
|
||||
std::unique_ptr<Learner> learner{Learner::Create({sliced})};
|
||||
learner->SetParam("tree_method", "approx");
|
||||
@@ -687,7 +683,7 @@ class TestColumnSplit : public ::testing::TestWithParam<std::string> {
|
||||
|
||||
public:
|
||||
void Run(std::string objective) {
|
||||
auto p_fmat = MakeFmat(objective);
|
||||
auto p_fmat = MakeFmatForObjTest(objective);
|
||||
std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};
|
||||
learner->SetParam("tree_method", "approx");
|
||||
learner->SetParam("objective", objective);
|
||||
|
||||
@@ -698,10 +698,6 @@ TEST_F(MultiClassesSerializationTest, GpuHist) {
|
||||
{"seed", "0"},
|
||||
{"nthread", "1"},
|
||||
{"max_depth", std::to_string(kClasses)},
|
||||
// Somehow rebuilding the cache can generate slightly
|
||||
// different result (1e-7) with CPU predictor for some
|
||||
// entries.
|
||||
{"predictor", "gpu_predictor"},
|
||||
// Mitigate the difference caused by hardware fused multiply
|
||||
// add to tree weight during update prediction cache.
|
||||
{"learning_rate", "1.0"},
|
||||
|
||||
@@ -289,8 +289,6 @@ TEST(GpuHist, PartitionTwoNodes) {
|
||||
dh::ToSpan(feature_histogram_b)};
|
||||
thrust::device_vector<GPUExpandEntry> results(2);
|
||||
evaluator.EvaluateSplits({0, 1}, 1, dh::ToSpan(inputs), shared_inputs, dh::ToSpan(results));
|
||||
GPUExpandEntry result_a = results[0];
|
||||
GPUExpandEntry result_b = results[1];
|
||||
EXPECT_EQ(std::bitset<32>(evaluator.GetHostNodeCats(0)[0]),
|
||||
std::bitset<32>("10000000000000000000000000000000"));
|
||||
EXPECT_EQ(std::bitset<32>(evaluator.GetHostNodeCats(1)[0]),
|
||||
|
||||
@@ -43,7 +43,8 @@ void VerifySampling(size_t page_size,
|
||||
EXPECT_NE(page->n_rows, kRows);
|
||||
}
|
||||
|
||||
GradientBasedSampler sampler(&ctx, page, kRows, param, subsample, sampling_method);
|
||||
GradientBasedSampler sampler(&ctx, kRows, param, subsample, sampling_method,
|
||||
!fixed_size_sampling);
|
||||
auto sample = sampler.Sample(&ctx, gpair.DeviceSpan(), dmat.get());
|
||||
|
||||
if (fixed_size_sampling) {
|
||||
@@ -97,7 +98,7 @@ TEST(GradientBasedSampler, NoSamplingExternalMemory) {
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
|
||||
EXPECT_NE(page->n_rows, kRows);
|
||||
|
||||
GradientBasedSampler sampler(&ctx, page, kRows, param, kSubsample, TrainParam::kUniform);
|
||||
GradientBasedSampler sampler(&ctx, kRows, param, kSubsample, TrainParam::kUniform, true);
|
||||
auto sample = sampler.Sample(&ctx, gpair.DeviceSpan(), dmat.get());
|
||||
auto sampled_page = sample.page;
|
||||
EXPECT_EQ(sample.sample_rows, kRows);
|
||||
@@ -145,7 +146,8 @@ TEST(GradientBasedSampler, GradientBasedSampling) {
|
||||
constexpr size_t kPageSize = 0;
|
||||
constexpr float kSubsample = 0.8;
|
||||
constexpr int kSamplingMethod = TrainParam::kGradientBased;
|
||||
VerifySampling(kPageSize, kSubsample, kSamplingMethod);
|
||||
constexpr bool kFixedSizeSampling = true;
|
||||
VerifySampling(kPageSize, kSubsample, kSamplingMethod, kFixedSizeSampling);
|
||||
}
|
||||
|
||||
TEST(GradientBasedSampler, GradientBasedSamplingExternalMemory) {
|
||||
|
||||
@@ -50,15 +50,9 @@ void TestDeterministicHistogram(bool is_dense, int shm_size) {
|
||||
quantiser);
|
||||
|
||||
std::vector<GradientPairInt64> histogram_h(num_bins);
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaMemcpy(histogram_h.data(), d_histogram.data(),
|
||||
num_bins * sizeof(GradientPairInt64),
|
||||
cudaMemcpyDeviceToHost));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipMemcpy(histogram_h.data(), d_histogram.data(),
|
||||
num_bins * sizeof(GradientPairInt64),
|
||||
hipMemcpyDeviceToHost));
|
||||
#endif
|
||||
|
||||
for (size_t i = 0; i < kRounds; ++i) {
|
||||
dh::device_vector<GradientPairInt64> new_histogram(num_bins);
|
||||
@@ -70,15 +64,9 @@ void TestDeterministicHistogram(bool is_dense, int shm_size) {
|
||||
d_new_histogram, quantiser);
|
||||
|
||||
std::vector<GradientPairInt64> new_histogram_h(num_bins);
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaMemcpy(new_histogram_h.data(), d_new_histogram.data(),
|
||||
num_bins * sizeof(GradientPairInt64),
|
||||
cudaMemcpyDeviceToHost));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipMemcpy(new_histogram_h.data(), d_new_histogram.data(),
|
||||
num_bins * sizeof(GradientPairInt64),
|
||||
hipMemcpyDeviceToHost));
|
||||
#endif
|
||||
for (size_t j = 0; j < new_histogram_h.size(); ++j) {
|
||||
ASSERT_EQ(new_histogram_h[j].GetQuantisedGrad(), histogram_h[j].GetQuantisedGrad());
|
||||
ASSERT_EQ(new_histogram_h[j].GetQuantisedHess(), histogram_h[j].GetQuantisedHess());
|
||||
@@ -98,15 +86,9 @@ void TestDeterministicHistogram(bool is_dense, int shm_size) {
|
||||
dh::ToSpan(baseline), quantiser);
|
||||
|
||||
std::vector<GradientPairInt64> baseline_h(num_bins);
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaMemcpy(baseline_h.data(), baseline.data().get(),
|
||||
num_bins * sizeof(GradientPairInt64),
|
||||
cudaMemcpyDeviceToHost));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipMemcpy(baseline_h.data(), baseline.data().get(),
|
||||
num_bins * sizeof(GradientPairInt64),
|
||||
hipMemcpyDeviceToHost));
|
||||
#endif
|
||||
|
||||
for (size_t i = 0; i < baseline.size(); ++i) {
|
||||
EXPECT_NEAR(baseline_h[i].GetQuantisedGrad(), histogram_h[i].GetQuantisedGrad(),
|
||||
|
||||
@@ -72,19 +72,13 @@ void TestSortPositionBatch(const std::vector<int>& ridx_in, const std::vector<Se
|
||||
total_rows += segments.at(i).Size();
|
||||
}
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaMemcpyAsync(d_batch_info.data().get(), h_batch_info.data(),
|
||||
h_batch_info.size() * sizeof(PerNodeData<int>), cudaMemcpyDefault,
|
||||
nullptr));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipMemcpyAsync(d_batch_info.data().get(), h_batch_info.data(),
|
||||
h_batch_info.size() * sizeof(PerNodeData<int>), hipMemcpyDefault,
|
||||
nullptr));
|
||||
#endif
|
||||
dh::device_vector<int8_t> tmp;
|
||||
SortPositionBatch<uint32_t, decltype(op), int>(dh::ToSpan(d_batch_info), dh::ToSpan(ridx),
|
||||
dh::ToSpan(ridx_tmp), dh::ToSpan(counts),
|
||||
total_rows, op, &tmp, nullptr);
|
||||
total_rows, op, &tmp);
|
||||
|
||||
auto op_without_data = [=] __device__(auto ridx) { return ridx % 2 == 0; };
|
||||
for (size_t i = 0; i < segments.size(); i++) {
|
||||
|
||||
@@ -4,13 +4,13 @@
|
||||
#include "../test_evaluate_splits.h"
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/base.h> // for GradientPairPrecise, Args, Gradie...
|
||||
#include <xgboost/context.h> // for Context
|
||||
#include <xgboost/data.h> // for FeatureType, DMatrix, MetaInfo
|
||||
#include <xgboost/logging.h> // for CHECK_EQ
|
||||
#include <xgboost/tree_model.h> // for RegTree, RTreeNodeStat
|
||||
#include <xgboost/base.h> // for GradientPairPrecise, Args, Gradie...
|
||||
#include <xgboost/context.h> // for Context
|
||||
#include <xgboost/data.h> // for FeatureType, DMatrix, MetaInfo
|
||||
#include <xgboost/logging.h> // for CHECK_EQ
|
||||
#include <xgboost/tree_model.h> // for RegTree, RTreeNodeStat
|
||||
|
||||
#include <memory> // for make_shared, shared_ptr, addressof
|
||||
#include <memory> // for make_shared, shared_ptr, addressof
|
||||
|
||||
#include "../../../../src/common/hist_util.h" // for HistCollection, HistogramCuts
|
||||
#include "../../../../src/common/random.h" // for ColumnSampler
|
||||
@@ -18,6 +18,8 @@
|
||||
#include "../../../../src/data/gradient_index.h" // for GHistIndexMatrix
|
||||
#include "../../../../src/tree/hist/evaluate_splits.h" // for HistEvaluator
|
||||
#include "../../../../src/tree/hist/expand_entry.h" // for CPUExpandEntry
|
||||
#include "../../../../src/tree/hist/hist_cache.h" // for BoundedHistCollection
|
||||
#include "../../../../src/tree/hist/param.h" // for HistMakerTrainParam
|
||||
#include "../../../../src/tree/param.h" // for GradStats, TrainParam
|
||||
#include "../../helpers.h" // for RandomDataGenerator, AllThreadsFo...
|
||||
|
||||
@@ -34,7 +36,7 @@ void TestEvaluateSplits(bool force_read_by_column) {
|
||||
auto dmat = RandomDataGenerator(kRows, kCols, 0).Seed(3).GenerateDMatrix();
|
||||
|
||||
auto evaluator = HistEvaluator{&ctx, &param, dmat->Info(), sampler};
|
||||
common::HistCollection hist;
|
||||
BoundedHistCollection hist;
|
||||
std::vector<GradientPair> row_gpairs = {
|
||||
{1.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f}, {2.27f, 0.28f},
|
||||
{0.27f, 0.29f}, {0.37f, 0.39f}, {-0.47f, 0.49f}, {0.57f, 0.59f}};
|
||||
@@ -48,12 +50,10 @@ void TestEvaluateSplits(bool force_read_by_column) {
|
||||
std::iota(row_indices.begin(), row_indices.end(), 0);
|
||||
row_set_collection.Init();
|
||||
|
||||
auto hist_builder = common::GHistBuilder(gmat.cut.Ptrs().back());
|
||||
hist.Init(gmat.cut.Ptrs().back());
|
||||
hist.AddHistRow(0);
|
||||
hist.AllocateAllData();
|
||||
hist_builder.template BuildHist<false>(row_gpairs, row_set_collection[0],
|
||||
gmat, hist[0], force_read_by_column);
|
||||
HistMakerTrainParam hist_param;
|
||||
hist.Reset(gmat.cut.Ptrs().back(), hist_param.max_cached_hist_node);
|
||||
hist.AllocateHistograms({0});
|
||||
common::BuildHist<false>(row_gpairs, row_set_collection[0], gmat, hist[0], force_read_by_column);
|
||||
|
||||
// Compute total gradient for all data points
|
||||
GradientPairPrecise total_gpair;
|
||||
@@ -113,13 +113,13 @@ TEST(HistMultiEvaluator, Evaluate) {
|
||||
RandomDataGenerator{n_samples, n_features, 0.5}.Targets(n_targets).GenerateDMatrix(true);
|
||||
|
||||
HistMultiEvaluator evaluator{&ctx, p_fmat->Info(), &param, sampler};
|
||||
std::vector<common::HistCollection> histogram(n_targets);
|
||||
HistMakerTrainParam hist_param;
|
||||
std::vector<BoundedHistCollection> histogram(n_targets);
|
||||
linalg::Vector<GradientPairPrecise> root_sum({2}, Context::kCpuId);
|
||||
for (bst_target_t t{0}; t < n_targets; ++t) {
|
||||
auto &hist = histogram[t];
|
||||
hist.Init(n_bins * n_features);
|
||||
hist.AddHistRow(0);
|
||||
hist.AllocateAllData();
|
||||
hist.Reset(n_bins * n_features, hist_param.max_cached_hist_node);
|
||||
hist.AllocateHistograms({0});
|
||||
auto node_hist = hist[0];
|
||||
node_hist[0] = {-0.5, 0.5};
|
||||
node_hist[1] = {2.0, 0.5};
|
||||
@@ -145,7 +145,7 @@ TEST(HistMultiEvaluator, Evaluate) {
|
||||
|
||||
std::vector<MultiExpandEntry> entries(1, {/*nidx=*/0, /*depth=*/0});
|
||||
|
||||
std::vector<common::HistCollection const *> ptrs;
|
||||
std::vector<BoundedHistCollection const *> ptrs;
|
||||
std::transform(histogram.cbegin(), histogram.cend(), std::back_inserter(ptrs),
|
||||
[](auto const &h) { return std::addressof(h); });
|
||||
|
||||
@@ -227,16 +227,16 @@ auto CompareOneHotAndPartition(bool onehot) {
|
||||
auto sampler = std::make_shared<common::ColumnSampler>();
|
||||
auto evaluator = HistEvaluator{&ctx, &param, dmat->Info(), sampler};
|
||||
std::vector<CPUExpandEntry> entries(1);
|
||||
HistMakerTrainParam hist_param;
|
||||
|
||||
for (auto const &gmat : dmat->GetBatches<GHistIndexMatrix>(&ctx, {32, param.sparse_threshold})) {
|
||||
common::HistCollection hist;
|
||||
BoundedHistCollection hist;
|
||||
|
||||
entries.front().nid = 0;
|
||||
entries.front().depth = 0;
|
||||
|
||||
hist.Init(gmat.cut.TotalBins());
|
||||
hist.AddHistRow(0);
|
||||
hist.AllocateAllData();
|
||||
hist.Reset(gmat.cut.TotalBins(), hist_param.max_cached_hist_node);
|
||||
hist.AllocateHistograms({0});
|
||||
auto node_hist = hist[0];
|
||||
|
||||
CHECK_EQ(node_hist.size(), n_cats);
|
||||
@@ -263,10 +263,10 @@ TEST(HistEvaluator, Categorical) {
|
||||
}
|
||||
|
||||
TEST_F(TestCategoricalSplitWithMissing, HistEvaluator) {
|
||||
common::HistCollection hist;
|
||||
hist.Init(cuts_.TotalBins());
|
||||
hist.AddHistRow(0);
|
||||
hist.AllocateAllData();
|
||||
BoundedHistCollection hist;
|
||||
HistMakerTrainParam hist_param;
|
||||
hist.Reset(cuts_.TotalBins(), hist_param.max_cached_hist_node);
|
||||
hist.AllocateHistograms({0});
|
||||
auto node_hist = hist[0];
|
||||
ASSERT_EQ(node_hist.size(), feature_histogram_.size());
|
||||
std::copy(feature_histogram_.cbegin(), feature_histogram_.cend(), node_hist.begin());
|
||||
|
||||
@@ -2,19 +2,40 @@
|
||||
* Copyright 2018-2023 by Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/context.h> // Context
|
||||
#include <xgboost/base.h> // for bst_node_t, bst_bin_t, Gradient...
|
||||
#include <xgboost/context.h> // for Context
|
||||
#include <xgboost/data.h> // for BatchIterator, BatchSet, DMatrix
|
||||
#include <xgboost/host_device_vector.h> // for HostDeviceVector
|
||||
#include <xgboost/linalg.h> // for MakeTensorView
|
||||
#include <xgboost/logging.h> // for Error, LogCheck_EQ, LogCheck_LT
|
||||
#include <xgboost/span.h> // for Span, operator!=
|
||||
#include <xgboost/tree_model.h> // for RegTree
|
||||
|
||||
#include <limits>
|
||||
#include <algorithm> // for max
|
||||
#include <cstddef> // for size_t
|
||||
#include <cstdint> // for int32_t, uint32_t
|
||||
#include <functional> // for function
|
||||
#include <iterator> // for back_inserter
|
||||
#include <limits> // for numeric_limits
|
||||
#include <memory> // for shared_ptr, allocator, unique_ptr
|
||||
#include <numeric> // for iota, accumulate
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../../../../src/common/categorical.h"
|
||||
#include "../../../../src/common/row_set.h"
|
||||
#include "../../../../src/tree/hist/expand_entry.h"
|
||||
#include "../../../../src/tree/hist/histogram.h"
|
||||
#include "../../categorical_helpers.h"
|
||||
#include "../../helpers.h"
|
||||
#include "../../../../src/collective/communicator-inl.h" // for GetRank, GetWorldSize
|
||||
#include "../../../../src/common/hist_util.h" // for GHistRow, HistogramCuts, Sketch...
|
||||
#include "../../../../src/common/ref_resource_view.h" // for RefResourceView
|
||||
#include "../../../../src/common/row_set.h" // for RowSetCollection
|
||||
#include "../../../../src/common/threading_utils.h" // for BlockedSpace2d
|
||||
#include "../../../../src/data/gradient_index.h" // for GHistIndexMatrix
|
||||
#include "../../../../src/tree/common_row_partitioner.h" // for CommonRowPartitioner
|
||||
#include "../../../../src/tree/hist/expand_entry.h" // for CPUExpandEntry
|
||||
#include "../../../../src/tree/hist/hist_cache.h" // for BoundedHistCollection
|
||||
#include "../../../../src/tree/hist/histogram.h" // for HistogramBuilder
|
||||
#include "../../../../src/tree/hist/param.h" // for HistMakerTrainParam
|
||||
#include "../../categorical_helpers.h" // for OneHotEncodeFeature
|
||||
#include "../../helpers.h" // for RandomDataGenerator, GenerateRa...
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
namespace xgboost::tree {
|
||||
namespace {
|
||||
void InitRowPartitionForTest(common::RowSetCollection *row_set, size_t n_samples, size_t base_rowid = 0) {
|
||||
auto &row_indices = *row_set->Data();
|
||||
@@ -26,10 +47,8 @@ void InitRowPartitionForTest(common::RowSetCollection *row_set, size_t n_samples
|
||||
|
||||
void TestAddHistRows(bool is_distributed) {
|
||||
Context ctx;
|
||||
std::vector<CPUExpandEntry> nodes_for_explicit_hist_build_;
|
||||
std::vector<CPUExpandEntry> nodes_for_subtraction_trick_;
|
||||
int starting_index = std::numeric_limits<int>::max();
|
||||
int sync_count = 0;
|
||||
std::vector<bst_node_t> nodes_to_build;
|
||||
std::vector<bst_node_t> nodes_to_sub;
|
||||
|
||||
size_t constexpr kNRows = 8, kNCols = 16;
|
||||
int32_t constexpr kMaxBins = 4;
|
||||
@@ -42,26 +61,22 @@ void TestAddHistRows(bool is_distributed) {
|
||||
tree.ExpandNode(0, 0, 0, false, 0, 0, 0, 0, 0, 0, 0);
|
||||
tree.ExpandNode(tree[0].LeftChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0);
|
||||
tree.ExpandNode(tree[0].RightChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0);
|
||||
nodes_for_explicit_hist_build_.emplace_back(3, tree.GetDepth(3));
|
||||
nodes_for_explicit_hist_build_.emplace_back(4, tree.GetDepth(4));
|
||||
nodes_for_subtraction_trick_.emplace_back(5, tree.GetDepth(5));
|
||||
nodes_for_subtraction_trick_.emplace_back(6, tree.GetDepth(6));
|
||||
nodes_to_build.emplace_back(3);
|
||||
nodes_to_build.emplace_back(4);
|
||||
nodes_to_sub.emplace_back(5);
|
||||
nodes_to_sub.emplace_back(6);
|
||||
|
||||
HistogramBuilder<CPUExpandEntry> histogram_builder;
|
||||
histogram_builder.Reset(gmat.cut.TotalBins(), {kMaxBins, 0.5}, omp_get_max_threads(), 1,
|
||||
is_distributed, false);
|
||||
histogram_builder.AddHistRows(&starting_index, &sync_count,
|
||||
nodes_for_explicit_hist_build_,
|
||||
nodes_for_subtraction_trick_, &tree);
|
||||
HistMakerTrainParam hist_param;
|
||||
HistogramBuilder histogram_builder;
|
||||
histogram_builder.Reset(&ctx, gmat.cut.TotalBins(), {kMaxBins, 0.5}, is_distributed, false,
|
||||
&hist_param);
|
||||
histogram_builder.AddHistRows(&tree, &nodes_to_build, &nodes_to_sub, false);
|
||||
|
||||
ASSERT_EQ(sync_count, 2);
|
||||
ASSERT_EQ(starting_index, 3);
|
||||
|
||||
for (const CPUExpandEntry &node : nodes_for_explicit_hist_build_) {
|
||||
ASSERT_EQ(histogram_builder.Histogram().RowExists(node.nid), true);
|
||||
for (bst_node_t const &nidx : nodes_to_build) {
|
||||
ASSERT_TRUE(histogram_builder.Histogram().HistogramExists(nidx));
|
||||
}
|
||||
for (const CPUExpandEntry &node : nodes_for_subtraction_trick_) {
|
||||
ASSERT_EQ(histogram_builder.Histogram().RowExists(node.nid), true);
|
||||
for (bst_node_t const &nidx : nodes_to_sub) {
|
||||
ASSERT_TRUE(histogram_builder.Histogram().HistogramExists(nidx));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -72,87 +87,77 @@ TEST(CPUHistogram, AddRows) {
|
||||
}
|
||||
|
||||
void TestSyncHist(bool is_distributed) {
|
||||
size_t constexpr kNRows = 8, kNCols = 16;
|
||||
int32_t constexpr kMaxBins = 4;
|
||||
std::size_t constexpr kNRows = 8, kNCols = 16;
|
||||
bst_bin_t constexpr kMaxBins = 4;
|
||||
Context ctx;
|
||||
|
||||
std::vector<CPUExpandEntry> nodes_for_explicit_hist_build_;
|
||||
std::vector<CPUExpandEntry> nodes_for_subtraction_trick_;
|
||||
int starting_index = std::numeric_limits<int>::max();
|
||||
int sync_count = 0;
|
||||
std::vector<bst_bin_t> nodes_for_explicit_hist_build;
|
||||
std::vector<bst_bin_t> nodes_for_subtraction_trick;
|
||||
RegTree tree;
|
||||
|
||||
auto p_fmat = RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
|
||||
auto const &gmat =
|
||||
*(p_fmat->GetBatches<GHistIndexMatrix>(&ctx, BatchParam{kMaxBins, 0.5}).begin());
|
||||
|
||||
HistogramBuilder<CPUExpandEntry> histogram;
|
||||
HistogramBuilder histogram;
|
||||
uint32_t total_bins = gmat.cut.Ptrs().back();
|
||||
histogram.Reset(total_bins, {kMaxBins, 0.5}, omp_get_max_threads(), 1, is_distributed, false);
|
||||
HistMakerTrainParam hist_param;
|
||||
histogram.Reset(&ctx, total_bins, {kMaxBins, 0.5}, is_distributed, false, &hist_param);
|
||||
|
||||
common::RowSetCollection row_set_collection_;
|
||||
common::RowSetCollection row_set_collection;
|
||||
{
|
||||
row_set_collection_.Clear();
|
||||
std::vector<size_t> &row_indices = *row_set_collection_.Data();
|
||||
row_set_collection.Clear();
|
||||
std::vector<size_t> &row_indices = *row_set_collection.Data();
|
||||
row_indices.resize(kNRows);
|
||||
std::iota(row_indices.begin(), row_indices.end(), 0);
|
||||
row_set_collection_.Init();
|
||||
row_set_collection.Init();
|
||||
}
|
||||
|
||||
// level 0
|
||||
nodes_for_explicit_hist_build_.emplace_back(0, tree.GetDepth(0));
|
||||
histogram.AddHistRows(&starting_index, &sync_count,
|
||||
nodes_for_explicit_hist_build_,
|
||||
nodes_for_subtraction_trick_, &tree);
|
||||
nodes_for_explicit_hist_build.emplace_back(0);
|
||||
histogram.AddHistRows(&tree, &nodes_for_explicit_hist_build, &nodes_for_subtraction_trick, false);
|
||||
|
||||
tree.ExpandNode(0, 0, 0, false, 0, 0, 0, 0, 0, 0, 0);
|
||||
nodes_for_explicit_hist_build_.clear();
|
||||
nodes_for_subtraction_trick_.clear();
|
||||
nodes_for_explicit_hist_build.clear();
|
||||
nodes_for_subtraction_trick.clear();
|
||||
|
||||
// level 1
|
||||
nodes_for_explicit_hist_build_.emplace_back(tree[0].LeftChild(), tree.GetDepth(1));
|
||||
nodes_for_subtraction_trick_.emplace_back(tree[0].RightChild(), tree.GetDepth(2));
|
||||
nodes_for_explicit_hist_build.emplace_back(tree[0].LeftChild());
|
||||
nodes_for_subtraction_trick.emplace_back(tree[0].RightChild());
|
||||
|
||||
histogram.AddHistRows(&starting_index, &sync_count,
|
||||
nodes_for_explicit_hist_build_,
|
||||
nodes_for_subtraction_trick_, &tree);
|
||||
histogram.AddHistRows(&tree, &nodes_for_explicit_hist_build, &nodes_for_subtraction_trick, false);
|
||||
|
||||
tree.ExpandNode(tree[0].LeftChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0);
|
||||
tree.ExpandNode(tree[0].RightChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0);
|
||||
|
||||
nodes_for_explicit_hist_build_.clear();
|
||||
nodes_for_subtraction_trick_.clear();
|
||||
nodes_for_explicit_hist_build.clear();
|
||||
nodes_for_subtraction_trick.clear();
|
||||
// level 2
|
||||
nodes_for_explicit_hist_build_.emplace_back(3, tree.GetDepth(3));
|
||||
nodes_for_subtraction_trick_.emplace_back(4, tree.GetDepth(4));
|
||||
nodes_for_explicit_hist_build_.emplace_back(5, tree.GetDepth(5));
|
||||
nodes_for_subtraction_trick_.emplace_back(6, tree.GetDepth(6));
|
||||
nodes_for_explicit_hist_build.emplace_back(3);
|
||||
nodes_for_subtraction_trick.emplace_back(4);
|
||||
nodes_for_explicit_hist_build.emplace_back(5);
|
||||
nodes_for_subtraction_trick.emplace_back(6);
|
||||
|
||||
histogram.AddHistRows(&starting_index, &sync_count,
|
||||
nodes_for_explicit_hist_build_,
|
||||
nodes_for_subtraction_trick_, &tree);
|
||||
histogram.AddHistRows(&tree, &nodes_for_explicit_hist_build, &nodes_for_subtraction_trick, false);
|
||||
|
||||
const size_t n_nodes = nodes_for_explicit_hist_build_.size();
|
||||
const size_t n_nodes = nodes_for_explicit_hist_build.size();
|
||||
ASSERT_EQ(n_nodes, 2ul);
|
||||
row_set_collection_.AddSplit(0, tree[0].LeftChild(), tree[0].RightChild(), 4,
|
||||
4);
|
||||
row_set_collection_.AddSplit(1, tree[1].LeftChild(), tree[1].RightChild(), 2,
|
||||
2);
|
||||
row_set_collection_.AddSplit(2, tree[2].LeftChild(), tree[2].RightChild(), 2,
|
||||
2);
|
||||
row_set_collection.AddSplit(0, tree[0].LeftChild(), tree[0].RightChild(), 4, 4);
|
||||
row_set_collection.AddSplit(1, tree[1].LeftChild(), tree[1].RightChild(), 2, 2);
|
||||
row_set_collection.AddSplit(2, tree[2].LeftChild(), tree[2].RightChild(), 2, 2);
|
||||
|
||||
common::BlockedSpace2d space(
|
||||
n_nodes,
|
||||
[&](size_t node) {
|
||||
const int32_t nid = nodes_for_explicit_hist_build_[node].nid;
|
||||
return row_set_collection_[nid].Size();
|
||||
[&](std::size_t nidx_in_set) {
|
||||
bst_node_t nidx = nodes_for_explicit_hist_build[nidx_in_set];
|
||||
return row_set_collection[nidx].Size();
|
||||
},
|
||||
256);
|
||||
|
||||
std::vector<common::GHistRow> target_hists(n_nodes);
|
||||
for (size_t i = 0; i < nodes_for_explicit_hist_build_.size(); ++i) {
|
||||
const int32_t nid = nodes_for_explicit_hist_build_[i].nid;
|
||||
target_hists[i] = histogram.Histogram()[nid];
|
||||
for (size_t i = 0; i < nodes_for_explicit_hist_build.size(); ++i) {
|
||||
bst_node_t nidx = nodes_for_explicit_hist_build[i];
|
||||
target_hists[i] = histogram.Histogram()[nidx];
|
||||
}
|
||||
|
||||
// set values to specific nodes hist
|
||||
@@ -176,14 +181,7 @@ void TestSyncHist(bool is_distributed) {
|
||||
|
||||
histogram.Buffer().Reset(1, n_nodes, space, target_hists);
|
||||
// sync hist
|
||||
if (is_distributed) {
|
||||
histogram.SyncHistogramDistributed(&tree, nodes_for_explicit_hist_build_,
|
||||
nodes_for_subtraction_trick_,
|
||||
starting_index, sync_count);
|
||||
} else {
|
||||
histogram.SyncHistogramLocal(&tree, nodes_for_explicit_hist_build_,
|
||||
nodes_for_subtraction_trick_);
|
||||
}
|
||||
histogram.SyncHistogram(&tree, nodes_for_explicit_hist_build, nodes_for_subtraction_trick);
|
||||
|
||||
using GHistRowT = common::GHistRow;
|
||||
auto check_hist = [](const GHistRowT parent, const GHistRowT left, const GHistRowT right,
|
||||
@@ -196,11 +194,10 @@ void TestSyncHist(bool is_distributed) {
|
||||
}
|
||||
};
|
||||
size_t node_id = 0;
|
||||
for (const CPUExpandEntry &node : nodes_for_explicit_hist_build_) {
|
||||
auto this_hist = histogram.Histogram()[node.nid];
|
||||
const size_t parent_id = tree[node.nid].Parent();
|
||||
const size_t subtraction_node_id =
|
||||
nodes_for_subtraction_trick_[node_id].nid;
|
||||
for (auto const &nidx : nodes_for_explicit_hist_build) {
|
||||
auto this_hist = histogram.Histogram()[nidx];
|
||||
const size_t parent_id = tree[nidx].Parent();
|
||||
const size_t subtraction_node_id = nodes_for_subtraction_trick[node_id];
|
||||
auto parent_hist = histogram.Histogram()[parent_id];
|
||||
auto sibling_hist = histogram.Histogram()[subtraction_node_id];
|
||||
|
||||
@@ -208,11 +205,10 @@ void TestSyncHist(bool is_distributed) {
|
||||
++node_id;
|
||||
}
|
||||
node_id = 0;
|
||||
for (const CPUExpandEntry &node : nodes_for_subtraction_trick_) {
|
||||
auto this_hist = histogram.Histogram()[node.nid];
|
||||
const size_t parent_id = tree[node.nid].Parent();
|
||||
const size_t subtraction_node_id =
|
||||
nodes_for_explicit_hist_build_[node_id].nid;
|
||||
for (auto const &nidx : nodes_for_subtraction_trick) {
|
||||
auto this_hist = histogram.Histogram()[nidx];
|
||||
const size_t parent_id = tree[nidx].Parent();
|
||||
const size_t subtraction_node_id = nodes_for_explicit_hist_build[node_id];
|
||||
auto parent_hist = histogram.Histogram()[parent_id];
|
||||
auto sibling_hist = histogram.Histogram()[subtraction_node_id];
|
||||
|
||||
@@ -246,9 +242,9 @@ void TestBuildHistogram(bool is_distributed, bool force_read_by_column, bool is_
|
||||
{0.27f, 0.29f}, {0.37f, 0.39f}, {0.47f, 0.49f}, {0.57f, 0.59f}};
|
||||
|
||||
bst_node_t nid = 0;
|
||||
HistogramBuilder<CPUExpandEntry> histogram;
|
||||
histogram.Reset(total_bins, {kMaxBins, 0.5}, omp_get_max_threads(), 1, is_distributed,
|
||||
is_col_split);
|
||||
HistogramBuilder histogram;
|
||||
HistMakerTrainParam hist_param;
|
||||
histogram.Reset(&ctx, total_bins, {kMaxBins, 0.5}, is_distributed, is_col_split, &hist_param);
|
||||
|
||||
RegTree tree;
|
||||
|
||||
@@ -260,12 +256,17 @@ void TestBuildHistogram(bool is_distributed, bool force_read_by_column, bool is_
|
||||
row_set_collection.Init();
|
||||
|
||||
CPUExpandEntry node{RegTree::kRoot, tree.GetDepth(0)};
|
||||
std::vector<CPUExpandEntry> nodes_for_explicit_hist_build;
|
||||
nodes_for_explicit_hist_build.push_back(node);
|
||||
std::vector<bst_node_t> nodes_to_build{node.nid};
|
||||
std::vector<bst_node_t> dummy_sub;
|
||||
|
||||
histogram.AddHistRows(&tree, &nodes_to_build, &dummy_sub, false);
|
||||
common::BlockedSpace2d space{
|
||||
1, [&](std::size_t nidx_in_set) { return row_set_collection[nidx_in_set].Size(); }, 256};
|
||||
for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>(&ctx, {kMaxBins, 0.5})) {
|
||||
histogram.BuildHist(0, gidx, &tree, row_set_collection, nodes_for_explicit_hist_build, {},
|
||||
gpair, force_read_by_column);
|
||||
histogram.BuildHist(0, space, gidx, row_set_collection, nodes_to_build,
|
||||
linalg::MakeTensorView(&ctx, gpair, gpair.size()), force_read_by_column);
|
||||
}
|
||||
histogram.SyncHistogram(&tree, nodes_to_build, {});
|
||||
|
||||
// Check if number of histogram bins is correct
|
||||
ASSERT_EQ(histogram.Histogram()[nid].size(), gmat.cut.Ptrs().back());
|
||||
@@ -326,18 +327,18 @@ void ValidateCategoricalHistogram(size_t n_categories,
|
||||
|
||||
void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
|
||||
size_t constexpr kRows = 340;
|
||||
int32_t constexpr kBins = 256;
|
||||
bst_bin_t constexpr kBins = 256;
|
||||
auto x = GenerateRandomCategoricalSingleColumn(kRows, n_categories);
|
||||
auto cat_m = GetDMatrixFromData(x, kRows, 1);
|
||||
cat_m->Info().feature_types.HostVector().push_back(FeatureType::kCategorical);
|
||||
Context ctx;
|
||||
|
||||
BatchParam batch_param{0, static_cast<int32_t>(kBins)};
|
||||
BatchParam batch_param{0, kBins};
|
||||
|
||||
RegTree tree;
|
||||
CPUExpandEntry node{RegTree::kRoot, tree.GetDepth(0)};
|
||||
std::vector<CPUExpandEntry> nodes_for_explicit_hist_build;
|
||||
nodes_for_explicit_hist_build.push_back(node);
|
||||
CPUExpandEntry node{RegTree::kRoot, tree.GetDepth(RegTree::kRoot)};
|
||||
std::vector<bst_node_t> nodes_to_build;
|
||||
nodes_to_build.push_back(node.nid);
|
||||
|
||||
auto gpair = GenerateRandomGradients(kRows, 0, 2);
|
||||
|
||||
@@ -347,30 +348,41 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
|
||||
row_indices.resize(kRows);
|
||||
std::iota(row_indices.begin(), row_indices.end(), 0);
|
||||
row_set_collection.Init();
|
||||
HistMakerTrainParam hist_param;
|
||||
std::vector<bst_node_t> dummy_sub;
|
||||
|
||||
common::BlockedSpace2d space{
|
||||
1, [&](std::size_t nidx_in_set) { return row_set_collection[nidx_in_set].Size(); }, 256};
|
||||
|
||||
/**
|
||||
* Generate hist with cat data.
|
||||
*/
|
||||
HistogramBuilder<CPUExpandEntry> cat_hist;
|
||||
HistogramBuilder cat_hist;
|
||||
for (auto const &gidx : cat_m->GetBatches<GHistIndexMatrix>(&ctx, {kBins, 0.5})) {
|
||||
auto total_bins = gidx.cut.TotalBins();
|
||||
cat_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false, false);
|
||||
cat_hist.BuildHist(0, gidx, &tree, row_set_collection, nodes_for_explicit_hist_build, {},
|
||||
gpair.HostVector(), force_read_by_column);
|
||||
cat_hist.Reset(&ctx, total_bins, {kBins, 0.5}, false, false, &hist_param);
|
||||
cat_hist.AddHistRows(&tree, &nodes_to_build, &dummy_sub, false);
|
||||
cat_hist.BuildHist(0, space, gidx, row_set_collection, nodes_to_build,
|
||||
linalg::MakeTensorView(&ctx, gpair.ConstHostSpan(), gpair.Size()),
|
||||
force_read_by_column);
|
||||
}
|
||||
cat_hist.SyncHistogram(&tree, nodes_to_build, {});
|
||||
|
||||
/**
|
||||
* Generate hist with one hot encoded data.
|
||||
*/
|
||||
auto x_encoded = OneHotEncodeFeature(x, n_categories);
|
||||
auto encode_m = GetDMatrixFromData(x_encoded, kRows, n_categories);
|
||||
HistogramBuilder<CPUExpandEntry> onehot_hist;
|
||||
HistogramBuilder onehot_hist;
|
||||
for (auto const &gidx : encode_m->GetBatches<GHistIndexMatrix>(&ctx, {kBins, 0.5})) {
|
||||
auto total_bins = gidx.cut.TotalBins();
|
||||
onehot_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false, false);
|
||||
onehot_hist.BuildHist(0, gidx, &tree, row_set_collection, nodes_for_explicit_hist_build, {},
|
||||
gpair.HostVector(), force_read_by_column);
|
||||
onehot_hist.Reset(&ctx, total_bins, {kBins, 0.5}, false, false, &hist_param);
|
||||
onehot_hist.AddHistRows(&tree, &nodes_to_build, &dummy_sub, false);
|
||||
onehot_hist.BuildHist(0, space, gidx, row_set_collection, nodes_to_build,
|
||||
linalg::MakeTensorView(&ctx, gpair.ConstHostSpan(), gpair.Size()),
|
||||
force_read_by_column);
|
||||
}
|
||||
onehot_hist.SyncHistogram(&tree, nodes_to_build, {});
|
||||
|
||||
auto cat = cat_hist.Histogram()[0];
|
||||
auto onehot = onehot_hist.Histogram()[0];
|
||||
@@ -397,19 +409,22 @@ void TestHistogramExternalMemory(Context const *ctx, BatchParam batch_param, boo
|
||||
batch_param.hess = hess;
|
||||
}
|
||||
|
||||
std::vector<size_t> partition_size(1, 0);
|
||||
size_t total_bins{0};
|
||||
size_t n_samples{0};
|
||||
std::vector<std::size_t> partition_size(1, 0);
|
||||
bst_bin_t total_bins{0};
|
||||
bst_row_t n_samples{0};
|
||||
|
||||
auto gpair = GenerateRandomGradients(m->Info().num_row_, 0.0, 1.0);
|
||||
auto const &h_gpair = gpair.HostVector();
|
||||
|
||||
RegTree tree;
|
||||
std::vector<CPUExpandEntry> nodes;
|
||||
nodes.emplace_back(0, tree.GetDepth(0));
|
||||
std::vector<bst_node_t> nodes{RegTree::kRoot};
|
||||
common::BlockedSpace2d space{
|
||||
1, [&](std::size_t nidx_in_set) { return partition_size.at(nidx_in_set); }, 256};
|
||||
|
||||
common::GHistRow multi_page;
|
||||
HistogramBuilder<CPUExpandEntry> multi_build;
|
||||
HistogramBuilder multi_build;
|
||||
HistMakerTrainParam hist_param;
|
||||
std::vector<bst_node_t> dummy_sub;
|
||||
{
|
||||
/**
|
||||
* Multi page
|
||||
@@ -427,23 +442,21 @@ void TestHistogramExternalMemory(Context const *ctx, BatchParam batch_param, boo
|
||||
}
|
||||
ASSERT_EQ(n_samples, m->Info().num_row_);
|
||||
|
||||
common::BlockedSpace2d space{
|
||||
1, [&](size_t nidx_in_set) { return partition_size.at(nidx_in_set); },
|
||||
256};
|
||||
|
||||
multi_build.Reset(total_bins, batch_param, ctx->Threads(), rows_set.size(), false, false);
|
||||
|
||||
size_t page_idx{0};
|
||||
multi_build.Reset(ctx, total_bins, batch_param, false, false, &hist_param);
|
||||
multi_build.AddHistRows(&tree, &nodes, &dummy_sub, false);
|
||||
std::size_t page_idx{0};
|
||||
for (auto const &page : m->GetBatches<GHistIndexMatrix>(ctx, batch_param)) {
|
||||
multi_build.BuildHist(page_idx, space, page, &tree, rows_set.at(page_idx), nodes, {}, h_gpair,
|
||||
multi_build.BuildHist(page_idx, space, page, rows_set[page_idx], nodes,
|
||||
linalg::MakeTensorView(ctx, h_gpair, h_gpair.size()),
|
||||
force_read_by_column);
|
||||
++page_idx;
|
||||
}
|
||||
ASSERT_EQ(page_idx, 2);
|
||||
multi_page = multi_build.Histogram()[0];
|
||||
multi_build.SyncHistogram(&tree, nodes, {});
|
||||
|
||||
multi_page = multi_build.Histogram()[RegTree::kRoot];
|
||||
}
|
||||
|
||||
HistogramBuilder<CPUExpandEntry> single_build;
|
||||
HistogramBuilder single_build;
|
||||
common::GHistRow single_page;
|
||||
{
|
||||
/**
|
||||
@@ -452,18 +465,24 @@ void TestHistogramExternalMemory(Context const *ctx, BatchParam batch_param, boo
|
||||
common::RowSetCollection row_set_collection;
|
||||
InitRowPartitionForTest(&row_set_collection, n_samples);
|
||||
|
||||
single_build.Reset(total_bins, batch_param, ctx->Threads(), 1, false, false);
|
||||
single_build.Reset(ctx, total_bins, batch_param, false, false, &hist_param);
|
||||
SparsePage concat;
|
||||
std::vector<float> hess(m->Info().num_row_, 1.0f);
|
||||
for (auto const& page : m->GetBatches<SparsePage>()) {
|
||||
for (auto const &page : m->GetBatches<SparsePage>()) {
|
||||
concat.Push(page);
|
||||
}
|
||||
|
||||
auto cut = common::SketchOnDMatrix(ctx, m.get(), batch_param.max_bin, false, hess);
|
||||
GHistIndexMatrix gmat(concat, {}, cut, batch_param.max_bin, false,
|
||||
std::numeric_limits<double>::quiet_NaN(), ctx->Threads());
|
||||
single_build.BuildHist(0, gmat, &tree, row_set_collection, nodes, {}, h_gpair, force_read_by_column);
|
||||
single_page = single_build.Histogram()[0];
|
||||
|
||||
single_build.AddHistRows(&tree, &nodes, &dummy_sub, false);
|
||||
single_build.BuildHist(0, space, gmat, row_set_collection, nodes,
|
||||
linalg::MakeTensorView(ctx, h_gpair, h_gpair.size()),
|
||||
force_read_by_column);
|
||||
single_build.SyncHistogram(&tree, nodes, {});
|
||||
|
||||
single_page = single_build.Histogram()[RegTree::kRoot];
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < single_page.size(); ++i) {
|
||||
@@ -487,5 +506,108 @@ TEST(CPUHistogram, ExternalMemory) {
|
||||
TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, false);
|
||||
TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, true);
|
||||
}
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
|
||||
namespace {
|
||||
/**
 * Fixture parameterised on (is_distributed, is_col_split).  It compares the child-node
 * histograms produced with the default histogram cache against those produced when
 * `max_cached_hist_node` is set to 1.
 */
class OverflowTest : public ::testing::TestWithParam<std::tuple<bool, bool>> {
 public:
  /**
   * Build the root histogram, apply one hand-made split and build both child histograms.
   *
   * @param limit          When true, `max_cached_hist_node` is initialised to "1".
   * @param is_distributed Forwarded to MultiHistogramBuilder::Reset.
   * @param is_col_split   When true, a column slice of a regular DMatrix is used instead of
   *                       a quantile DMatrix.
   * @return The left child's histogram followed by the right child's histogram.
   */
  std::vector<GradientPairPrecise> TestOverflow(bool limit, bool is_distributed,
                                                bool is_col_split) {
    bst_bin_t constexpr kBins = 256;
    Context ctx;

    HistMakerTrainParam hist_param;
    if (limit) {
      hist_param.Init(Args{{"max_cached_hist_node", "1"}});
    }

    // Pick the input matrix; the column-split variant is sliced by worker rank.
    std::shared_ptr<DMatrix> Xy;
    if (is_col_split) {
      Xy = RandomDataGenerator{8192, 16, 0.5}.GenerateDMatrix(true);
      Xy =
          std::shared_ptr<DMatrix>{Xy->SliceCol(collective::GetWorldSize(), collective::GetRank())};
    } else {
      Xy = RandomDataGenerator{8192, 16, 0.5}.Bins(kBins).GenerateQuantileDMatrix(true);
    }

    double threshold{TrainParam::DftSparseThreshold()};
    BatchParam batch_param{kBins, threshold};

    bst_bin_t total_bins{0};
    float split_value{0};
    for (auto const &gidx : Xy->GetBatches<GHistIndexMatrix>(&ctx, batch_param)) {
      total_bins = gidx.cut.TotalBins();
      // use a cut point in the second column for split
      split_value = gidx.cut.Values()[kBins + kBins / 2];
    }

    RegTree tree;
    MultiHistogramBuilder hist_builder;
    CHECK_EQ(Xy->Info().IsColumnSplit(), is_col_split);

    hist_builder.Reset(&ctx, total_bins, tree.NumTargets(), batch_param, is_distributed,
                       Xy->Info().IsColumnSplit(), &hist_param);

    std::vector<CommonRowPartitioner> partitioners;
    partitioners.emplace_back(&ctx, Xy->Info().num_row_, /*base_rowid=*/0,
                              Xy->Info().IsColumnSplit());

    auto gradients = GenerateRandomGradients(Xy->Info().num_row_, 0.0, 1.0);
    // Both histogram builds consume the same single-column view of the gradients.
    auto gpair_view = [&] {
      return linalg::MakeTensorView(&ctx, gradients.ConstHostSpan(), gradients.Size(), 1);
    };

    CPUExpandEntry best;
    hist_builder.BuildRootHist(Xy.get(), &tree, partitioners, gpair_view(), best, batch_param);

    // Force a split on feature 1 at the cut point chosen above and grow the tree by one level.
    best.split.Update(1.0f, 1, split_value, false, false, GradStats{1.0, 1.0},
                      GradStats{1.0, 1.0});
    tree.ExpandNode(best.nid, best.split.SplitIndex(), best.split.split_value, false,
                    /*base_weight=*/2.0f,
                    /*left_leaf_weight=*/1.0f, /*right_leaf_weight=*/1.0f, best.GetLossChange(),
                    /*sum_hess=*/2.0f, best.split.left_sum.GetHess(),
                    best.split.right_sum.GetHess());

    std::vector<CPUExpandEntry> candidates{best};
    for (auto const &gidx : Xy->GetBatches<GHistIndexMatrix>(&ctx, batch_param)) {
      partitioners.front().UpdatePosition(&ctx, gidx, candidates, &tree);
    }
    // Both children must have received rows, otherwise the comparison is vacuous.
    CHECK_NE(partitioners.front()[tree.LeftChild(best.nid)].Size(), 0);
    CHECK_NE(partitioners.front()[tree.RightChild(best.nid)].Size(), 0);

    hist_builder.BuildHistLeftRight(Xy.get(), &tree, partitioners, candidates, gpair_view(),
                                    batch_param);

    // With the cache limited to one node the parent histogram is expected to be gone.
    if (limit) {
      CHECK(!hist_builder.Histogram(0).HistogramExists(best.nid));
    } else {
      CHECK(hist_builder.Histogram(0).HistogramExists(best.nid));
    }

    // Concatenate left then right child histograms.
    std::vector<GradientPairPrecise> collected;
    for (auto nidx : {tree.LeftChild(best.nid), tree.RightChild(best.nid)}) {
      auto node_hist = hist_builder.Histogram(0)[nidx];
      std::copy(node_hist.cbegin(), node_hist.cend(), std::back_inserter(collected));
    }
    return collected;
  }

  /** Run TestOverflow without and with the cache limit and require identical histograms. */
  void RunTest() {
    auto const [is_distributed, is_col_split] = GetParam();
    auto res0 = this->TestOverflow(false, is_distributed, is_col_split);
    auto res1 = this->TestOverflow(true, is_distributed, is_col_split);
    ASSERT_EQ(res0, res1);
  }
};
|
||||
|
||||
/**
 * Enumerate every combination of the two boolean test parameters.
 *
 * @return The four tuples in the order (true, true), (true, false), (false, true),
 *         (false, false).
 */
auto MakeParamsForTest() {
  using Config = std::tuple<bool, bool>;
  return std::vector<Config>{Config{true, true}, Config{true, false}, Config{false, true},
                             Config{false, false}};
}
|
||||
} // anonymous namespace
|
||||
|
||||
// Executes OverflowTest::RunTest once per parameter tuple.
TEST_P(OverflowTest, Overflow) {
  RunTest();
}

// The parameter tuples are supplied by MakeParamsForTest().
INSTANTIATE_TEST_SUITE_P(CPUHistogram, OverflowTest,
                         ::testing::ValuesIn(MakeParamsForTest()));
|
||||
} // namespace xgboost::tree
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2019 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2019-2023, XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <thrust/copy.h>
|
||||
@@ -59,7 +59,7 @@ void CompareBitField(LBitField64 d_field, std::set<uint32_t> positions) {
|
||||
LBitField64 h_field{ {h_field_storage.data(),
|
||||
h_field_storage.data() + h_field_storage.size()} };
|
||||
|
||||
for (size_t i = 0; i < h_field.Size(); ++i) {
|
||||
for (size_t i = 0; i < h_field.Capacity(); ++i) {
|
||||
if (positions.find(i) != positions.cend()) {
|
||||
ASSERT_TRUE(h_field.Check(i));
|
||||
} else {
|
||||
@@ -88,7 +88,7 @@ TEST(GPUFeatureInteractionConstraint, Init) {
|
||||
{h_node_storage.data(), h_node_storage.data() + h_node_storage.size()}
|
||||
};
|
||||
// no feature is attached to node.
|
||||
for (size_t i = 0; i < h_node.Size(); ++i) {
|
||||
for (size_t i = 0; i < h_node.Capacity(); ++i) {
|
||||
ASSERT_FALSE(h_node.Check(i));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,22 +2,24 @@
|
||||
* Copyright 2022-2023 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/base.h> // for GradientPairInternal, GradientPairPrecise
|
||||
#include <xgboost/data.h> // for MetaInfo
|
||||
#include <xgboost/host_device_vector.h> // for HostDeviceVector
|
||||
#include <xgboost/span.h> // for operator!=, Span, SpanIterator
|
||||
#include <xgboost/base.h> // for GradientPairInternal, GradientPairPrecise
|
||||
#include <xgboost/data.h> // for MetaInfo
|
||||
#include <xgboost/host_device_vector.h> // for HostDeviceVector
|
||||
#include <xgboost/span.h> // for operator!=, Span, SpanIterator
|
||||
|
||||
#include <algorithm> // for max, max_element, next_permutation, copy
|
||||
#include <cmath> // for isnan
|
||||
#include <cstddef> // for size_t
|
||||
#include <cstdint> // for int32_t, uint64_t, uint32_t
|
||||
#include <limits> // for numeric_limits
|
||||
#include <numeric> // for iota
|
||||
#include <tuple> // for make_tuple, tie, tuple
|
||||
#include <utility> // for pair
|
||||
#include <vector> // for vector
|
||||
#include <algorithm> // for max, max_element, next_permutation, copy
|
||||
#include <cmath> // for isnan
|
||||
#include <cstddef> // for size_t
|
||||
#include <cstdint> // for int32_t, uint64_t, uint32_t
|
||||
#include <limits> // for numeric_limits
|
||||
#include <numeric> // for iota
|
||||
#include <tuple> // for make_tuple, tie, tuple
|
||||
#include <utility> // for pair
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../../../src/common/hist_util.h" // for HistogramCuts, HistCollection, GHistRow
|
||||
#include "../../../src/tree/hist/hist_cache.h" // for HistogramCollection
|
||||
#include "../../../src/tree/hist/param.h" // for HistMakerTrainParam
|
||||
#include "../../../src/tree/param.h" // for TrainParam, GradStats
|
||||
#include "../../../src/tree/split_evaluator.h" // for TreeEvaluator
|
||||
#include "../helpers.h" // for SimpleLCG, SimpleRealUniformDistribution
|
||||
@@ -35,7 +37,7 @@ class TestPartitionBasedSplit : public ::testing::Test {
|
||||
MetaInfo info_;
|
||||
float best_score_{-std::numeric_limits<float>::infinity()};
|
||||
common::HistogramCuts cuts_;
|
||||
common::HistCollection hist_;
|
||||
BoundedHistCollection hist_;
|
||||
GradientPairPrecise total_gpair_;
|
||||
|
||||
void SetUp() override {
|
||||
@@ -56,9 +58,9 @@ class TestPartitionBasedSplit : public ::testing::Test {
|
||||
|
||||
cuts_.min_vals_.Resize(1);
|
||||
|
||||
hist_.Init(cuts_.TotalBins());
|
||||
hist_.AddHistRow(0);
|
||||
hist_.AllocateAllData();
|
||||
HistMakerTrainParam hist_param;
|
||||
hist_.Reset(cuts_.TotalBins(), hist_param.max_cached_hist_node);
|
||||
hist_.AllocateHistograms({0});
|
||||
auto node_hist = hist_[0];
|
||||
|
||||
SimpleLCG lcg;
|
||||
|
||||
@@ -11,16 +11,15 @@
|
||||
#include <vector>
|
||||
|
||||
#include "../../../src/common/common.h"
|
||||
#include "../../../src/data/sparse_page_source.h"
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
#include "../../../src/tree/constraints.cuh"
|
||||
#include "../../../src/data/ellpack_page.cuh" // for EllpackPageImpl
|
||||
#include "../../../src/data/ellpack_page.h" // for EllpackPage
|
||||
#include "../../../src/tree/param.h" // for TrainParam
|
||||
#include "../../../src/tree/updater_gpu_common.cuh"
|
||||
#include "../../../src/tree/updater_gpu_hist.cu"
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
#include "../../../src/tree/constraints.hip.h"
|
||||
#include "../../../src/data/ellpack_page.hip.h" // for EllpackPageImpl
|
||||
#include "../../../src/data/ellpack_page.h" // for EllpackPage
|
||||
#include "../../../src/tree/param.h" // for TrainParam
|
||||
#include "../../../src/tree/updater_gpu_common.hip.h"
|
||||
#include "../../../src/tree/updater_gpu_hist.hip"
|
||||
#endif
|
||||
#include "../filesystem.h" // dmlc::TemporaryDirectory
|
||||
@@ -32,11 +31,7 @@
|
||||
namespace xgboost::tree {
|
||||
TEST(GpuHist, DeviceHistogram) {
|
||||
// Ensures that node allocates correctly after reaching `kStopGrowingSize`.
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaSetDevice(0));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipSetDevice(0));
|
||||
#endif
|
||||
constexpr size_t kNBins = 128;
|
||||
constexpr int kNNodes = 4;
|
||||
constexpr size_t kStopGrowing = kNNodes * kNBins * 2u;
|
||||
@@ -103,8 +98,9 @@ void TestBuildHist(bool use_shared_memory_histograms) {
|
||||
auto page = BuildEllpackPage(kNRows, kNCols);
|
||||
BatchParam batch_param{};
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
GPUHistMakerDevice<GradientSumT> maker(&ctx, page.get(), {}, kNRows, param, kNCols, kNCols,
|
||||
batch_param);
|
||||
auto cs = std::make_shared<common::ColumnSampler>(0);
|
||||
GPUHistMakerDevice maker(&ctx, /*is_external_memory=*/false, {}, kNRows, param, cs, kNCols,
|
||||
batch_param);
|
||||
xgboost::SimpleLCG gen;
|
||||
xgboost::SimpleRealUniformDistribution<bst_float> dist(0.0f, 1.0f);
|
||||
HostDeviceVector<GradientPair> gpair(kNRows);
|
||||
@@ -116,10 +112,16 @@ void TestBuildHist(bool use_shared_memory_histograms) {
|
||||
gpair.SetDevice(0);
|
||||
|
||||
thrust::host_vector<common::CompressedByteT> h_gidx_buffer (page->gidx_buffer.HostVector());
|
||||
maker.row_partitioner.reset(new RowPartitioner(0, kNRows));
|
||||
maker.row_partitioner = std::make_unique<RowPartitioner>(0, kNRows);
|
||||
|
||||
maker.hist.Init(0, page->Cuts().TotalBins());
|
||||
maker.hist.AllocateHistograms({0});
|
||||
|
||||
maker.gpair = gpair.DeviceSpan();
|
||||
maker.quantiser.reset(new GradientQuantiser(maker.gpair));
|
||||
maker.quantiser = std::make_unique<GradientQuantiser>(maker.gpair);
|
||||
maker.page = page.get();
|
||||
|
||||
maker.InitFeatureGroupsOnce();
|
||||
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
|
||||
maker.feature_groups->DeviceAccessor(0), gpair.DeviceSpan(),
|
||||
@@ -132,19 +134,14 @@ void TestBuildHist(bool use_shared_memory_histograms) {
|
||||
// d_hist.data stored in float, not gradient pair
|
||||
thrust::host_vector<GradientPairInt64> h_result (node_histogram.size());
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaMemcpy(h_result.data(), node_histogram.data(), node_histogram.size_bytes(),
|
||||
cudaMemcpyDeviceToHost));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipMemcpy(h_result.data(), node_histogram.data(), node_histogram.size_bytes(),
|
||||
hipMemcpyDeviceToHost));
|
||||
#endif
|
||||
|
||||
std::vector<GradientPairPrecise> solution = GetHostHistGpair();
|
||||
for (size_t i = 0; i < h_result.size(); ++i) {
|
||||
auto result = maker.quantiser->ToFloatingPoint(h_result[i]);
|
||||
EXPECT_NEAR(result.GetGrad(), solution[i].GetGrad(), 0.01f);
|
||||
EXPECT_NEAR(result.GetHess(), solution[i].GetHess(), 0.01f);
|
||||
ASSERT_NEAR(result.GetGrad(), solution[i].GetGrad(), 0.01f);
|
||||
ASSERT_NEAR(result.GetHess(), solution[i].GetHess(), 0.01f);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -257,6 +254,7 @@ void UpdateTree(Context const* ctx, HostDeviceVector<GradientPair>* gpair, DMatr
|
||||
|
||||
ObjInfo task{ObjInfo::kRegression};
|
||||
tree::GPUHistMaker hist_maker{ctx, &task};
|
||||
hist_maker.Configure(Args{});
|
||||
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
hist_maker.Update(&param, gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position},
|
||||
@@ -408,14 +406,14 @@ TEST(GpuHist, ConfigIO) {
|
||||
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_gpu_hist", &ctx, &task)};
|
||||
updater->Configure(Args{});
|
||||
|
||||
Json j_updater { Object() };
|
||||
Json j_updater{Object{}};
|
||||
updater->SaveConfig(&j_updater);
|
||||
ASSERT_TRUE(IsA<Object>(j_updater["gpu_hist_train_param"]));
|
||||
ASSERT_TRUE(IsA<Object>(j_updater["hist_train_param"]));
|
||||
updater->LoadConfig(j_updater);
|
||||
|
||||
Json j_updater_roundtrip { Object() };
|
||||
Json j_updater_roundtrip{Object{}};
|
||||
updater->SaveConfig(&j_updater_roundtrip);
|
||||
ASSERT_TRUE(IsA<Object>(j_updater_roundtrip["gpu_hist_train_param"]));
|
||||
ASSERT_TRUE(IsA<Object>(j_updater_roundtrip["hist_train_param"]));
|
||||
|
||||
ASSERT_EQ(j_updater, j_updater_roundtrip);
|
||||
}
|
||||
|
||||
@@ -39,6 +39,7 @@ TEST(GrowHistMaker, InteractionConstraint) {
|
||||
param.UpdateAllowUnknown(
|
||||
Args{{"interaction_constraints", "[[0, 1]]"}, {"num_feature", std::to_string(kCols)}});
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
updater->Configure(Args{});
|
||||
updater->Update(&param, p_gradients.get(), p_dmat.get(), position, {&tree});
|
||||
|
||||
ASSERT_EQ(tree.NumExtraNodes(), 4);
|
||||
@@ -55,6 +56,7 @@ TEST(GrowHistMaker, InteractionConstraint) {
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
TrainParam param;
|
||||
param.Init(Args{});
|
||||
updater->Configure(Args{});
|
||||
updater->Update(&param, p_gradients.get(), p_dmat.get(), position, {&tree});
|
||||
|
||||
ASSERT_EQ(tree.NumExtraNodes(), 10);
|
||||
@@ -81,6 +83,7 @@ void VerifyColumnSplit(int32_t rows, bst_feature_t cols, bool categorical,
|
||||
RegTree tree{1u, cols};
|
||||
TrainParam param;
|
||||
param.Init(Args{});
|
||||
updater->Configure(Args{});
|
||||
updater->Update(&param, p_gradients.get(), sliced.get(), position, {&tree});
|
||||
|
||||
Json json{Object{}};
|
||||
@@ -104,6 +107,7 @@ void TestColumnSplit(bool categorical) {
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
TrainParam param;
|
||||
param.Init(Args{});
|
||||
updater->Configure(Args{});
|
||||
updater->Update(&param, p_gradients.get(), p_dmat.get(), position, {&expected_tree});
|
||||
}
|
||||
|
||||
|
||||
@@ -6,7 +6,9 @@
|
||||
#include <xgboost/task.h> // for ObjInfo
|
||||
#include <xgboost/tree_updater.h> // for TreeUpdater
|
||||
|
||||
#include <memory> // for unique_ptr
|
||||
#include <memory> // for unique_ptr
|
||||
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
TEST(Updater, HasNodePosition) {
|
||||
@@ -19,7 +21,7 @@ TEST(Updater, HasNodePosition) {
|
||||
ASSERT_TRUE(up->HasNodePosition());
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
ctx.gpu_id = 0;
|
||||
ctx = MakeCUDACtx(0);
|
||||
up.reset(TreeUpdater::Create("grow_gpu_hist", &ctx, &task));
|
||||
ASSERT_TRUE(up->HasNodePosition());
|
||||
#endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
|
||||
@@ -24,15 +24,11 @@ class TestPredictionCache : public ::testing::Test {
|
||||
Xy_ = RandomDataGenerator{n_samples_, n_features, 0}.Targets(n_targets).GenerateDMatrix(true);
|
||||
}
|
||||
|
||||
void RunLearnerTest(std::string updater_name, float subsample, std::string const& grow_policy,
|
||||
std::string const& strategy) {
|
||||
void RunLearnerTest(Context const* ctx, std::string updater_name, float subsample,
|
||||
std::string const& grow_policy, std::string const& strategy) {
|
||||
std::unique_ptr<Learner> learner{Learner::Create({Xy_})};
|
||||
if (updater_name == "grow_gpu_hist") {
|
||||
// gpu_id setup
|
||||
learner->SetParam("tree_method", "gpu_hist");
|
||||
} else {
|
||||
learner->SetParam("updater", updater_name);
|
||||
}
|
||||
learner->SetParam("device", ctx->DeviceName());
|
||||
learner->SetParam("updater", updater_name);
|
||||
learner->SetParam("multi_strategy", strategy);
|
||||
learner->SetParam("grow_policy", grow_policy);
|
||||
learner->SetParam("subsample", std::to_string(subsample));
|
||||
@@ -65,54 +61,62 @@ class TestPredictionCache : public ::testing::Test {
|
||||
}
|
||||
}
|
||||
|
||||
void RunTest(std::string const& updater_name, std::string const& strategy) {
|
||||
void RunTest(Context* ctx, std::string const& updater_name, std::string const& strategy) {
|
||||
{
|
||||
Context ctx;
|
||||
ctx.InitAllowUnknown(Args{{"nthread", "8"}});
|
||||
if (updater_name == "grow_gpu_hist") {
|
||||
ctx.gpu_id = 0;
|
||||
} else {
|
||||
ctx.gpu_id = Context::kCpuId;
|
||||
}
|
||||
ctx->InitAllowUnknown(Args{{"nthread", "8"}});
|
||||
|
||||
ObjInfo task{ObjInfo::kRegression};
|
||||
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create(updater_name, &ctx, &task)};
|
||||
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create(updater_name, ctx, &task)};
|
||||
RegTree tree;
|
||||
std::vector<RegTree *> trees{&tree};
|
||||
std::vector<RegTree*> trees{&tree};
|
||||
auto gpair = GenerateRandomGradients(n_samples_);
|
||||
tree::TrainParam param;
|
||||
param.UpdateAllowUnknown(Args{{"max_bin", "64"}});
|
||||
|
||||
updater->Configure(Args{});
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
updater->Update(&param, &gpair, Xy_.get(), position, trees);
|
||||
HostDeviceVector<float> out_prediction_cached;
|
||||
out_prediction_cached.SetDevice(ctx.gpu_id);
|
||||
out_prediction_cached.SetDevice(ctx->Device());
|
||||
out_prediction_cached.Resize(n_samples_);
|
||||
auto cache =
|
||||
linalg::MakeTensorView(&ctx, &out_prediction_cached, out_prediction_cached.Size(), 1);
|
||||
linalg::MakeTensorView(ctx, &out_prediction_cached, out_prediction_cached.Size(), 1);
|
||||
ASSERT_TRUE(updater->UpdatePredictionCache(Xy_.get(), cache));
|
||||
}
|
||||
|
||||
for (auto policy : {"depthwise", "lossguide"}) {
|
||||
for (auto subsample : {1.0f, 0.4f}) {
|
||||
this->RunLearnerTest(updater_name, subsample, policy, strategy);
|
||||
this->RunLearnerTest(updater_name, subsample, policy, strategy);
|
||||
this->RunLearnerTest(ctx, updater_name, subsample, policy, strategy);
|
||||
this->RunLearnerTest(ctx, updater_name, subsample, policy, strategy);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(TestPredictionCache, Approx) { this->RunTest("grow_histmaker", "one_output_per_tree"); }
|
||||
TEST_F(TestPredictionCache, Approx) {
|
||||
Context ctx;
|
||||
this->RunTest(&ctx, "grow_histmaker", "one_output_per_tree");
|
||||
}
|
||||
|
||||
TEST_F(TestPredictionCache, Hist) {
|
||||
this->RunTest("grow_quantile_histmaker", "one_output_per_tree");
|
||||
Context ctx;
|
||||
this->RunTest(&ctx, "grow_quantile_histmaker", "one_output_per_tree");
|
||||
}
|
||||
|
||||
TEST_F(TestPredictionCache, HistMulti) {
|
||||
this->RunTest("grow_quantile_histmaker", "multi_output_tree");
|
||||
Context ctx;
|
||||
this->RunTest(&ctx, "grow_quantile_histmaker", "multi_output_tree");
|
||||
}
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
TEST_F(TestPredictionCache, GpuHist) { this->RunTest("grow_gpu_hist", "one_output_per_tree"); }
|
||||
#endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
TEST_F(TestPredictionCache, GpuHist) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
this->RunTest(&ctx, "grow_gpu_hist", "one_output_per_tree");
|
||||
}
|
||||
|
||||
// Prediction-cache check for the "grow_gpu_approx" updater on CUDA device 0.
TEST_F(TestPredictionCache, GpuApprox) {
  Context ctx{MakeCUDACtx(0)};
  RunTest(&ctx, "grow_gpu_approx", "one_output_per_tree");
}
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -13,7 +13,6 @@
|
||||
#include "../../../src/tree/common_row_partitioner.h"
|
||||
#include "../../../src/tree/hist/expand_entry.h" // for MultiExpandEntry, CPUExpandEntry
|
||||
#include "../../../src/tree/param.h"
|
||||
#include "../../../src/tree/split_evaluator.h"
|
||||
#include "../helpers.h"
|
||||
#include "test_partitioner.h"
|
||||
#include "xgboost/data.h"
|
||||
@@ -49,7 +48,7 @@ void TestPartitioner(bst_target_t n_targets) {
|
||||
auto min_value = gmat.cut.MinValues()[split_ind];
|
||||
RegTree tree{n_targets, n_features};
|
||||
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
|
||||
if constexpr (std::is_same<ExpandEntry, CPUExpandEntry>::value) {
|
||||
if constexpr (std::is_same_v<ExpandEntry, CPUExpandEntry>) {
|
||||
GetSplit(&tree, min_value, &candidates);
|
||||
} else {
|
||||
GetMultiSplitForTest(&tree, min_value, &candidates);
|
||||
@@ -217,6 +216,7 @@ void VerifyColumnSplit(bst_row_t rows, bst_feature_t cols, bst_target_t n_target
|
||||
RegTree tree{n_targets, cols};
|
||||
TrainParam param;
|
||||
param.Init(Args{});
|
||||
updater->Configure(Args{});
|
||||
updater->Update(&param, p_gradients.get(), sliced.get(), position, {&tree});
|
||||
|
||||
Json json{Object{}};
|
||||
@@ -241,6 +241,7 @@ void TestColumnSplit(bst_target_t n_targets) {
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
TrainParam param;
|
||||
param.Init(Args{});
|
||||
updater->Configure(Args{});
|
||||
updater->Update(&param, p_gradients.get(), Xy.get(), position, {&expected_tree});
|
||||
}
|
||||
|
||||
|
||||
@@ -62,8 +62,10 @@ class RegenTest : public ::testing::Test {
|
||||
auto constexpr Iter() const { return 4; }
|
||||
|
||||
template <typename Page>
|
||||
size_t TestTreeMethod(std::string tree_method, std::string obj, bool reset = true) const {
|
||||
size_t TestTreeMethod(Context const* ctx, std::string tree_method, std::string obj,
|
||||
bool reset = true) const {
|
||||
auto learner = std::unique_ptr<Learner>{Learner::Create({p_fmat_})};
|
||||
learner->SetParam("device", ctx->DeviceName());
|
||||
learner->SetParam("tree_method", tree_method);
|
||||
learner->SetParam("objective", obj);
|
||||
learner->Configure();
|
||||
@@ -87,40 +89,71 @@ class RegenTest : public ::testing::Test {
|
||||
} // anonymous namespace
|
||||
|
||||
TEST_F(RegenTest, Approx) {
|
||||
auto n = this->TestTreeMethod<GHistIndexMatrix>("approx", "reg:squarederror");
|
||||
Context ctx;
|
||||
auto n = this->TestTreeMethod<GHistIndexMatrix>(&ctx, "approx", "reg:squarederror");
|
||||
ASSERT_EQ(n, 1);
|
||||
n = this->TestTreeMethod<GHistIndexMatrix>("approx", "reg:logistic");
|
||||
n = this->TestTreeMethod<GHistIndexMatrix>(&ctx, "approx", "reg:logistic");
|
||||
ASSERT_EQ(n, this->Iter());
|
||||
}
|
||||
|
||||
TEST_F(RegenTest, Hist) {
|
||||
auto n = this->TestTreeMethod<GHistIndexMatrix>("hist", "reg:squarederror");
|
||||
Context ctx;
|
||||
auto n = this->TestTreeMethod<GHistIndexMatrix>(&ctx, "hist", "reg:squarederror");
|
||||
ASSERT_EQ(n, 1);
|
||||
n = this->TestTreeMethod<GHistIndexMatrix>("hist", "reg:logistic");
|
||||
n = this->TestTreeMethod<GHistIndexMatrix>(&ctx, "hist", "reg:logistic");
|
||||
ASSERT_EQ(n, 1);
|
||||
}
|
||||
|
||||
TEST_F(RegenTest, Mixed) {
|
||||
auto n = this->TestTreeMethod<GHistIndexMatrix>("hist", "reg:squarederror", false);
|
||||
Context ctx;
|
||||
auto n = this->TestTreeMethod<GHistIndexMatrix>(&ctx, "hist", "reg:squarederror", false);
|
||||
ASSERT_EQ(n, 1);
|
||||
n = this->TestTreeMethod<GHistIndexMatrix>("approx", "reg:logistic", true);
|
||||
n = this->TestTreeMethod<GHistIndexMatrix>(&ctx, "approx", "reg:logistic", true);
|
||||
ASSERT_EQ(n, this->Iter() + 1);
|
||||
|
||||
n = this->TestTreeMethod<GHistIndexMatrix>("approx", "reg:logistic", false);
|
||||
n = this->TestTreeMethod<GHistIndexMatrix>(&ctx, "approx", "reg:logistic", false);
|
||||
ASSERT_EQ(n, this->Iter());
|
||||
n = this->TestTreeMethod<GHistIndexMatrix>("hist", "reg:squarederror", true);
|
||||
n = this->TestTreeMethod<GHistIndexMatrix>(&ctx, "hist", "reg:squarederror", true);
|
||||
ASSERT_EQ(n, this->Iter() + 1);
|
||||
}
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
TEST_F(RegenTest, GpuHist) {
|
||||
auto n = this->TestTreeMethod<EllpackPage>("gpu_hist", "reg:squarederror");
|
||||
TEST_F(RegenTest, GpuApprox) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto n = this->TestTreeMethod<EllpackPage>(&ctx, "approx", "reg:squarederror", true);
|
||||
ASSERT_EQ(n, 1);
|
||||
n = this->TestTreeMethod<EllpackPage>("gpu_hist", "reg:logistic", false);
|
||||
n = this->TestTreeMethod<EllpackPage>(&ctx, "approx", "reg:logistic", false);
|
||||
ASSERT_EQ(n, this->Iter());
|
||||
|
||||
n = this->TestTreeMethod<EllpackPage>(&ctx, "approx", "reg:logistic", true);
|
||||
ASSERT_EQ(n, this->Iter() * 2);
|
||||
}
|
||||
|
||||
TEST_F(RegenTest, GpuHist) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto n = this->TestTreeMethod<EllpackPage>(&ctx, "hist", "reg:squarederror", true);
|
||||
ASSERT_EQ(n, 1);
|
||||
n = this->TestTreeMethod<EllpackPage>(&ctx, "hist", "reg:logistic", false);
|
||||
ASSERT_EQ(n, 1);
|
||||
|
||||
n = this->TestTreeMethod<EllpackPage>("hist", "reg:logistic");
|
||||
ASSERT_EQ(n, 2);
|
||||
{
|
||||
Context ctx;
|
||||
n = this->TestTreeMethod<EllpackPage>(&ctx, "hist", "reg:logistic");
|
||||
ASSERT_EQ(n, 2);
|
||||
}
|
||||
}
|
||||
|
||||
// Alternates between the "hist" and "approx" tree methods on a CUDA context and checks the
// value returned by TestTreeMethod<EllpackPage> after each switch.
// NOTE(review): the returned count presumably tracks page regeneration -- confirm against
// TestTreeMethod.
TEST_F(RegenTest, GpuMixed) {
  Context ctx{MakeCUDACtx(0)};
  auto run = [&](char const* tree_method, char const* objective, bool reset) {
    return this->TestTreeMethod<EllpackPage>(&ctx, tree_method, objective, reset);
  };

  auto n = run("hist", "reg:squarederror", false);
  ASSERT_EQ(n, 1);
  n = run("approx", "reg:logistic", true);
  ASSERT_EQ(n, this->Iter() + 1);

  n = run("approx", "reg:logistic", false);
  ASSERT_EQ(n, this->Iter());
  n = run("hist", "reg:squarederror", true);
  ASSERT_EQ(n, this->Iter() + 1);
}
|
||||
#endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -20,10 +20,11 @@ class TestGrowPolicy : public ::testing::Test {
|
||||
true);
|
||||
}
|
||||
|
||||
std::unique_ptr<Learner> TrainOneIter(std::string tree_method, std::string policy,
|
||||
int32_t max_leaves, int32_t max_depth) {
|
||||
std::unique_ptr<Learner> TrainOneIter(Context const* ctx, std::string tree_method,
|
||||
std::string policy, int32_t max_leaves, int32_t max_depth) {
|
||||
std::unique_ptr<Learner> learner{Learner::Create({this->Xy_})};
|
||||
learner->SetParam("tree_method", tree_method);
|
||||
learner->SetParam("device", ctx->DeviceName());
|
||||
if (max_leaves >= 0) {
|
||||
learner->SetParam("max_leaves", std::to_string(max_leaves));
|
||||
}
|
||||
@@ -63,7 +64,7 @@ class TestGrowPolicy : public ::testing::Test {
|
||||
|
||||
if (max_leaves == 0 && max_depth == 0) {
|
||||
// unconstrained
|
||||
if (tree_method != "gpu_hist") {
|
||||
if (ctx->IsCPU()) {
|
||||
// GPU pre-allocates for all nodes.
|
||||
learner->UpdateOneIter(0, Xy_);
|
||||
}
|
||||
@@ -86,23 +87,23 @@ class TestGrowPolicy : public ::testing::Test {
|
||||
return learner;
|
||||
}
|
||||
|
||||
void TestCombination(std::string tree_method) {
|
||||
void TestCombination(Context const* ctx, std::string tree_method) {
|
||||
for (auto policy : {"depthwise", "lossguide"}) {
|
||||
// -1 means default
|
||||
for (auto leaves : {-1, 0, 3}) {
|
||||
for (auto depth : {-1, 0, 3}) {
|
||||
this->TrainOneIter(tree_method, policy, leaves, depth);
|
||||
this->TrainOneIter(ctx, tree_method, policy, leaves, depth);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void TestTreeGrowPolicy(std::string tree_method, std::string policy) {
|
||||
void TestTreeGrowPolicy(Context const* ctx, std::string tree_method, std::string policy) {
|
||||
{
|
||||
/**
|
||||
* max_leaves
|
||||
*/
|
||||
auto learner = this->TrainOneIter(tree_method, policy, 16, -1);
|
||||
auto learner = this->TrainOneIter(ctx, tree_method, policy, 16, -1);
|
||||
Json model{Object{}};
|
||||
learner->SaveModel(&model);
|
||||
|
||||
@@ -115,7 +116,7 @@ class TestGrowPolicy : public ::testing::Test {
|
||||
/**
|
||||
* max_depth
|
||||
*/
|
||||
auto learner = this->TrainOneIter(tree_method, policy, -1, 3);
|
||||
auto learner = this->TrainOneIter(ctx, tree_method, policy, -1, 3);
|
||||
Json model{Object{}};
|
||||
learner->SaveModel(&model);
|
||||
|
||||
@@ -133,25 +134,36 @@ class TestGrowPolicy : public ::testing::Test {
|
||||
};
|
||||
|
||||
TEST_F(TestGrowPolicy, Approx) {
|
||||
this->TestTreeGrowPolicy("approx", "depthwise");
|
||||
this->TestTreeGrowPolicy("approx", "lossguide");
|
||||
Context ctx;
|
||||
this->TestTreeGrowPolicy(&ctx, "approx", "depthwise");
|
||||
this->TestTreeGrowPolicy(&ctx, "approx", "lossguide");
|
||||
|
||||
this->TestCombination("approx");
|
||||
this->TestCombination(&ctx, "approx");
|
||||
}
|
||||
|
||||
TEST_F(TestGrowPolicy, Hist) {
|
||||
this->TestTreeGrowPolicy("hist", "depthwise");
|
||||
this->TestTreeGrowPolicy("hist", "lossguide");
|
||||
Context ctx;
|
||||
this->TestTreeGrowPolicy(&ctx, "hist", "depthwise");
|
||||
this->TestTreeGrowPolicy(&ctx, "hist", "lossguide");
|
||||
|
||||
this->TestCombination("hist");
|
||||
this->TestCombination(&ctx, "hist");
|
||||
}
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
TEST_F(TestGrowPolicy, GpuHist) {
|
||||
this->TestTreeGrowPolicy("gpu_hist", "depthwise");
|
||||
this->TestTreeGrowPolicy("gpu_hist", "lossguide");
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
this->TestTreeGrowPolicy(&ctx, "hist", "depthwise");
|
||||
this->TestTreeGrowPolicy(&ctx, "hist", "lossguide");
|
||||
|
||||
this->TestCombination("gpu_hist");
|
||||
this->TestCombination(&ctx, "hist");
|
||||
}
|
||||
|
||||
TEST_F(TestGrowPolicy, GpuApprox) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
this->TestTreeGrowPolicy(&ctx, "approx", "depthwise");
|
||||
this->TestTreeGrowPolicy(&ctx, "approx", "lossguide");
|
||||
|
||||
this->TestCombination(&ctx, "approx");
|
||||
}
|
||||
#endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -135,7 +135,7 @@ class TestMinSplitLoss : public ::testing::Test {
|
||||
gpair_ = GenerateRandomGradients(kRows);
|
||||
}
|
||||
|
||||
std::int32_t Update(std::string updater, float gamma) {
|
||||
std::int32_t Update(Context const* ctx, std::string updater, float gamma) {
|
||||
Args args{{"max_depth", "1"},
|
||||
{"max_leaves", "0"},
|
||||
|
||||
@@ -154,8 +154,7 @@ class TestMinSplitLoss : public ::testing::Test {
|
||||
param.UpdateAllowUnknown(args);
|
||||
ObjInfo task{ObjInfo::kRegression};
|
||||
|
||||
Context ctx{MakeCUDACtx(updater == "grow_gpu_hist" ? 0 : Context::kCpuId)};
|
||||
auto up = std::unique_ptr<TreeUpdater>{TreeUpdater::Create(updater, &ctx, &task)};
|
||||
auto up = std::unique_ptr<TreeUpdater>{TreeUpdater::Create(updater, ctx, &task)};
|
||||
up->Configure({});
|
||||
|
||||
RegTree tree;
|
||||
@@ -167,16 +166,16 @@ class TestMinSplitLoss : public ::testing::Test {
|
||||
}
|
||||
|
||||
public:
|
||||
void RunTest(std::string updater) {
|
||||
void RunTest(Context const* ctx, std::string updater) {
|
||||
{
|
||||
int32_t n_nodes = Update(updater, 0.01);
|
||||
int32_t n_nodes = Update(ctx, updater, 0.01);
|
||||
// This is not strictly verified, meaning the number `2` is whatever GPU_Hist returned
|
||||
// when writing this test, and only used for testing larger gamma (below) does prevent
|
||||
// building tree.
|
||||
ASSERT_EQ(n_nodes, 2);
|
||||
}
|
||||
{
|
||||
int32_t n_nodes = Update(updater, 100.0);
|
||||
int32_t n_nodes = Update(ctx, updater, 100.0);
|
||||
// No new nodes with gamma == 100.
|
||||
ASSERT_EQ(n_nodes, static_cast<decltype(n_nodes)>(0));
|
||||
}
|
||||
@@ -185,10 +184,25 @@ class TestMinSplitLoss : public ::testing::Test {
|
||||
|
||||
/* Exact tree method requires a pruner as an additional updater, so not tested here. */
|
||||
|
||||
TEST_F(TestMinSplitLoss, Approx) { this->RunTest("grow_histmaker"); }
|
||||
TEST_F(TestMinSplitLoss, Approx) {
|
||||
Context ctx;
|
||||
this->RunTest(&ctx, "grow_histmaker");
|
||||
}
|
||||
|
||||
TEST_F(TestMinSplitLoss, Hist) {
|
||||
Context ctx;
|
||||
this->RunTest(&ctx, "grow_quantile_histmaker");
|
||||
}
|
||||
|
||||
TEST_F(TestMinSplitLoss, Hist) { this->RunTest("grow_quantile_histmaker"); }
|
||||
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
TEST_F(TestMinSplitLoss, GpuHist) { this->RunTest("grow_gpu_hist"); }
|
||||
#endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
TEST_F(TestMinSplitLoss, GpuHist) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
this->RunTest(&ctx, "grow_gpu_hist");
|
||||
}
|
||||
|
||||
TEST_F(TestMinSplitLoss, GpuApprox) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
this->RunTest(&ctx, "grow_gpu_approx");
|
||||
}
|
||||
#endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
} // namespace xgboost
|
||||
|
||||
Reference in New Issue
Block a user