Merge branch 'master' into dev-hui

This commit is contained in:
amdsc21
2023-03-08 00:39:33 +01:00
221 changed files with 3122 additions and 1486 deletions

View File

@@ -267,7 +267,7 @@ TEST(CAPI, DMatrixSetFeatureName) {
}
char const* feat_types [] {"i", "q"};
static_assert(sizeof(feat_types)/ sizeof(feat_types[0]) == kCols, "");
static_assert(sizeof(feat_types) / sizeof(feat_types[0]) == kCols);
XGDMatrixSetStrFeatureInfo(handle, "feature_type", feat_types, kCols);
char const **c_out_types;
XGDMatrixGetStrFeatureInfo(handle, u8"feature_type", &out_len,

View File

@@ -0,0 +1,35 @@
/**
* Copyright 2020-2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/context.h> // Context
#include <xgboost/span.h>
#include <algorithm> // is_sorted
#include "../../../src/common/algorithm.h"
namespace xgboost {
namespace common {
TEST(Algorithm, ArgSort) {
Context ctx;
std::vector<float> inputs{3.0, 2.0, 1.0};
auto ret = ArgSort<bst_feature_t>(&ctx, inputs.cbegin(), inputs.cend());
std::vector<bst_feature_t> sol{2, 1, 0};
ASSERT_EQ(ret, sol);
}
TEST(Algorithm, Sort) {
Context ctx;
ctx.Init(Args{{"nthread", "8"}});
std::vector<float> inputs{3.0, 1.0, 2.0};
Sort(&ctx, inputs.begin(), inputs.end(), std::less<>{});
ASSERT_TRUE(std::is_sorted(inputs.cbegin(), inputs.cend()));
inputs = {3.0, 1.0, 2.0};
StableSort(&ctx, inputs.begin(), inputs.end(), std::less<>{});
ASSERT_TRUE(std::is_sorted(inputs.cbegin(), inputs.cend()));
}
} // namespace common
} // namespace xgboost

View File

@@ -52,9 +52,9 @@ void TestSegmentedArgSort() {
}
}
TEST(Algorithms, SegmentedArgSort) { TestSegmentedArgSort(); }
TEST(Algorithm, SegmentedArgSort) { TestSegmentedArgSort(); }
TEST(Algorithms, ArgSort) {
TEST(Algorithm, GpuArgSort) {
Context ctx;
ctx.gpu_id = 0;
@@ -80,7 +80,7 @@ TEST(Algorithms, ArgSort) {
thrust::is_sorted(sorted_idx.begin() + 10, sorted_idx.end(), thrust::greater<size_t>{}));
}
TEST(Algorithms, SegmentedSequence) {
TEST(Algorithm, SegmentedSequence) {
dh::device_vector<std::size_t> idx(16);
dh::device_vector<std::size_t> ptr(3);
Context ctx = CreateEmptyGenericParam(0);

View File

@@ -128,7 +128,7 @@ TEST(Ryu, Regression) {
TestRyu("2E2", 200.0f);
TestRyu("3.3554432E7", 3.3554432E7f);
static_assert(1.1920929E-7f == std::numeric_limits<float>::epsilon(), "");
static_assert(1.1920929E-7f == std::numeric_limits<float>::epsilon());
TestRyu("1.1920929E-7", std::numeric_limits<float>::epsilon());
}

View File

@@ -1,14 +0,0 @@
#include <gtest/gtest.h>
#include <xgboost/span.h>
#include "../../../src/common/common.h"
namespace xgboost {
namespace common {
TEST(ArgSort, Basic) {
std::vector<float> inputs {3.0, 2.0, 1.0};
auto ret = ArgSort<bst_feature_t>(Span<float>{inputs});
std::vector<bst_feature_t> sol{2, 1, 0};
ASSERT_EQ(ret, sol);
}
} // namespace common
} // namespace xgboost

View File

@@ -43,8 +43,8 @@ TEST(GroupData, ParallelGroupBuilder) {
builder2.Push(2, Entry(0, 4), 0);
builder2.Push(2, Entry(1, 5), 0);
expected_data.emplace_back(Entry(0, 4));
expected_data.emplace_back(Entry(1, 5));
expected_data.emplace_back(0, 4);
expected_data.emplace_back(1, 5);
expected_offsets.emplace_back(6);
EXPECT_EQ(data, expected_data);

View File

@@ -143,7 +143,7 @@ void TestMixedSketch() {
size_t n_samples = 1000, n_features = 2, n_categories = 3;
std::vector<float> data(n_samples * n_features);
SimpleLCG gen;
SimpleRealUniformDistribution<float> cat_d{0.0f, float(n_categories)};
SimpleRealUniformDistribution<float> cat_d{0.0f, static_cast<float>(n_categories)};
SimpleRealUniformDistribution<float> num_d{0.0f, 3.0f};
for (size_t i = 0; i < n_samples * n_features; ++i) {
if (i % 2 == 0) {

View File

@@ -13,9 +13,9 @@ class NotCopyConstructible {
NotCopyConstructible(NotCopyConstructible&& that) = default;
};
static_assert(
!std::is_trivially_copy_constructible<NotCopyConstructible>::value, "");
!std::is_trivially_copy_constructible<NotCopyConstructible>::value);
static_assert(
!std::is_trivially_copy_assignable<NotCopyConstructible>::value, "");
!std::is_trivially_copy_assignable<NotCopyConstructible>::value);
class ForIntrusivePtrTest {
public:

View File

@@ -1,22 +1,23 @@
/*!
* Copyright 2021 by XGBoost Contributors
/**
* Copyright 2021-2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/context.h>
#include <xgboost/host_device_vector.h>
#include <xgboost/linalg.h>
#include <numeric>
#include <cstddef> // size_t
#include <numeric> // iota
#include <vector>
#include "../../../src/common/linalg_op.h"
namespace xgboost {
namespace linalg {
namespace xgboost::linalg {
namespace {
auto kCpuId = Context::kCpuId;
}
auto MakeMatrixFromTest(HostDeviceVector<float> *storage, size_t n_rows, size_t n_cols) {
auto MakeMatrixFromTest(HostDeviceVector<float> *storage, std::size_t n_rows, std::size_t n_cols) {
storage->Resize(n_rows * n_cols);
auto &h_storage = storage->HostVector();
@@ -48,10 +49,11 @@ TEST(Linalg, VectorView) {
}
TEST(Linalg, TensorView) {
Context ctx;
std::vector<double> data(2 * 3 * 4, 0);
std::iota(data.begin(), data.end(), 0);
auto t = MakeTensorView(data, {2, 3, 4}, -1);
auto t = MakeTensorView(&ctx, data, 2, 3, 4);
ASSERT_EQ(t.Shape()[0], 2);
ASSERT_EQ(t.Shape()[1], 3);
ASSERT_EQ(t.Shape()[2], 4);
@@ -106,12 +108,12 @@ TEST(Linalg, TensorView) {
{
// Don't assign the initial dimension, tensor should be able to deduce the correct dim
// for Slice.
auto t = MakeTensorView(data, {2, 3, 4}, 0);
auto t = MakeTensorView(&ctx, data, 2, 3, 4);
auto s = t.Slice(1, 2, All());
static_assert(decltype(s)::kDimension == 1, "");
static_assert(decltype(s)::kDimension == 1);
}
{
auto t = MakeTensorView(data, {2, 3, 4}, 0);
auto t = MakeTensorView(&ctx, data, 2, 3, 4);
auto s = t.Slice(1, linalg::All(), 1);
ASSERT_EQ(s(0), 13);
ASSERT_EQ(s(1), 17);
@@ -119,9 +121,9 @@ TEST(Linalg, TensorView) {
}
{
// range slice
auto t = MakeTensorView(data, {2, 3, 4}, 0);
auto t = MakeTensorView(&ctx, data, 2, 3, 4);
auto s = t.Slice(linalg::All(), linalg::Range(1, 3), 2);
static_assert(decltype(s)::kDimension == 2, "");
static_assert(decltype(s)::kDimension == 2);
std::vector<double> sol{6, 10, 18, 22};
auto k = 0;
for (size_t i = 0; i < s.Shape(0); ++i) {
@@ -134,9 +136,9 @@ TEST(Linalg, TensorView) {
}
{
// range slice
auto t = MakeTensorView(data, {2, 3, 4}, 0);
auto t = MakeTensorView(&ctx, data, 2, 3, 4);
auto s = t.Slice(1, linalg::Range(1, 3), linalg::Range(1, 3));
static_assert(decltype(s)::kDimension == 2, "");
static_assert(decltype(s)::kDimension == 2);
std::vector<double> sol{17, 18, 21, 22};
auto k = 0;
for (size_t i = 0; i < s.Shape(0); ++i) {
@@ -149,9 +151,9 @@ TEST(Linalg, TensorView) {
}
{
// same as no slice.
auto t = MakeTensorView(data, {2, 3, 4}, 0);
auto t = MakeTensorView(&ctx, data, 2, 3, 4);
auto s = t.Slice(linalg::All(), linalg::Range(0, 3), linalg::Range(0, 4));
static_assert(decltype(s)::kDimension == 3, "");
static_assert(decltype(s)::kDimension == 3);
auto all = t.Slice(linalg::All(), linalg::All(), linalg::All());
for (size_t i = 0; i < s.Shape(0); ++i) {
for (size_t j = 0; j < s.Shape(1); ++j) {
@@ -166,7 +168,7 @@ TEST(Linalg, TensorView) {
{
// copy and move constructor.
auto t = MakeTensorView(data, {2, 3, 4}, kCpuId);
auto t = MakeTensorView(&ctx, data, 2, 3, 4);
auto from_copy = t;
auto from_move = std::move(t);
for (size_t i = 0; i < t.Shape().size(); ++i) {
@@ -177,7 +179,7 @@ TEST(Linalg, TensorView) {
{
// multiple slices
auto t = MakeTensorView(data, {2, 3, 4}, kCpuId);
auto t = MakeTensorView(&ctx, data, 2, 3, 4);
auto s_0 = t.Slice(linalg::All(), linalg::Range(0, 2), linalg::Range(1, 4));
ASSERT_FALSE(s_0.CContiguous());
auto s_1 = s_0.Slice(1, 1, linalg::Range(0, 2));
@@ -208,7 +210,7 @@ TEST(Linalg, TensorView) {
TEST(Linalg, Tensor) {
{
Tensor<float, 3> t{{2, 3, 4}, kCpuId};
Tensor<float, 3> t{{2, 3, 4}, kCpuId, Order::kC};
auto view = t.View(kCpuId);
auto const &as_const = t;
@@ -227,7 +229,7 @@ TEST(Linalg, Tensor) {
}
{
// Reshape
Tensor<float, 3> t{{2, 3, 4}, kCpuId};
Tensor<float, 3> t{{2, 3, 4}, kCpuId, Order::kC};
t.Reshape(4, 3, 2);
ASSERT_EQ(t.Size(), 24);
ASSERT_EQ(t.Shape(2), 2);
@@ -245,7 +247,7 @@ TEST(Linalg, Tensor) {
TEST(Linalg, Empty) {
{
auto t = TensorView<double, 2>{{}, {0, 3}, kCpuId};
auto t = TensorView<double, 2>{{}, {0, 3}, kCpuId, Order::kC};
for (int32_t i : {0, 1, 2}) {
auto s = t.Slice(All(), i);
ASSERT_EQ(s.Size(), 0);
@@ -254,7 +256,7 @@ TEST(Linalg, Empty) {
}
}
{
auto t = Tensor<double, 2>{{0, 3}, kCpuId};
auto t = Tensor<double, 2>{{0, 3}, kCpuId, Order::kC};
ASSERT_EQ(t.Size(), 0);
auto view = t.View(kCpuId);
@@ -269,7 +271,7 @@ TEST(Linalg, Empty) {
TEST(Linalg, ArrayInterface) {
auto cpu = kCpuId;
auto t = Tensor<double, 2>{{3, 3}, cpu};
auto t = Tensor<double, 2>{{3, 3}, cpu, Order::kC};
auto v = t.View(cpu);
std::iota(v.Values().begin(), v.Values().end(), 0);
auto arr = Json::Load(StringView{ArrayInterfaceStr(v)});
@@ -313,21 +315,48 @@ TEST(Linalg, Popc) {
}
TEST(Linalg, Stack) {
Tensor<float, 3> l{{2, 3, 4}, kCpuId};
Tensor<float, 3> l{{2, 3, 4}, kCpuId, Order::kC};
ElementWiseTransformHost(l.View(kCpuId), omp_get_max_threads(),
[=](size_t i, float) { return i; });
Tensor<float, 3> r_0{{2, 3, 4}, kCpuId};
Tensor<float, 3> r_0{{2, 3, 4}, kCpuId, Order::kC};
ElementWiseTransformHost(r_0.View(kCpuId), omp_get_max_threads(),
[=](size_t i, float) { return i; });
Stack(&l, r_0);
Tensor<float, 3> r_1{{0, 3, 4}, kCpuId};
Tensor<float, 3> r_1{{0, 3, 4}, kCpuId, Order::kC};
Stack(&l, r_1);
ASSERT_EQ(l.Shape(0), 4);
Stack(&r_1, l);
ASSERT_EQ(r_1.Shape(0), l.Shape(0));
}
} // namespace linalg
} // namespace xgboost
TEST(Linalg, FOrder) {
std::size_t constexpr kRows = 16, kCols = 3;
std::vector<float> data(kRows * kCols);
MatrixView<float> mat{data, {kRows, kCols}, Context::kCpuId, Order::kF};
float k{0};
for (std::size_t i = 0; i < kRows; ++i) {
for (std::size_t j = 0; j < kCols; ++j) {
mat(i, j) = k;
k++;
}
}
auto column = mat.Slice(linalg::All(), 1);
ASSERT_TRUE(column.FContiguous());
ASSERT_EQ(column.Stride(0), 1);
ASSERT_TRUE(column.CContiguous());
k = 1;
for (auto it = linalg::cbegin(column); it != linalg::cend(column); ++it) {
ASSERT_EQ(*it, k);
k += kCols;
}
k = 1;
auto ptr = column.Values().data();
for (auto it = ptr; it != ptr + kRows; ++it) {
ASSERT_EQ(*it, k);
k += kCols;
}
}
} // namespace xgboost::linalg

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2021-2022 by XGBoost Contributors
/**
* Copyright 2021-2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
@@ -7,8 +7,7 @@
#include "xgboost/context.h"
#include "xgboost/linalg.h"
namespace xgboost {
namespace linalg {
namespace xgboost::linalg {
namespace {
void TestElementWiseKernel() {
Tensor<float, 3> l{{2, 3, 4}, 0};
@@ -55,12 +54,14 @@ void TestElementWiseKernel() {
}
void TestSlice() {
Context ctx;
ctx.gpu_id = 1;
thrust::device_vector<double> data(2 * 3 * 4);
auto t = MakeTensorView(dh::ToSpan(data), {2, 3, 4}, 0);
auto t = MakeTensorView(&ctx, dh::ToSpan(data), 2, 3, 4);
dh::LaunchN(1, [=] __device__(size_t) {
auto s = t.Slice(linalg::All(), linalg::Range(0, 3), linalg::Range(0, 4));
auto all = t.Slice(linalg::All(), linalg::All(), linalg::All());
static_assert(decltype(s)::kDimension == 3, "");
static_assert(decltype(s)::kDimension == 3);
for (size_t i = 0; i < s.Shape(0); ++i) {
for (size_t j = 0; j < s.Shape(1); ++j) {
for (size_t k = 0; k < s.Shape(2); ++k) {
@@ -75,5 +76,4 @@ void TestSlice() {
TEST(Linalg, GPUElementWise) { TestElementWiseKernel(); }
TEST(Linalg, GPUTensorView) { TestSlice(); }
} // namespace linalg
} // namespace xgboost
} // namespace xgboost::linalg

View File

@@ -2,16 +2,18 @@
#include "../../../src/common/random.h"
#include "../helpers.h"
#include "gtest/gtest.h"
#include "xgboost/context.h" // Context
namespace xgboost {
namespace common {
TEST(ColumnSampler, Test) {
Context ctx;
int n = 128;
ColumnSampler cs;
std::vector<float> feature_weights;
// No node sampling
cs.Init(n, feature_weights, 1.0f, 0.5f, 0.5f);
cs.Init(&ctx, n, feature_weights, 1.0f, 0.5f, 0.5f);
auto set0 = cs.GetFeatureSet(0);
ASSERT_EQ(set0->Size(), 32);
@@ -24,7 +26,7 @@ TEST(ColumnSampler, Test) {
ASSERT_EQ(set2->Size(), 32);
// Node sampling
cs.Init(n, feature_weights, 0.5f, 1.0f, 0.5f);
cs.Init(&ctx, n, feature_weights, 0.5f, 1.0f, 0.5f);
auto set3 = cs.GetFeatureSet(0);
ASSERT_EQ(set3->Size(), 32);
@@ -34,24 +36,25 @@ TEST(ColumnSampler, Test) {
ASSERT_EQ(set4->Size(), 32);
// No level or node sampling, should be the same at different depth
cs.Init(n, feature_weights, 1.0f, 1.0f, 0.5f);
cs.Init(&ctx, n, feature_weights, 1.0f, 1.0f, 0.5f);
ASSERT_EQ(cs.GetFeatureSet(0)->HostVector(),
cs.GetFeatureSet(1)->HostVector());
cs.Init(n, feature_weights, 1.0f, 1.0f, 1.0f);
cs.Init(&ctx, n, feature_weights, 1.0f, 1.0f, 1.0f);
auto set5 = cs.GetFeatureSet(0);
ASSERT_EQ(set5->Size(), n);
cs.Init(n, feature_weights, 1.0f, 1.0f, 1.0f);
cs.Init(&ctx, n, feature_weights, 1.0f, 1.0f, 1.0f);
auto set6 = cs.GetFeatureSet(0);
ASSERT_EQ(set5->HostVector(), set6->HostVector());
// Should always be a minimum of one feature
cs.Init(n, feature_weights, 1e-16f, 1e-16f, 1e-16f);
cs.Init(&ctx, n, feature_weights, 1e-16f, 1e-16f, 1e-16f);
ASSERT_EQ(cs.GetFeatureSet(0)->Size(), 1);
}
// Test if different threads using the same seed produce the same result
TEST(ColumnSampler, ThreadSynchronisation) {
Context ctx;
const int64_t num_threads = 100;
int n = 128;
size_t iterations = 10;
@@ -63,7 +66,7 @@ TEST(ColumnSampler, ThreadSynchronisation) {
{
for (auto j = 0ull; j < iterations; j++) {
ColumnSampler cs(j);
cs.Init(n, feature_weights, 0.5f, 0.5f, 0.5f);
cs.Init(&ctx, n, feature_weights, 0.5f, 0.5f, 0.5f);
for (auto level = 0ull; level < levels; level++) {
auto result = cs.GetFeatureSet(level)->ConstHostVector();
#pragma omp single
@@ -80,11 +83,12 @@ TEST(ColumnSampler, ThreadSynchronisation) {
TEST(ColumnSampler, WeightedSampling) {
auto test_basic = [](int first) {
Context ctx;
std::vector<float> feature_weights(2);
feature_weights[0] = std::abs(first - 1.0f);
feature_weights[1] = first - 0.0f;
ColumnSampler cs{0};
cs.Init(2, feature_weights, 1.0, 1.0, 0.5);
cs.Init(&ctx, 2, feature_weights, 1.0, 1.0, 0.5);
auto feature_sets = cs.GetFeatureSet(0);
auto const &h_feat_set = feature_sets->HostVector();
ASSERT_EQ(h_feat_set.size(), 1);
@@ -100,7 +104,8 @@ TEST(ColumnSampler, WeightedSampling) {
SimpleRealUniformDistribution<float> dist(.0f, 12.0f);
std::generate(feature_weights.begin(), feature_weights.end(), [&]() { return dist(&rng); });
ColumnSampler cs{0};
cs.Init(kCols, feature_weights, 0.5f, 1.0f, 1.0f);
Context ctx;
cs.Init(&ctx, kCols, feature_weights, 0.5f, 1.0f, 1.0f);
std::vector<bst_feature_t> features(kCols);
std::iota(features.begin(), features.end(), 0);
std::vector<float> freq(kCols, 0);
@@ -135,7 +140,8 @@ TEST(ColumnSampler, WeightedMultiSampling) {
}
ColumnSampler cs{0};
float bytree{0.5}, bylevel{0.5}, bynode{0.5};
cs.Init(feature_weights.size(), feature_weights, bytree, bylevel, bynode);
Context ctx;
cs.Init(&ctx, feature_weights.size(), feature_weights, bytree, bylevel, bynode);
auto feature_set = cs.GetFeatureSet(0);
size_t n_sampled = kCols * bytree * bylevel * bynode;
ASSERT_EQ(feature_set->Size(), n_sampled);

View File

@@ -522,9 +522,9 @@ TEST(Span, Empty) {
TEST(SpanDeathTest, Empty) {
std::vector<float> data(1, 0);
ASSERT_TRUE(data.data());
Span<float> s{data.data(), Span<float>::index_type(0)}; // ok to define 0 size span.
// ok to define 0 size span.
Span<float> s{data.data(), static_cast<Span<float>::index_type>(0)};
EXPECT_DEATH(s[0], ""); // not ok to use it.
}
} // namespace common
} // namespace xgboost

View File

@@ -11,19 +11,20 @@
namespace xgboost {
namespace common {
TEST(Stats, Quantile) {
Context ctx;
{
linalg::Tensor<float, 1> arr({20.f, 0.f, 15.f, 50.f, 40.f, 0.f, 35.f}, {7}, Context::kCpuId);
std::vector<size_t> index{0, 2, 3, 4, 6};
auto h_arr = arr.HostView();
auto beg = MakeIndexTransformIter([&](size_t i) { return h_arr(index[i]); });
auto end = beg + index.size();
auto q = Quantile(0.40f, beg, end);
auto q = Quantile(&ctx, 0.40f, beg, end);
ASSERT_EQ(q, 26.0);
q = Quantile(0.20f, beg, end);
q = Quantile(&ctx, 0.20f, beg, end);
ASSERT_EQ(q, 16.0);
q = Quantile(0.10f, beg, end);
q = Quantile(&ctx, 0.10f, beg, end);
ASSERT_EQ(q, 15.0);
}
@@ -31,12 +32,13 @@ TEST(Stats, Quantile) {
std::vector<float> vec{1., 2., 3., 4., 5.};
auto beg = MakeIndexTransformIter([&](size_t i) { return vec[i]; });
auto end = beg + vec.size();
auto q = Quantile(0.5f, beg, end);
auto q = Quantile(&ctx, 0.5f, beg, end);
ASSERT_EQ(q, 3.);
}
}
TEST(Stats, WeightedQuantile) {
Context ctx;
linalg::Tensor<float, 1> arr({1.f, 2.f, 3.f, 4.f, 5.f}, {5}, Context::kCpuId);
linalg::Tensor<float, 1> weight({1.f, 1.f, 1.f, 1.f, 1.f}, {5}, Context::kCpuId);
@@ -47,13 +49,13 @@ TEST(Stats, WeightedQuantile) {
auto end = beg + arr.Size();
auto w = MakeIndexTransformIter([&](size_t i) { return h_weight(i); });
auto q = WeightedQuantile(0.50f, beg, end, w);
auto q = WeightedQuantile(&ctx, 0.50f, beg, end, w);
ASSERT_EQ(q, 3);
q = WeightedQuantile(0.0, beg, end, w);
q = WeightedQuantile(&ctx, 0.0, beg, end, w);
ASSERT_EQ(q, 1);
q = WeightedQuantile(1.0, beg, end, w);
q = WeightedQuantile(&ctx, 1.0, beg, end, w);
ASSERT_EQ(q, 5);
}

View File

@@ -119,13 +119,13 @@ TEST(ArrayInterface, TrivialDim) {
}
TEST(ArrayInterface, ToDType) {
static_assert(ToDType<float>::kType == ArrayInterfaceHandler::kF4, "");
static_assert(ToDType<double>::kType == ArrayInterfaceHandler::kF8, "");
static_assert(ToDType<float>::kType == ArrayInterfaceHandler::kF4);
static_assert(ToDType<double>::kType == ArrayInterfaceHandler::kF8);
static_assert(ToDType<uint32_t>::kType == ArrayInterfaceHandler::kU4, "");
static_assert(ToDType<uint64_t>::kType == ArrayInterfaceHandler::kU8, "");
static_assert(ToDType<uint32_t>::kType == ArrayInterfaceHandler::kU4);
static_assert(ToDType<uint64_t>::kType == ArrayInterfaceHandler::kU8);
static_assert(ToDType<int32_t>::kType == ArrayInterfaceHandler::kI4, "");
static_assert(ToDType<int64_t>::kType == ArrayInterfaceHandler::kI8, "");
static_assert(ToDType<int32_t>::kType == ArrayInterfaceHandler::kI4);
static_assert(ToDType<int64_t>::kType == ArrayInterfaceHandler::kI8);
}
} // namespace xgboost

View File

@@ -21,7 +21,7 @@ TEST(SparsePage, PushCSC) {
offset = {0, 1, 4};
for (size_t i = 0; i < offset.back(); ++i) {
data.emplace_back(Entry(i, 0.1f));
data.emplace_back(i, 0.1f);
}
SparsePage other;

View File

@@ -68,6 +68,30 @@ TEST(GradientIndex, FromCategoricalBasic) {
}
}
TEST(GradientIndex, FromCategoricalLarge) {
size_t constexpr kRows = 1000, kCats = 512, kCols = 1;
bst_bin_t max_bins = 8;
auto x = GenerateRandomCategoricalSingleColumn(kRows, kCats);
auto m = GetDMatrixFromData(x, kRows, 1);
Context ctx;
auto &h_ft = m->Info().feature_types.HostVector();
h_ft.resize(kCols, FeatureType::kCategorical);
BatchParam p{max_bins, 0.8};
{
GHistIndexMatrix gidx(m.get(), max_bins, p.sparse_thresh, false, AllThreadsForTest(), {});
ASSERT_TRUE(gidx.index.GetBinTypeSize() == common::kUint16BinsTypeSize);
}
{
for (auto const &page : m->GetBatches<GHistIndexMatrix>(p)) {
common::HistogramCuts cut = page.cut;
GHistIndexMatrix gidx{m->Info(), std::move(cut), max_bins};
ASSERT_EQ(gidx.MaxNumBinPerFeat(), kCats);
}
}
}
TEST(GradientIndex, PushBatch) {
size_t constexpr kRows = 64, kCols = 4;
bst_bin_t max_bins = 64;

View File

@@ -189,8 +189,8 @@ TEST(SimpleCSRSource, FromColumnarSparse) {
auto& mask = column_bitfields[0];
mask.resize(8);
for (size_t j = 0; j < mask.size(); ++j) {
mask[j] = ~0;
for (auto && j : mask) {
j = ~0;
}
// the 2^th entry of first column is invalid
// [0 0 0 0 0 1 0 0]
@@ -201,8 +201,8 @@ TEST(SimpleCSRSource, FromColumnarSparse) {
auto& mask = column_bitfields[1];
mask.resize(8);
for (size_t j = 0; j < mask.size(); ++j) {
mask[j] = ~0;
for (auto && j : mask) {
j = ~0;
}
// the 19^th entry of second column is invalid
// [~0~], [~0~], [0 0 0 0 1 0 0 0]

View File

@@ -96,7 +96,7 @@ void TestRetainPage() {
// make sure it's const and the caller can not modify the content of page.
for (auto& page : m->GetBatches<Page>()) {
static_assert(std::is_const<std::remove_reference_t<decltype(page)>>::value, "");
static_assert(std::is_const<std::remove_reference_t<decltype(page)>>::value);
}
}

View File

@@ -1,5 +1,6 @@
// Copyright by Contributors
/**
* Copyright 2019-2023 by XGBoost Contributors
*/
#include "../../../src/common/compressed_iterator.h"
#include "../../../src/data/ellpack_page.cuh"
#include "../../../src/data/sparse_page_dmatrix.h"
@@ -69,7 +70,7 @@ TEST(SparsePageDMatrix, RetainEllpackPage) {
std::vector<std::shared_ptr<EllpackPage const>> iterators;
for (auto it = begin; it != end; ++it) {
iterators.push_back(it.Page());
gidx_buffers.emplace_back(HostDeviceVector<common::CompressedByteT>{});
gidx_buffers.emplace_back();
gidx_buffers.back().Resize((*it).Impl()->gidx_buffer.Size());
gidx_buffers.back().Copy((*it).Impl()->gidx_buffer);
}
@@ -87,7 +88,7 @@ TEST(SparsePageDMatrix, RetainEllpackPage) {
// make sure it's const and the caller can not modify the content of page.
for (auto& page : m->GetBatches<EllpackPage>({0, 32})) {
static_assert(std::is_const<std::remove_reference_t<decltype(page)>>::value, "");
static_assert(std::is_const<std::remove_reference_t<decltype(page)>>::value);
}
// The above iteration clears out all references inside DMatrix.

View File

@@ -186,7 +186,7 @@ SimpleLCG::StateType SimpleLCG::operator()() {
SimpleLCG::StateType SimpleLCG::Min() const { return min(); }
SimpleLCG::StateType SimpleLCG::Max() const { return max(); }
// Make sure it's compile time constant.
static_assert(SimpleLCG::max() - SimpleLCG::min(), "");
static_assert(SimpleLCG::max() - SimpleLCG::min());
void RandomDataGenerator::GenerateDense(HostDeviceVector<float> *out) const {
xgboost::SimpleRealUniformDistribution<bst_float> dist(lower_, upper_);

View File

@@ -46,7 +46,7 @@ class GradientBooster;
template <typename Float>
Float RelError(Float l, Float r) {
static_assert(std::is_floating_point<Float>::value, "");
static_assert(std::is_floating_point<Float>::value);
return std::abs(1.0f - l / r);
}
@@ -164,7 +164,7 @@ class SimpleRealUniformDistribution {
ResultT sum_value = 0, r_k = 1;
for (size_t k = m; k != 0; --k) {
sum_value += ResultT((*rng)() - rng->Min()) * r_k;
sum_value += static_cast<ResultT>((*rng)() - rng->Min()) * r_k;
r_k *= r;
}
@@ -191,12 +191,10 @@ Json GetArrayInterface(HostDeviceVector<T> *storage, size_t rows, size_t cols) {
Json array_interface{Object()};
array_interface["data"] = std::vector<Json>(2);
if (storage->DeviceCanRead()) {
array_interface["data"][0] =
Integer(reinterpret_cast<int64_t>(storage->ConstDevicePointer()));
array_interface["data"][0] = Integer{reinterpret_cast<int64_t>(storage->ConstDevicePointer())};
array_interface["stream"] = nullptr;
} else {
array_interface["data"][0] =
Integer(reinterpret_cast<int64_t>(storage->ConstHostPointer()));
array_interface["data"][0] = Integer{reinterpret_cast<int64_t>(storage->ConstHostPointer())};
}
array_interface["data"][1] = Boolean(false);

View File

@@ -1,4 +1,6 @@
// Copyright by Contributors
/**
* Copyright 2016-2023 by XGBoost contributors
*/
#include <gtest/gtest.h>
#include <xgboost/context.h>
#include <xgboost/objective.h>
@@ -25,11 +27,14 @@ TEST(Objective, PredTransform) {
tparam.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
size_t n = 100;
for (const auto &entry :
::dmlc::Registry<::xgboost::ObjFunctionReg>::List()) {
std::unique_ptr<xgboost::ObjFunction> obj{
xgboost::ObjFunction::Create(entry->name, &tparam)};
obj->Configure(Args{{"num_class", "2"}});
for (const auto& entry : ::dmlc::Registry<::xgboost::ObjFunctionReg>::List()) {
std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create(entry->name, &tparam)};
if (entry->name.find("multi") != std::string::npos) {
obj->Configure(Args{{"num_class", "2"}});
}
if (entry->name.find("quantile") != std::string::npos) {
obj->Configure(Args{{"quantile_alpha", "0.5"}});
}
HostDeviceVector<float> predts;
predts.Resize(n, 3.14f); // prediction is performed on host.
ASSERT_FALSE(predts.DeviceCanRead());

View File

@@ -0,0 +1,74 @@
/**
* Copyright 2023 by XGBoost contributors
*/
#include <gtest/gtest.h>
#include <xgboost/base.h> // Args
#include <xgboost/context.h> // Context
#include <xgboost/objective.h> // ObjFunction
#include <xgboost/span.h> // Span
#include <memory> // std::unique_ptr
#include <vector> // std::vector
#include "../helpers.h" // CheckConfigReload,CreateEmptyGenericParam,DeclareUnifiedTest
namespace xgboost {
TEST(Objective, DeclareUnifiedTest(Quantile)) {
Context ctx = CreateEmptyGenericParam(GPUIDX);
{
Args args{{"quantile_alpha", "[0.6, 0.8]"}};
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:quantileerror", &ctx)};
obj->Configure(args);
CheckConfigReload(obj, "reg:quantileerror");
}
Args args{{"quantile_alpha", "0.6"}};
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:quantileerror", &ctx)};
obj->Configure(args);
CheckConfigReload(obj, "reg:quantileerror");
std::vector<float> predts{1.0f, 2.0f, 3.0f};
std::vector<float> labels{3.0f, 2.0f, 1.0f};
std::vector<float> weights{1.0f, 1.0f, 1.0f};
std::vector<float> grad{-0.6f, 0.4f, 0.4f};
std::vector<float> hess = weights;
CheckObjFunction(obj, predts, labels, weights, grad, hess);
}
TEST(Objective, DeclareUnifiedTest(QuantileIntercept)) {
Context ctx = CreateEmptyGenericParam(GPUIDX);
Args args{{"quantile_alpha", "[0.6, 0.8]"}};
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:quantileerror", &ctx)};
obj->Configure(args);
MetaInfo info;
info.num_row_ = 10;
info.labels.ModifyInplace([&](HostDeviceVector<float>* data, common::Span<std::size_t> shape) {
data->SetDevice(ctx.gpu_id);
data->Resize(info.num_row_);
shape[0] = info.num_row_;
shape[1] = 1;
auto& h_labels = data->HostVector();
for (std::size_t i = 0; i < info.num_row_; ++i) {
h_labels[i] = i;
}
});
linalg::Vector<float> base_scores;
obj->InitEstimation(info, &base_scores);
ASSERT_EQ(base_scores.Size(), 1) << "Vector is not yet supported.";
// mean([5.6, 7.8])
ASSERT_NEAR(base_scores(0), 6.7, kRtEps);
for (std::size_t i = 0; i < info.num_row_; ++i) {
info.weights_.HostVector().emplace_back(info.num_row_ - i - 1.0);
}
obj->InitEstimation(info, &base_scores);
ASSERT_EQ(base_scores.Size(), 1) << "Vector is not yet supported.";
// mean([3, 5])
ASSERT_NEAR(base_scores(0), 4.0, kRtEps);
}
} // namespace xgboost

View File

@@ -0,0 +1,5 @@
/**
* Copyright 2023 XGBoost contributors
*/
// Dummy file to enable the CUDA tests.
#include "test_quantile_obj.cc"

View File

@@ -6,8 +6,9 @@
#include <xgboost/json.h>
#include <xgboost/objective.h>
#include "../../../src/common/linalg_op.h" // begin,end
#include "../../../src/common/linalg_op.h" // for begin, end
#include "../../../src/objective/adaptive.h"
#include "../../../src/tree/param.h" // for TrainParam
#include "../helpers.h"
#include "xgboost/base.h"
#include "xgboost/data.h"
@@ -157,7 +158,7 @@ TEST(Objective, DeclareUnifiedTest(PoissonRegressionGPair)) {
ObjFunction::Create("count:poisson", &ctx)
};
args.emplace_back(std::make_pair("max_delta_step", "0.1f"));
args.emplace_back("max_delta_step", "0.1f");
obj->Configure(args);
CheckObjFunction(obj,
@@ -259,7 +260,7 @@ TEST(Objective, DeclareUnifiedTest(TweedieRegressionGPair)) {
std::vector<std::pair<std::string, std::string>> args;
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:tweedie", &ctx)};
args.emplace_back(std::make_pair("tweedie_variance_power", "1.1f"));
args.emplace_back("tweedie_variance_power", "1.1f");
obj->Configure(args);
CheckObjFunction(obj,
@@ -408,9 +409,13 @@ TEST(Objective, DeclareUnifiedTest(AbsoluteError)) {
h_predt[i] = labels[i] + i;
}
obj->UpdateTreeLeaf(position, info, predt, 0, &tree);
ASSERT_EQ(tree[1].LeafValue(), -1);
ASSERT_EQ(tree[2].LeafValue(), -4);
tree::TrainParam param;
param.Init(Args{});
auto lr = param.learning_rate;
obj->UpdateTreeLeaf(position, info, param.learning_rate, predt, 0, &tree);
ASSERT_EQ(tree[1].LeafValue(), -1.0f * lr);
ASSERT_EQ(tree[2].LeafValue(), -4.0f * lr);
}
TEST(Objective, DeclareUnifiedTest(AbsoluteErrorLeaf)) {
@@ -428,8 +433,8 @@ TEST(Objective, DeclareUnifiedTest(AbsoluteErrorLeaf)) {
auto h_labels = info.labels.HostView().Slice(linalg::All(), t);
std::iota(linalg::begin(h_labels), linalg::end(h_labels), 0);
auto h_predt = linalg::MakeTensorView(predt.HostSpan(), {kRows, kTargets}, Context::kCpuId)
.Slice(linalg::All(), t);
auto h_predt =
linalg::MakeTensorView(&ctx, predt.HostSpan(), kRows, kTargets).Slice(linalg::All(), t);
for (size_t i = 0; i < h_predt.Size(); ++i) {
h_predt(i) = h_labels(i) + i;
}
@@ -457,11 +462,16 @@ TEST(Objective, DeclareUnifiedTest(AbsoluteErrorLeaf)) {
ASSERT_EQ(tree.GetNumLeaves(), 4);
auto empty_leaf = tree[4].LeafValue();
obj->UpdateTreeLeaf(position, info, predt, t, &tree);
ASSERT_EQ(tree[3].LeafValue(), -5);
ASSERT_EQ(tree[4].LeafValue(), empty_leaf);
ASSERT_EQ(tree[5].LeafValue(), -10);
ASSERT_EQ(tree[6].LeafValue(), -14);
tree::TrainParam param;
param.Init(Args{});
auto lr = param.learning_rate;
obj->UpdateTreeLeaf(position, info, lr, predt, t, &tree);
ASSERT_EQ(tree[3].LeafValue(), -5.0f * lr);
ASSERT_EQ(tree[4].LeafValue(), empty_leaf * lr);
ASSERT_EQ(tree[5].LeafValue(), -10.0f * lr);
ASSERT_EQ(tree[6].LeafValue(), -14.0f * lr);
}
}

View File

@@ -3,16 +3,18 @@
*/
#include <gtest/gtest.h>
#include <xgboost/cache.h>
#include <xgboost/data.h> // DMatrix
#include <xgboost/data.h> // for DMatrix
#include <cstddef> // std::size_t
#include <cstddef> // for size_t
#include <cstdint> // for uint32_t
#include <thread> // for thread
#include "helpers.h" // RandomDataGenerator
#include "helpers.h" // for RandomDataGenerator
namespace xgboost {
namespace {
struct CacheForTest {
std::size_t i;
std::size_t const i;
explicit CacheForTest(std::size_t k) : i{k} {}
};
@@ -20,7 +22,7 @@ struct CacheForTest {
TEST(DMatrixCache, Basic) {
std::size_t constexpr kRows = 2, kCols = 1, kCacheSize = 4;
DMatrixCache<CacheForTest> cache(kCacheSize);
DMatrixCache<CacheForTest> cache{kCacheSize};
auto add_cache = [&]() {
// Create a lambda function here, so that p_fmat gets deleted upon the
@@ -52,4 +54,63 @@ TEST(DMatrixCache, Basic) {
}
}
}
TEST(DMatrixCache, MultiThread) {
std::size_t constexpr kRows = 2, kCols = 1, kCacheSize = 3;
auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
auto n = std::thread::hardware_concurrency() * 128u;
CHECK_NE(n, 0);
std::vector<std::shared_ptr<CacheForTest>> results(n);
{
DMatrixCache<CacheForTest> cache{kCacheSize};
std::vector<std::thread> tasks;
for (std::uint32_t tidx = 0; tidx < n; ++tidx) {
tasks.emplace_back([&, i = tidx]() {
cache.CacheItem(p_fmat, i);
auto p_fmat_local = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
results[i] = cache.CacheItem(p_fmat_local, i);
});
}
for (auto& t : tasks) {
t.join();
}
for (std::uint32_t tidx = 0; tidx < n; ++tidx) {
ASSERT_EQ(results[tidx]->i, tidx);
}
tasks.clear();
for (std::int32_t tidx = static_cast<std::int32_t>(n - 1); tidx >= 0; --tidx) {
tasks.emplace_back([&, i = tidx]() {
cache.CacheItem(p_fmat, i);
auto p_fmat_local = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
results[i] = cache.CacheItem(p_fmat_local, i);
});
}
for (auto& t : tasks) {
t.join();
}
for (std::uint32_t tidx = 0; tidx < n; ++tidx) {
ASSERT_EQ(results[tidx]->i, tidx);
}
}
{
DMatrixCache<CacheForTest> cache{n};
std::vector<std::thread> tasks;
for (std::uint32_t tidx = 0; tidx < n; ++tidx) {
tasks.emplace_back([&, tidx]() { results[tidx] = cache.CacheItem(p_fmat, tidx); });
}
for (auto& t : tasks) {
t.join();
}
for (std::uint32_t tidx = 0; tidx < n; ++tidx) {
ASSERT_EQ(results[tidx]->i, tidx);
}
}
}
} // namespace xgboost

View File

@@ -9,12 +9,14 @@
#include "../../../../src/tree/hist/evaluate_splits.h"
#include "../test_evaluate_splits.h"
#include "../../helpers.h"
#include "xgboost/context.h" // Context
namespace xgboost {
namespace tree {
void TestEvaluateSplits(bool force_read_by_column) {
Context ctx;
ctx.nthread = 4;
int static constexpr kRows = 8, kCols = 16;
int32_t n_threads = std::min(omp_get_max_threads(), 4);
auto sampler = std::make_shared<common::ColumnSampler>();
TrainParam param;
@@ -22,7 +24,7 @@ void TestEvaluateSplits(bool force_read_by_column) {
auto dmat = RandomDataGenerator(kRows, kCols, 0).Seed(3).GenerateDMatrix();
auto evaluator = HistEvaluator<CPUExpandEntry>{param, dmat->Info(), n_threads, sampler};
auto evaluator = HistEvaluator<CPUExpandEntry>{&ctx, &param, dmat->Info(), sampler};
common::HistCollection hist;
std::vector<GradientPair> row_gpairs = {
{1.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f}, {2.27f, 0.28f},
@@ -86,13 +88,15 @@ TEST(HistEvaluator, Evaluate) {
}
TEST(HistEvaluator, Apply) {
Context ctx;
ctx.nthread = 4;
RegTree tree;
int static constexpr kNRows = 8, kNCols = 16;
TrainParam param;
param.UpdateAllowUnknown(Args{{"min_child_weight", "0"}, {"reg_lambda", "0.0"}});
auto dmat = RandomDataGenerator(kNRows, kNCols, 0).Seed(3).GenerateDMatrix();
auto sampler = std::make_shared<common::ColumnSampler>();
auto evaluator_ = HistEvaluator<CPUExpandEntry>{param, dmat->Info(), 4, sampler};
auto evaluator_ = HistEvaluator<CPUExpandEntry>{&ctx, &param, dmat->Info(), sampler};
CPUExpandEntry entry{0, 0, 10.0f};
entry.split.left_sum = GradStats{0.4, 0.6f};
@@ -115,10 +119,11 @@ TEST(HistEvaluator, Apply) {
}
TEST_F(TestPartitionBasedSplit, CPUHist) {
Context ctx;
// check the evaluator is returning the optimal split
std::vector<FeatureType> ft{FeatureType::kCategorical};
auto sampler = std::make_shared<common::ColumnSampler>();
HistEvaluator<CPUExpandEntry> evaluator{param_, info_, AllThreadsForTest(), sampler};
HistEvaluator<CPUExpandEntry> evaluator{&ctx, &param_, info_, sampler};
evaluator.InitRoot(GradStats{total_gpair_});
RegTree tree;
std::vector<CPUExpandEntry> entries(1);
@@ -128,6 +133,7 @@ TEST_F(TestPartitionBasedSplit, CPUHist) {
namespace {
auto CompareOneHotAndPartition(bool onehot) {
Context ctx;
int static constexpr kRows = 128, kCols = 1;
std::vector<FeatureType> ft(kCols, FeatureType::kCategorical);
@@ -147,8 +153,7 @@ auto CompareOneHotAndPartition(bool onehot) {
RandomDataGenerator(kRows, kCols, 0).Seed(3).Type(ft).MaxCategory(n_cats).GenerateDMatrix();
auto sampler = std::make_shared<common::ColumnSampler>();
auto evaluator =
HistEvaluator<CPUExpandEntry>{param, dmat->Info(), AllThreadsForTest(), sampler};
auto evaluator = HistEvaluator<CPUExpandEntry>{&ctx, &param, dmat->Info(), sampler};
std::vector<CPUExpandEntry> entries(1);
for (auto const &gmat : dmat->GetBatches<GHistIndexMatrix>({32, param.sparse_threshold})) {
@@ -198,8 +203,8 @@ TEST_F(TestCategoricalSplitWithMissing, HistEvaluator) {
MetaInfo info;
info.num_col_ = 1;
info.feature_types = {FeatureType::kCategorical};
auto evaluator =
HistEvaluator<CPUExpandEntry>{param_, info, AllThreadsForTest(), sampler};
Context ctx;
auto evaluator = HistEvaluator<CPUExpandEntry>{&ctx, &param_, info, sampler};
evaluator.InitRoot(GradStats{parent_sum_});
std::vector<CPUExpandEntry> entries(1);

View File

@@ -48,7 +48,7 @@ void TestAddHistRows(bool is_distributed) {
HistogramBuilder<CPUExpandEntry> histogram_builder;
histogram_builder.Reset(gmat.cut.TotalBins(), {kMaxBins, 0.5}, omp_get_max_threads(), 1,
is_distributed);
is_distributed, false);
histogram_builder.AddHistRows(&starting_index, &sync_count,
nodes_for_explicit_hist_build_,
nodes_for_subtraction_trick_, &tree);
@@ -86,7 +86,7 @@ void TestSyncHist(bool is_distributed) {
HistogramBuilder<CPUExpandEntry> histogram;
uint32_t total_bins = gmat.cut.Ptrs().back();
histogram.Reset(total_bins, {kMaxBins, 0.5}, omp_get_max_threads(), 1, is_distributed);
histogram.Reset(total_bins, {kMaxBins, 0.5}, omp_get_max_threads(), 1, is_distributed, false);
common::RowSetCollection row_set_collection_;
{
@@ -226,11 +226,14 @@ TEST(CPUHistogram, SyncHist) {
TestSyncHist(false);
}
void TestBuildHistogram(bool is_distributed, bool force_read_by_column) {
void TestBuildHistogram(bool is_distributed, bool force_read_by_column, bool is_col_split) {
size_t constexpr kNRows = 8, kNCols = 16;
int32_t constexpr kMaxBins = 4;
auto p_fmat =
RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
auto p_fmat = RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
if (is_col_split) {
p_fmat = std::shared_ptr<DMatrix>{
p_fmat->SliceCol(collective::GetWorldSize(), collective::GetRank())};
}
auto const &gmat = *(p_fmat->GetBatches<GHistIndexMatrix>(BatchParam{kMaxBins, 0.5}).begin());
uint32_t total_bins = gmat.cut.Ptrs().back();
@@ -241,7 +244,8 @@ void TestBuildHistogram(bool is_distributed, bool force_read_by_column) {
bst_node_t nid = 0;
HistogramBuilder<CPUExpandEntry> histogram;
histogram.Reset(total_bins, {kMaxBins, 0.5}, omp_get_max_threads(), 1, is_distributed);
histogram.Reset(total_bins, {kMaxBins, 0.5}, omp_get_max_threads(), 1, is_distributed,
is_col_split);
RegTree tree;
@@ -284,11 +288,16 @@ void TestBuildHistogram(bool is_distributed, bool force_read_by_column) {
}
TEST(CPUHistogram, BuildHist) {
TestBuildHistogram(true, false);
TestBuildHistogram(false, false);
TestBuildHistogram(true, true);
TestBuildHistogram(false, true);
TestBuildHistogram(true, false, false);
TestBuildHistogram(false, false, false);
TestBuildHistogram(true, true, false);
TestBuildHistogram(false, true, false);
}
TEST(CPUHistogram, BuildHistColSplit) {
auto constexpr kWorkers = 4;
RunWithInMemoryCommunicator(kWorkers, TestBuildHistogram, true, true, true);
RunWithInMemoryCommunicator(kWorkers, TestBuildHistogram, true, false, true);
}
namespace {
@@ -340,7 +349,7 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
HistogramBuilder<CPUExpandEntry> cat_hist;
for (auto const &gidx : cat_m->GetBatches<GHistIndexMatrix>({kBins, 0.5})) {
auto total_bins = gidx.cut.TotalBins();
cat_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false);
cat_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false, false);
cat_hist.BuildHist(0, gidx, &tree, row_set_collection,
nodes_for_explicit_hist_build, {}, gpair.HostVector(),
force_read_by_column);
@@ -354,7 +363,7 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
HistogramBuilder<CPUExpandEntry> onehot_hist;
for (auto const &gidx : encode_m->GetBatches<GHistIndexMatrix>({kBins, 0.5})) {
auto total_bins = gidx.cut.TotalBins();
onehot_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false);
onehot_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false, false);
onehot_hist.BuildHist(0, gidx, &tree, row_set_collection, nodes_for_explicit_hist_build, {},
gpair.HostVector(),
force_read_by_column);
@@ -419,7 +428,7 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo
1, [&](size_t nidx_in_set) { return partition_size.at(nidx_in_set); },
256};
multi_build.Reset(total_bins, batch_param, ctx.Threads(), rows_set.size(), false);
multi_build.Reset(total_bins, batch_param, ctx.Threads(), rows_set.size(), false, false);
size_t page_idx{0};
for (auto const &page : m->GetBatches<GHistIndexMatrix>(batch_param)) {
@@ -440,7 +449,7 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo
common::RowSetCollection row_set_collection;
InitRowPartitionForTest(&row_set_collection, n_samples);
single_build.Reset(total_bins, batch_param, ctx.Threads(), 1, false);
single_build.Reset(total_bins, batch_param, ctx.Threads(), 1, false, false);
SparsePage concat;
std::vector<float> hess(m->Info().num_row_, 1.0f);
for (auto const& page : m->GetBatches<SparsePage>()) {

View File

@@ -10,29 +10,36 @@
namespace xgboost {
namespace tree {
TEST(Approx, Partitioner) {
size_t n_samples = 1024, n_features = 1, base_rowid = 0;
Context ctx;
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
ASSERT_EQ(partitioner.base_rowid, base_rowid);
ASSERT_EQ(partitioner.Size(), 1);
ASSERT_EQ(partitioner.Partitions()[0].Size(), n_samples);
auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
ctx.InitAllowUnknown(Args{});
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
namespace {
std::vector<float> GenerateHess(size_t n_samples) {
auto grad = GenerateRandomGradients(n_samples);
std::vector<float> hess(grad.Size());
std::transform(grad.HostVector().cbegin(), grad.HostVector().cend(), hess.begin(),
[](auto gpair) { return gpair.GetHess(); });
return hess;
}
} // anonymous namespace
TEST(Approx, Partitioner) {
size_t n_samples = 1024, n_features = 1, base_rowid = 0;
Context ctx;
ctx.InitAllowUnknown(Args{});
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
ASSERT_EQ(partitioner.base_rowid, base_rowid);
ASSERT_EQ(partitioner.Size(), 1);
ASSERT_EQ(partitioner.Partitions()[0].Size(), n_samples);
auto const Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
auto hess = GenerateHess(n_samples);
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({64, hess, true})) {
bst_feature_t const split_ind = 0;
{
auto min_value = page.cut.MinValues()[split_ind];
RegTree tree;
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
GetSplit(&tree, min_value, &candidates);
partitioner.UpdatePosition(&ctx, page, candidates, &tree);
ASSERT_EQ(partitioner.Size(), 3);
@@ -40,7 +47,7 @@ TEST(Approx, Partitioner) {
ASSERT_EQ(partitioner[2].Size(), n_samples);
}
{
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
auto ptr = page.cut.Ptrs()[split_ind + 1];
float split_value = page.cut.Values().at(ptr / 2);
RegTree tree;
@@ -66,12 +73,85 @@ TEST(Approx, Partitioner) {
}
}
namespace {
void TestColumnSplitPartitioner(size_t n_samples, size_t base_rowid, std::shared_ptr<DMatrix> Xy,
std::vector<float>* hess, float min_value, float mid_value,
CommonRowPartitioner const& expected_mid_partitioner) {
auto dmat =
std::unique_ptr<DMatrix>{Xy->SliceCol(collective::GetWorldSize(), collective::GetRank())};
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
Context ctx;
ctx.InitAllowUnknown(Args{});
for (auto const& page : dmat->GetBatches<GHistIndexMatrix>({64, *hess, true})) {
{
RegTree tree;
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, true};
GetSplit(&tree, min_value, &candidates);
partitioner.UpdatePosition(&ctx, page, candidates, &tree);
ASSERT_EQ(partitioner.Size(), 3);
ASSERT_EQ(partitioner[1].Size(), 0);
ASSERT_EQ(partitioner[2].Size(), n_samples);
}
{
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, true};
RegTree tree;
GetSplit(&tree, mid_value, &candidates);
partitioner.UpdatePosition(&ctx, page, candidates, &tree);
auto left_nidx = tree[RegTree::kRoot].LeftChild();
auto elem = partitioner[left_nidx];
ASSERT_LT(elem.Size(), n_samples);
ASSERT_GT(elem.Size(), 1);
auto expected_elem = expected_mid_partitioner[left_nidx];
ASSERT_EQ(elem.Size(), expected_elem.Size());
for (auto it = elem.begin, eit = expected_elem.begin; it != elem.end; ++it, ++eit) {
ASSERT_EQ(*it, *eit);
}
auto right_nidx = tree[RegTree::kRoot].RightChild();
elem = partitioner[right_nidx];
expected_elem = expected_mid_partitioner[right_nidx];
ASSERT_EQ(elem.Size(), expected_elem.Size());
for (auto it = elem.begin, eit = expected_elem.begin; it != elem.end; ++it, ++eit) {
ASSERT_EQ(*it, *eit);
}
}
}
}
} // anonymous namespace
TEST(Approx, PartitionerColSplit) {
size_t n_samples = 1024, n_features = 16, base_rowid = 0;
auto const Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
auto hess = GenerateHess(n_samples);
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
float min_value, mid_value;
Context ctx;
ctx.InitAllowUnknown(Args{});
CommonRowPartitioner mid_partitioner{&ctx, n_samples, base_rowid, false};
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({64, hess, true})) {
bst_feature_t const split_ind = 0;
min_value = page.cut.MinValues()[split_ind];
auto ptr = page.cut.Ptrs()[split_ind + 1];
mid_value = page.cut.Values().at(ptr / 2);
RegTree tree;
GetSplit(&tree, mid_value, &candidates);
mid_partitioner.UpdatePosition(&ctx, page, candidates, &tree);
}
auto constexpr kWorkers = 4;
RunWithInMemoryCommunicator(kWorkers, TestColumnSplitPartitioner, n_samples, base_rowid, Xy,
&hess, min_value, mid_value, mid_partitioner);
}
namespace {
void TestLeafPartition(size_t n_samples) {
size_t const n_features = 2, base_rowid = 0;
Context ctx;
common::RowSetCollection row_set;
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2022 by XGBoost Contributors
/**
* Copyright 2022-2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/data.h>
@@ -12,8 +12,7 @@
#include "../../../src/tree/split_evaluator.h"
#include "../helpers.h"
namespace xgboost {
namespace tree {
namespace xgboost::tree {
/**
* \brief Enumerate all possible partitions for categorical split.
*/
@@ -151,5 +150,4 @@ class TestCategoricalSplitWithMissing : public testing::Test {
ASSERT_EQ(right_sum.GetHess(), parent_sum_.GetHess() - left_sum.GetHess());
}
};
} // namespace tree
} // namespace xgboost
} // namespace xgboost::tree

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2017-2022 XGBoost contributors
/**
* Copyright 2017-2023 by XGBoost contributors
*/
#include <gtest/gtest.h>
#include <thrust/device_vector.h>
@@ -13,6 +13,7 @@
#include "../../../src/common/common.h"
#include "../../../src/data/sparse_page_source.h"
#include "../../../src/tree/constraints.cuh"
#include "../../../src/tree/param.h" // for TrainParam
#include "../../../src/tree/updater_gpu_common.cuh"
#include "../../../src/tree/updater_gpu_hist.cu"
#include "../filesystem.h" // dmlc::TemporaryDirectory
@@ -21,8 +22,7 @@
#include "xgboost/context.h"
#include "xgboost/json.h"
namespace xgboost {
namespace tree {
namespace xgboost::tree {
TEST(GpuHist, DeviceHistogram) {
// Ensures that node allocates correctly after reaching `kStopGrowingSize`.
dh::safe_cuda(cudaSetDevice(0));
@@ -83,11 +83,12 @@ void TestBuildHist(bool use_shared_memory_histograms) {
int const kNRows = 16, kNCols = 8;
TrainParam param;
std::vector<std::pair<std::string, std::string>> args {
{"max_depth", "6"},
{"max_leaves", "0"},
Args args{
{"max_depth", "6"},
{"max_leaves", "0"},
};
param.Init(args);
auto page = BuildEllpackPage(kNRows, kNCols);
BatchParam batch_param{};
Context ctx{CreateEmptyGenericParam(0)};
@@ -168,7 +169,6 @@ void TestHistogramIndexImpl() {
int constexpr kNRows = 1000, kNCols = 10;
// Build 2 matrices and build a histogram maker with that
Context ctx(CreateEmptyGenericParam(0));
tree::GPUHistMaker hist_maker{&ctx, ObjInfo{ObjInfo::kRegression}},
hist_maker_ext{&ctx, ObjInfo{ObjInfo::kRegression}};
@@ -179,15 +179,14 @@ void TestHistogramIndexImpl() {
std::unique_ptr<DMatrix> hist_maker_ext_dmat(
CreateSparsePageDMatrixWithRC(kNRows, kNCols, 128UL, true, tempdir));
std::vector<std::pair<std::string, std::string>> training_params = {
{"max_depth", "10"},
{"max_leaves", "0"}
};
Args training_params = {{"max_depth", "10"}, {"max_leaves", "0"}};
TrainParam param;
param.UpdateAllowUnknown(training_params);
hist_maker.Configure(training_params);
hist_maker.InitDataOnce(hist_maker_dmat.get());
hist_maker.InitDataOnce(&param, hist_maker_dmat.get());
hist_maker_ext.Configure(training_params);
hist_maker_ext.InitDataOnce(hist_maker_ext_dmat.get());
hist_maker_ext.InitDataOnce(&param, hist_maker_ext_dmat.get());
// Extract the device maker from the histogram makers and from that its compressed
// histogram index
@@ -237,13 +236,15 @@ void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
{"subsample", std::to_string(subsample)},
{"sampling_method", sampling_method},
};
TrainParam param;
param.UpdateAllowUnknown(args);
Context ctx(CreateEmptyGenericParam(0));
tree::GPUHistMaker hist_maker{&ctx,ObjInfo{ObjInfo::kRegression}};
hist_maker.Configure(args);
std::vector<HostDeviceVector<bst_node_t>> position(1);
hist_maker.Update(gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position}, {tree});
hist_maker.Update(&param, gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position},
{tree});
auto cache = linalg::VectorView<float>{preds->DeviceSpan(), {preds->Size()}, 0};
hist_maker.UpdatePredictionCache(dmat, cache);
}
@@ -391,13 +392,11 @@ TEST(GpuHist, ConfigIO) {
Json j_updater { Object() };
updater->SaveConfig(&j_updater);
ASSERT_TRUE(IsA<Object>(j_updater["gpu_hist_train_param"]));
ASSERT_TRUE(IsA<Object>(j_updater["train_param"]));
updater->LoadConfig(j_updater);
Json j_updater_roundtrip { Object() };
updater->SaveConfig(&j_updater_roundtrip);
ASSERT_TRUE(IsA<Object>(j_updater_roundtrip["gpu_hist_train_param"]));
ASSERT_TRUE(IsA<Object>(j_updater_roundtrip["train_param"]));
ASSERT_EQ(j_updater, j_updater_roundtrip);
}
@@ -414,5 +413,4 @@ TEST(GpuHist, MaxDepth) {
ASSERT_THROW({learner->UpdateOneIter(0, p_mat);}, dmlc::Error);
}
} // namespace tree
} // namespace xgboost
} // namespace xgboost::tree

View File

@@ -1,33 +1,42 @@
/**
* Copyright 2019-2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/tree_model.h>
#include <xgboost/tree_updater.h>
#include "../../../src/tree/param.h" // for TrainParam
#include "../helpers.h"
namespace xgboost {
namespace tree {
namespace xgboost::tree {
std::shared_ptr<DMatrix> GenerateDMatrix(std::size_t rows, std::size_t cols){
return RandomDataGenerator{rows, cols, 0.6f}.Seed(3).GenerateDMatrix();
}
TEST(GrowHistMaker, InteractionConstraint) {
size_t constexpr kRows = 32;
size_t constexpr kCols = 16;
Context ctx;
auto p_dmat = RandomDataGenerator{kRows, kCols, 0.6f}.Seed(3).GenerateDMatrix();
HostDeviceVector<GradientPair> gradients (kRows);
std::vector<GradientPair>& h_gradients = gradients.HostVector();
std::unique_ptr<HostDeviceVector<GradientPair>> GenerateGradients(std::size_t rows) {
auto p_gradients = std::make_unique<HostDeviceVector<GradientPair>>(rows);
auto& h_gradients = p_gradients->HostVector();
xgboost::SimpleLCG gen;
xgboost::SimpleRealUniformDistribution<bst_float> dist(0.0f, 1.0f);
for (size_t i = 0; i < kRows; ++i) {
bst_float grad = dist(&gen);
bst_float hess = dist(&gen);
h_gradients[i] = GradientPair(grad, hess);
for (std::size_t i = 0; i < rows; ++i) {
auto grad = dist(&gen);
auto hess = dist(&gen);
h_gradients[i] = GradientPair{grad, hess};
}
return p_gradients;
}
TEST(GrowHistMaker, InteractionConstraint)
{
auto constexpr kRows = 32;
auto constexpr kCols = 16;
auto p_dmat = GenerateDMatrix(kRows, kCols);
auto p_gradients = GenerateGradients(kRows);
Context ctx;
{
// With constraints
RegTree tree;
@@ -35,11 +44,11 @@ TEST(GrowHistMaker, InteractionConstraint) {
std::unique_ptr<TreeUpdater> updater{
TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
updater->Configure(Args{
{"interaction_constraints", "[[0, 1]]"},
{"num_feature", std::to_string(kCols)}});
TrainParam param;
param.UpdateAllowUnknown(
Args{{"interaction_constraints", "[[0, 1]]"}, {"num_feature", std::to_string(kCols)}});
std::vector<HostDeviceVector<bst_node_t>> position(1);
updater->Update(&gradients, p_dmat.get(), position, {&tree});
updater->Update(&param, p_gradients.get(), p_dmat.get(), position, {&tree});
ASSERT_EQ(tree.NumExtraNodes(), 4);
ASSERT_EQ(tree[0].SplitIndex(), 1);
@@ -54,9 +63,10 @@ TEST(GrowHistMaker, InteractionConstraint) {
std::unique_ptr<TreeUpdater> updater{
TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
updater->Configure(Args{{"num_feature", std::to_string(kCols)}});
std::vector<HostDeviceVector<bst_node_t>> position(1);
updater->Update(&gradients, p_dmat.get(), position, {&tree});
TrainParam param;
param.Init(Args{});
updater->Update(&param, p_gradients.get(), p_dmat.get(), position, {&tree});
ASSERT_EQ(tree.NumExtraNodes(), 10);
ASSERT_EQ(tree[0].SplitIndex(), 1);
@@ -66,5 +76,53 @@ TEST(GrowHistMaker, InteractionConstraint) {
}
}
} // namespace tree
} // namespace xgboost
namespace {
void TestColumnSplit(int32_t rows, int32_t cols, RegTree const& expected_tree) {
auto p_dmat = GenerateDMatrix(rows, cols);
auto p_gradients = GenerateGradients(rows);
Context ctx;
std::unique_ptr<TreeUpdater> updater{
TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
std::vector<HostDeviceVector<bst_node_t>> position(1);
std::unique_ptr<DMatrix> sliced{
p_dmat->SliceCol(collective::GetWorldSize(), collective::GetRank())};
RegTree tree;
tree.param.num_feature = cols;
TrainParam param;
param.Init(Args{});
updater->Update(&param, p_gradients.get(), sliced.get(), position, {&tree});
EXPECT_EQ(tree.NumExtraNodes(), 10);
EXPECT_EQ(tree[0].SplitIndex(), 1);
EXPECT_NE(tree[tree[0].LeftChild()].SplitIndex(), 0);
EXPECT_NE(tree[tree[0].RightChild()].SplitIndex(), 0);
EXPECT_EQ(tree, expected_tree);
}
} // anonymous namespace
TEST(GrowHistMaker, ColumnSplit) {
auto constexpr kRows = 32;
auto constexpr kCols = 16;
RegTree expected_tree;
expected_tree.param.num_feature = kCols;
{
auto p_dmat = GenerateDMatrix(kRows, kCols);
auto p_gradients = GenerateGradients(kRows);
Context ctx;
std::unique_ptr<TreeUpdater> updater{
TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
std::vector<HostDeviceVector<bst_node_t>> position(1);
TrainParam param;
param.Init(Args{});
updater->Update(&param, p_gradients.get(), p_dmat.get(), position, {&expected_tree});
}
auto constexpr kWorldSize = 2;
RunWithInMemoryCommunicator(kWorldSize, TestColumnSplit, kRows, kCols, std::cref(expected_tree));
}
} // namespace xgboost::tree

View File

@@ -7,6 +7,7 @@
#include <memory>
#include "../../../src/tree/param.h" // for TrainParam
#include "../helpers.h"
namespace xgboost {
@@ -75,9 +76,11 @@ class TestPredictionCache : public ::testing::Test {
RegTree tree;
std::vector<RegTree *> trees{&tree};
auto gpair = GenerateRandomGradients(n_samples_);
updater->Configure(Args{{"max_bin", "64"}});
tree::TrainParam param;
param.UpdateAllowUnknown(Args{{"max_bin", "64"}});
std::vector<HostDeviceVector<bst_node_t>> position(1);
updater->Update(&gpair, Xy_.get(), position, trees);
updater->Update(&param, &gpair, Xy_.get(), position, trees);
HostDeviceVector<float> out_prediction_cached;
out_prediction_cached.SetDevice(ctx.gpu_id);
out_prediction_cached.Resize(n_samples_);

View File

@@ -1,28 +1,26 @@
/*!
* Copyright 2018-2019 by Contributors
/**
* Copyright 2018-2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/data.h>
#include <xgboost/host_device_vector.h>
#include <xgboost/tree_updater.h>
#include <xgboost/learner.h>
#include <gtest/gtest.h>
#include <vector>
#include <string>
#include <memory>
#include <xgboost/tree_updater.h>
#include <memory>
#include <string>
#include <vector>
#include "../../../src/tree/param.h" // for TrainParam
#include "../helpers.h"
namespace xgboost {
namespace tree {
namespace xgboost::tree {
TEST(Updater, Prune) {
int constexpr kCols = 16;
std::vector<std::pair<std::string, std::string>> cfg;
cfg.emplace_back(std::pair<std::string, std::string>("num_feature",
std::to_string(kCols)));
cfg.emplace_back(std::pair<std::string, std::string>(
"min_split_loss", "10"));
cfg.emplace_back("num_feature", std::to_string(kCols));
cfg.emplace_back("min_split_loss", "10");
// These data are just place holders.
HostDeviceVector<GradientPair> gpair =
@@ -38,28 +36,30 @@ TEST(Updater, Prune) {
tree.param.UpdateAllowUnknown(cfg);
std::vector<RegTree*> trees {&tree};
// prepare pruner
TrainParam param;
param.UpdateAllowUnknown(cfg);
std::unique_ptr<TreeUpdater> pruner(
TreeUpdater::Create("prune", &ctx, ObjInfo{ObjInfo::kRegression}));
pruner->Configure(cfg);
// loss_chg < min_split_loss;
std::vector<HostDeviceVector<bst_node_t>> position(trees.size());
tree.ExpandNode(0, 0, 0, true, 0.0f, 0.3f, 0.4f, 0.0f, 0.0f,
/*left_sum=*/0.0f, /*right_sum=*/0.0f);
pruner->Update(&gpair, p_dmat.get(), position, trees);
pruner->Update(&param, &gpair, p_dmat.get(), position, trees);
ASSERT_EQ(tree.NumExtraNodes(), 0);
// loss_chg > min_split_loss;
tree.ExpandNode(0, 0, 0, true, 0.0f, 0.3f, 0.4f, 11.0f, 0.0f,
/*left_sum=*/0.0f, /*right_sum=*/0.0f);
pruner->Update(&gpair, p_dmat.get(), position, trees);
pruner->Update(&param, &gpair, p_dmat.get(), position, trees);
ASSERT_EQ(tree.NumExtraNodes(), 2);
// loss_chg == min_split_loss;
tree.Stat(0).loss_chg = 10;
pruner->Update(&gpair, p_dmat.get(), position, trees);
pruner->Update(&param, &gpair, p_dmat.get(), position, trees);
ASSERT_EQ(tree.NumExtraNodes(), 2);
@@ -73,20 +73,20 @@ TEST(Updater, Prune) {
0, 0.5f, true, 0.3, 0.4, 0.5,
/*loss_chg=*/19.0f, 0.0f,
/*left_sum=*/0.0f, /*right_sum=*/0.0f);
cfg.emplace_back(std::make_pair("max_depth", "1"));
pruner->Configure(cfg);
pruner->Update(&gpair, p_dmat.get(), position, trees);
cfg.emplace_back("max_depth", "1");
param.UpdateAllowUnknown(cfg);
pruner->Update(&param, &gpair, p_dmat.get(), position, trees);
ASSERT_EQ(tree.NumExtraNodes(), 2);
tree.ExpandNode(tree[0].LeftChild(),
0, 0.5f, true, 0.3, 0.4, 0.5,
/*loss_chg=*/18.0f, 0.0f,
/*left_sum=*/0.0f, /*right_sum=*/0.0f);
cfg.emplace_back(std::make_pair("min_split_loss", "0"));
pruner->Configure(cfg);
pruner->Update(&gpair, p_dmat.get(), position, trees);
cfg.emplace_back("min_split_loss", "0");
param.UpdateAllowUnknown(cfg);
pruner->Update(&param, &gpair, p_dmat.get(), position, trees);
ASSERT_EQ(tree.NumExtraNodes(), 2);
}
} // namespace tree
} // namespace xgboost
} // namespace xgboost::tree

View File

@@ -23,7 +23,7 @@ TEST(QuantileHist, Partitioner) {
Context ctx;
ctx.InitAllowUnknown(Args{});
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
ASSERT_EQ(partitioner.base_rowid, base_rowid);
ASSERT_EQ(partitioner.Size(), 1);
ASSERT_EQ(partitioner.Partitions()[0].Size(), n_samples);
@@ -41,7 +41,7 @@ TEST(QuantileHist, Partitioner) {
{
auto min_value = gmat.cut.MinValues()[split_ind];
RegTree tree;
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
GetSplit(&tree, min_value, &candidates);
partitioner.UpdatePosition<false, true>(&ctx, gmat, column_indices, candidates, &tree);
ASSERT_EQ(partitioner.Size(), 3);
@@ -49,7 +49,7 @@ TEST(QuantileHist, Partitioner) {
ASSERT_EQ(partitioner[2].Size(), n_samples);
}
{
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
auto ptr = gmat.cut.Ptrs()[split_ind + 1];
float split_value = gmat.cut.Values().at(ptr / 2);
RegTree tree;

View File

@@ -1,14 +1,15 @@
/*!
* Copyright 2018-2019 by Contributors
/**
* Copyright 2018-2013 by XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/host_device_vector.h>
#include <xgboost/tree_updater.h>
#include <gtest/gtest.h>
#include <vector>
#include <string>
#include <memory>
#include <string>
#include <vector>
#include "../../../src/tree/param.h" // for TrainParam
#include "../helpers.h"
namespace xgboost {
@@ -43,9 +44,11 @@ TEST(Updater, Refresh) {
tree.Stat(cleft).base_weight = 1.2;
tree.Stat(cright).base_weight = 1.3;
refresher->Configure(cfg);
std::vector<HostDeviceVector<bst_node_t>> position;
refresher->Update(&gpair, p_dmat.get(), position, trees);
tree::TrainParam param;
param.UpdateAllowUnknown(cfg);
refresher->Update(&param, &gpair, p_dmat.get(), position, trees);
bst_float constexpr kEps = 1e-6;
ASSERT_NEAR(-0.183392, tree[cright].LeafValue(), kEps);

View File

@@ -1,7 +1,11 @@
/**
* Copyright 2020-2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/tree_model.h>
#include <xgboost/tree_updater.h>
#include "../../../src/tree/param.h" // for TrainParam
#include "../helpers.h"
namespace xgboost {
@@ -21,6 +25,9 @@ class UpdaterTreeStatTest : public ::testing::Test {
}
void RunTest(std::string updater) {
tree::TrainParam param;
param.Init(Args{});
Context ctx(updater == "grow_gpu_hist" ? CreateEmptyGenericParam(0)
: CreateEmptyGenericParam(Context::kCpuId));
auto up = std::unique_ptr<TreeUpdater>{
@@ -29,7 +36,7 @@ class UpdaterTreeStatTest : public ::testing::Test {
RegTree tree;
tree.param.num_feature = kCols;
std::vector<HostDeviceVector<bst_node_t>> position(1);
up->Update(&gpairs_, p_dmat_.get(), position, {&tree});
up->Update(&param, &gpairs_, p_dmat_.get(), position, {&tree});
tree.WalkTree([&tree](bst_node_t nidx) {
if (tree[nidx].IsLeaf()) {
@@ -69,28 +76,33 @@ class UpdaterEtaTest : public ::testing::Test {
void RunTest(std::string updater) {
Context ctx(updater == "grow_gpu_hist" ? CreateEmptyGenericParam(0)
: CreateEmptyGenericParam(Context::kCpuId));
float eta = 0.4;
auto up_0 = std::unique_ptr<TreeUpdater>{
TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kClassification})};
up_0->Configure(Args{{"eta", std::to_string(eta)}});
up_0->Configure(Args{});
tree::TrainParam param0;
param0.Init(Args{{"eta", std::to_string(eta)}});
auto up_1 = std::unique_ptr<TreeUpdater>{
TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kClassification})};
up_1->Configure(Args{{"eta", "1.0"}});
tree::TrainParam param1;
param1.Init(Args{{"eta", "1.0"}});
for (size_t iter = 0; iter < 4; ++iter) {
RegTree tree_0;
{
tree_0.param.num_feature = kCols;
std::vector<HostDeviceVector<bst_node_t>> position(1);
up_0->Update(&gpairs_, p_dmat_.get(), position, {&tree_0});
up_0->Update(&param0, &gpairs_, p_dmat_.get(), position, {&tree_0});
}
RegTree tree_1;
{
tree_1.param.num_feature = kCols;
std::vector<HostDeviceVector<bst_node_t>> position(1);
up_1->Update(&gpairs_, p_dmat_.get(), position, {&tree_1});
up_1->Update(&param1, &gpairs_, p_dmat_.get(), position, {&tree_1});
}
tree_0.WalkTree([&](bst_node_t nidx) {
if (tree_0[nidx].IsLeaf()) {
@@ -139,17 +151,18 @@ class TestMinSplitLoss : public ::testing::Test {
// test gamma
{"gamma", std::to_string(gamma)}};
tree::TrainParam param;
param.UpdateAllowUnknown(args);
Context ctx(updater == "grow_gpu_hist" ? CreateEmptyGenericParam(0)
: CreateEmptyGenericParam(Context::kCpuId));
std::cout << ctx.gpu_id << std::endl;
auto up = std::unique_ptr<TreeUpdater>{
TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kRegression})};
up->Configure(args);
up->Configure({});
RegTree tree;
std::vector<HostDeviceVector<bst_node_t>> position(1);
up->Update(&gpair_, dmat_.get(), position, {&tree});
up->Update(&param, &gpair_, dmat_.get(), position, {&tree});
auto n_nodes = tree.NumExtraNodes();
return n_nodes;