Implement column sampler in CUDA. (#9785)
- CUDA implementation. - Extract the broadcasting logic, we will need the context parameter after revamping the collective implementation. - Some changes to the event loop for fixing a deadlock in CI. - Move argsort into algorithms.cuh, add support for cuda stream.
This commit is contained in:
@@ -57,13 +57,13 @@ TEST(Algorithm, GpuArgSort) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
|
||||
dh::device_vector<float> values(20);
|
||||
dh::Iota(dh::ToSpan(values)); // accending
|
||||
dh::Iota(dh::ToSpan(values), ctx.CUDACtx()->Stream()); // accending
|
||||
dh::device_vector<size_t> sorted_idx(20);
|
||||
dh::ArgSort<false>(dh::ToSpan(values), dh::ToSpan(sorted_idx)); // sort to descending
|
||||
ASSERT_TRUE(thrust::is_sorted(thrust::device, sorted_idx.begin(), sorted_idx.end(),
|
||||
ArgSort<false>(&ctx, dh::ToSpan(values), dh::ToSpan(sorted_idx)); // sort to descending
|
||||
ASSERT_TRUE(thrust::is_sorted(ctx.CUDACtx()->CTP(), sorted_idx.begin(), sorted_idx.end(),
|
||||
thrust::greater<size_t>{}));
|
||||
|
||||
dh::Iota(dh::ToSpan(values));
|
||||
dh::Iota(dh::ToSpan(values), ctx.CUDACtx()->Stream());
|
||||
dh::device_vector<size_t> groups(3);
|
||||
groups[0] = 0;
|
||||
groups[1] = 10;
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../../../include/xgboost/logging.h"
|
||||
#include "../../../src/common/cuda_context.cuh"
|
||||
#include "../../../src/common/device_helpers.cuh"
|
||||
#include "../../../src/common/hist_util.cuh"
|
||||
#include "../../../src/common/hist_util.h"
|
||||
@@ -211,7 +212,7 @@ TEST(HistUtil, RemoveDuplicatedCategories) {
|
||||
cuts_ptr.SetDevice(DeviceOrd::CUDA(0));
|
||||
|
||||
dh::device_vector<float> weight(n_samples * n_features, 0);
|
||||
dh::Iota(dh::ToSpan(weight));
|
||||
dh::Iota(dh::ToSpan(weight), ctx.CUDACtx()->Stream());
|
||||
|
||||
dh::caching_device_vector<bst_row_t> columns_ptr(4);
|
||||
for (std::size_t i = 0; i < columns_ptr.size(); ++i) {
|
||||
|
||||
@@ -1,19 +1,20 @@
|
||||
#include <valarray>
|
||||
/**
|
||||
* Copyright 2018-2023, XGBoost Contributors
|
||||
*/
|
||||
#include "../../../src/common/random.h"
|
||||
#include "../helpers.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "xgboost/context.h" // Context
|
||||
#include "xgboost/context.h" // for Context
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
TEST(ColumnSampler, Test) {
|
||||
Context ctx;
|
||||
namespace xgboost::common {
|
||||
namespace {
|
||||
void TestBasic(Context const* ctx) {
|
||||
int n = 128;
|
||||
ColumnSampler cs;
|
||||
ColumnSampler cs{1u};
|
||||
std::vector<float> feature_weights;
|
||||
|
||||
// No node sampling
|
||||
cs.Init(&ctx, n, feature_weights, 1.0f, 0.5f, 0.5f);
|
||||
cs.Init(ctx, n, feature_weights, 1.0f, 0.5f, 0.5f);
|
||||
auto set0 = cs.GetFeatureSet(0);
|
||||
ASSERT_EQ(set0->Size(), 32);
|
||||
|
||||
@@ -26,7 +27,7 @@ TEST(ColumnSampler, Test) {
|
||||
ASSERT_EQ(set2->Size(), 32);
|
||||
|
||||
// Node sampling
|
||||
cs.Init(&ctx, n, feature_weights, 0.5f, 1.0f, 0.5f);
|
||||
cs.Init(ctx, n, feature_weights, 0.5f, 1.0f, 0.5f);
|
||||
auto set3 = cs.GetFeatureSet(0);
|
||||
ASSERT_EQ(set3->Size(), 32);
|
||||
|
||||
@@ -36,21 +37,33 @@ TEST(ColumnSampler, Test) {
|
||||
ASSERT_EQ(set4->Size(), 32);
|
||||
|
||||
// No level or node sampling, should be the same at different depth
|
||||
cs.Init(&ctx, n, feature_weights, 1.0f, 1.0f, 0.5f);
|
||||
ASSERT_EQ(cs.GetFeatureSet(0)->HostVector(),
|
||||
cs.GetFeatureSet(1)->HostVector());
|
||||
cs.Init(ctx, n, feature_weights, 1.0f, 1.0f, 0.5f);
|
||||
ASSERT_EQ(cs.GetFeatureSet(0)->HostVector(), cs.GetFeatureSet(1)->HostVector());
|
||||
|
||||
cs.Init(&ctx, n, feature_weights, 1.0f, 1.0f, 1.0f);
|
||||
cs.Init(ctx, n, feature_weights, 1.0f, 1.0f, 1.0f);
|
||||
auto set5 = cs.GetFeatureSet(0);
|
||||
ASSERT_EQ(set5->Size(), n);
|
||||
cs.Init(&ctx, n, feature_weights, 1.0f, 1.0f, 1.0f);
|
||||
cs.Init(ctx, n, feature_weights, 1.0f, 1.0f, 1.0f);
|
||||
auto set6 = cs.GetFeatureSet(0);
|
||||
ASSERT_EQ(set5->HostVector(), set6->HostVector());
|
||||
|
||||
// Should always be a minimum of one feature
|
||||
cs.Init(&ctx, n, feature_weights, 1e-16f, 1e-16f, 1e-16f);
|
||||
cs.Init(ctx, n, feature_weights, 1e-16f, 1e-16f, 1e-16f);
|
||||
ASSERT_EQ(cs.GetFeatureSet(0)->Size(), 1);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
TEST(ColumnSampler, Test) {
|
||||
Context ctx;
|
||||
TestBasic(&ctx);
|
||||
}
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
TEST(ColumnSampler, GPUTest) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestBasic(&ctx);
|
||||
}
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
|
||||
// Test if different threads using the same seed produce the same result
|
||||
TEST(ColumnSampler, ThreadSynchronisation) {
|
||||
@@ -81,16 +94,16 @@ TEST(ColumnSampler, ThreadSynchronisation) {
|
||||
ASSERT_TRUE(success);
|
||||
}
|
||||
|
||||
TEST(ColumnSampler, WeightedSampling) {
|
||||
auto test_basic = [](int first) {
|
||||
Context ctx;
|
||||
namespace {
|
||||
void TestWeightedSampling(Context const* ctx) {
|
||||
auto test_basic = [ctx](int first) {
|
||||
std::vector<float> feature_weights(2);
|
||||
feature_weights[0] = std::abs(first - 1.0f);
|
||||
feature_weights[1] = first - 0.0f;
|
||||
ColumnSampler cs{0};
|
||||
cs.Init(&ctx, 2, feature_weights, 1.0, 1.0, 0.5);
|
||||
cs.Init(ctx, 2, feature_weights, 1.0, 1.0, 0.5);
|
||||
auto feature_sets = cs.GetFeatureSet(0);
|
||||
auto const &h_feat_set = feature_sets->HostVector();
|
||||
auto const& h_feat_set = feature_sets->HostVector();
|
||||
ASSERT_EQ(h_feat_set.size(), 1);
|
||||
ASSERT_EQ(h_feat_set[0], first - 0);
|
||||
};
|
||||
@@ -104,8 +117,7 @@ TEST(ColumnSampler, WeightedSampling) {
|
||||
SimpleRealUniformDistribution<float> dist(.0f, 12.0f);
|
||||
std::generate(feature_weights.begin(), feature_weights.end(), [&]() { return dist(&rng); });
|
||||
ColumnSampler cs{0};
|
||||
Context ctx;
|
||||
cs.Init(&ctx, kCols, feature_weights, 0.5f, 1.0f, 1.0f);
|
||||
cs.Init(ctx, kCols, feature_weights, 0.5f, 1.0f, 1.0f);
|
||||
std::vector<bst_feature_t> features(kCols);
|
||||
std::iota(features.begin(), features.end(), 0);
|
||||
std::vector<float> freq(kCols, 0);
|
||||
@@ -131,8 +143,22 @@ TEST(ColumnSampler, WeightedSampling) {
|
||||
EXPECT_NEAR(freq[i], feature_weights[i], 1e-2);
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
TEST(ColumnSampler, WeightedMultiSampling) {
|
||||
TEST(ColumnSampler, WeightedSampling) {
|
||||
Context ctx;
|
||||
TestWeightedSampling(&ctx);
|
||||
}
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
TEST(ColumnSampler, GPUWeightedSampling) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestWeightedSampling(&ctx);
|
||||
}
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
|
||||
namespace {
|
||||
void TestWeightedMultiSampling(Context const* ctx) {
|
||||
size_t constexpr kCols = 32;
|
||||
std::vector<float> feature_weights(kCols, 0);
|
||||
for (size_t i = 0; i < feature_weights.size(); ++i) {
|
||||
@@ -140,13 +166,24 @@ TEST(ColumnSampler, WeightedMultiSampling) {
|
||||
}
|
||||
ColumnSampler cs{0};
|
||||
float bytree{0.5}, bylevel{0.5}, bynode{0.5};
|
||||
Context ctx;
|
||||
cs.Init(&ctx, feature_weights.size(), feature_weights, bytree, bylevel, bynode);
|
||||
cs.Init(ctx, feature_weights.size(), feature_weights, bytree, bylevel, bynode);
|
||||
auto feature_set = cs.GetFeatureSet(0);
|
||||
size_t n_sampled = kCols * bytree * bylevel * bynode;
|
||||
ASSERT_EQ(feature_set->Size(), n_sampled);
|
||||
feature_set = cs.GetFeatureSet(1);
|
||||
ASSERT_EQ(feature_set->Size(), n_sampled);
|
||||
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
} // namespace
|
||||
|
||||
TEST(ColumnSampler, WeightedMultiSampling) {
|
||||
Context ctx;
|
||||
TestWeightedMultiSampling(&ctx);
|
||||
}
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
TEST(ColumnSampler, GPUWeightedMultiSampling) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestWeightedMultiSampling(&ctx);
|
||||
}
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
} // namespace xgboost::common
|
||||
|
||||
@@ -28,7 +28,7 @@ void TestEvaluateSplits(bool force_read_by_column) {
|
||||
Context ctx;
|
||||
ctx.nthread = 4;
|
||||
int static constexpr kRows = 8, kCols = 16;
|
||||
auto sampler = std::make_shared<common::ColumnSampler>();
|
||||
auto sampler = std::make_shared<common::ColumnSampler>(1u);
|
||||
|
||||
TrainParam param;
|
||||
param.UpdateAllowUnknown(Args{{"min_child_weight", "0"}, {"reg_lambda", "0"}});
|
||||
@@ -102,7 +102,7 @@ TEST(HistMultiEvaluator, Evaluate) {
|
||||
|
||||
TrainParam param;
|
||||
param.Init(Args{{"min_child_weight", "0"}, {"reg_lambda", "0"}});
|
||||
auto sampler = std::make_shared<common::ColumnSampler>();
|
||||
auto sampler = std::make_shared<common::ColumnSampler>(1u);
|
||||
|
||||
std::size_t n_samples = 3;
|
||||
bst_feature_t n_features = 2;
|
||||
@@ -166,7 +166,7 @@ TEST(HistEvaluator, Apply) {
|
||||
TrainParam param;
|
||||
param.UpdateAllowUnknown(Args{{"min_child_weight", "0"}, {"reg_lambda", "0.0"}});
|
||||
auto dmat = RandomDataGenerator(kNRows, kNCols, 0).Seed(3).GenerateDMatrix();
|
||||
auto sampler = std::make_shared<common::ColumnSampler>();
|
||||
auto sampler = std::make_shared<common::ColumnSampler>(1u);
|
||||
auto evaluator_ = HistEvaluator{&ctx, ¶m, dmat->Info(), sampler};
|
||||
|
||||
CPUExpandEntry entry{0, 0};
|
||||
@@ -194,7 +194,7 @@ TEST_F(TestPartitionBasedSplit, CPUHist) {
|
||||
Context ctx;
|
||||
// check the evaluator is returning the optimal split
|
||||
std::vector<FeatureType> ft{FeatureType::kCategorical};
|
||||
auto sampler = std::make_shared<common::ColumnSampler>();
|
||||
auto sampler = std::make_shared<common::ColumnSampler>(1u);
|
||||
HistEvaluator evaluator{&ctx, ¶m_, info_, sampler};
|
||||
evaluator.InitRoot(GradStats{total_gpair_});
|
||||
RegTree tree;
|
||||
@@ -224,7 +224,7 @@ auto CompareOneHotAndPartition(bool onehot) {
|
||||
auto dmat =
|
||||
RandomDataGenerator(kRows, kCols, 0).Seed(3).Type(ft).MaxCategory(n_cats).GenerateDMatrix();
|
||||
|
||||
auto sampler = std::make_shared<common::ColumnSampler>();
|
||||
auto sampler = std::make_shared<common::ColumnSampler>(1u);
|
||||
auto evaluator = HistEvaluator{&ctx, ¶m, dmat->Info(), sampler};
|
||||
std::vector<CPUExpandEntry> entries(1);
|
||||
HistMakerTrainParam hist_param;
|
||||
@@ -271,7 +271,7 @@ TEST_F(TestCategoricalSplitWithMissing, HistEvaluator) {
|
||||
ASSERT_EQ(node_hist.size(), feature_histogram_.size());
|
||||
std::copy(feature_histogram_.cbegin(), feature_histogram_.cend(), node_hist.begin());
|
||||
|
||||
auto sampler = std::make_shared<common::ColumnSampler>();
|
||||
auto sampler = std::make_shared<common::ColumnSampler>(1u);
|
||||
MetaInfo info;
|
||||
info.num_col_ = 1;
|
||||
info.feature_types = {FeatureType::kCategorical};
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
/**
|
||||
* Copyright 2019-2023, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/base.h>
|
||||
#include <xgboost/logging.h>
|
||||
@@ -9,9 +12,7 @@
|
||||
#include "../../../src/tree/hist/evaluate_splits.h"
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
|
||||
namespace xgboost::tree {
|
||||
TEST(CPUFeatureInteractionConstraint, Empty) {
|
||||
TrainParam param;
|
||||
param.UpdateAllowUnknown(Args{});
|
||||
@@ -77,7 +78,7 @@ TEST(CPUMonoConstraint, Basic) {
|
||||
param.UpdateAllowUnknown(Args{{"monotone_constraints", str_mono}});
|
||||
|
||||
auto Xy = RandomDataGenerator{kRows, kCols, 0.0}.GenerateDMatrix(true);
|
||||
auto sampler = std::make_shared<common::ColumnSampler>();
|
||||
auto sampler = std::make_shared<common::ColumnSampler>(1u);
|
||||
|
||||
HistEvaluator evalutor{&ctx, ¶m, Xy->Info(), sampler};
|
||||
evalutor.InitRoot(GradStats{2.0, 2.0});
|
||||
@@ -90,5 +91,4 @@ TEST(CPUMonoConstraint, Basic) {
|
||||
|
||||
ASSERT_TRUE(evalutor.Evaluator().has_constraint);
|
||||
}
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::tree
|
||||
|
||||
Reference in New Issue
Block a user