Specify the number of threads for parallel sort. (#8735)
* Specify the number of threads for parallel sort. - Pass context object into argsort. - Replace macros with inline functions.
This commit is contained in:
35
tests/cpp/common/test_algorithm.cc
Normal file
35
tests/cpp/common/test_algorithm.cc
Normal file
@@ -0,0 +1,35 @@
|
||||
/**
|
||||
* Copyright 2020-2023 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/context.h> // Context
|
||||
#include <xgboost/span.h>
|
||||
|
||||
#include <algorithm> // is_sorted
|
||||
|
||||
#include "../../../src/common/algorithm.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
TEST(Algorithm, ArgSort) {
|
||||
Context ctx;
|
||||
std::vector<float> inputs{3.0, 2.0, 1.0};
|
||||
auto ret = ArgSort<bst_feature_t>(&ctx, inputs.cbegin(), inputs.cend());
|
||||
std::vector<bst_feature_t> sol{2, 1, 0};
|
||||
ASSERT_EQ(ret, sol);
|
||||
}
|
||||
|
||||
TEST(Algorithm, Sort) {
|
||||
Context ctx;
|
||||
ctx.Init(Args{{"nthread", "8"}});
|
||||
std::vector<float> inputs{3.0, 1.0, 2.0};
|
||||
|
||||
Sort(&ctx, inputs.begin(), inputs.end(), std::less<>{});
|
||||
ASSERT_TRUE(std::is_sorted(inputs.cbegin(), inputs.cend()));
|
||||
|
||||
inputs = {3.0, 1.0, 2.0};
|
||||
StableSort(&ctx, inputs.begin(), inputs.end(), std::less<>{});
|
||||
ASSERT_TRUE(std::is_sorted(inputs.cbegin(), inputs.cend()));
|
||||
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
@@ -52,9 +52,9 @@ void TestSegmentedArgSort() {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Algorithms, SegmentedArgSort) { TestSegmentedArgSort(); }
|
||||
TEST(Algorithm, SegmentedArgSort) { TestSegmentedArgSort(); }
|
||||
|
||||
TEST(Algorithms, ArgSort) {
|
||||
TEST(Algorithm, GpuArgSort) {
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
|
||||
@@ -80,7 +80,7 @@ TEST(Algorithms, ArgSort) {
|
||||
thrust::is_sorted(sorted_idx.begin() + 10, sorted_idx.end(), thrust::greater<size_t>{}));
|
||||
}
|
||||
|
||||
TEST(Algorithms, SegmentedSequence) {
|
||||
TEST(Algorithm, SegmentedSequence) {
|
||||
dh::device_vector<std::size_t> idx(16);
|
||||
dh::device_vector<std::size_t> ptr(3);
|
||||
Context ctx = CreateEmptyGenericParam(0);
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/span.h>
|
||||
#include "../../../src/common/common.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
TEST(ArgSort, Basic) {
|
||||
std::vector<float> inputs {3.0, 2.0, 1.0};
|
||||
auto ret = ArgSort<bst_feature_t>(Span<float>{inputs});
|
||||
std::vector<bst_feature_t> sol{2, 1, 0};
|
||||
ASSERT_EQ(ret, sol);
|
||||
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
@@ -2,16 +2,18 @@
|
||||
#include "../../../src/common/random.h"
|
||||
#include "../helpers.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "xgboost/context.h" // Context
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
TEST(ColumnSampler, Test) {
|
||||
Context ctx;
|
||||
int n = 128;
|
||||
ColumnSampler cs;
|
||||
std::vector<float> feature_weights;
|
||||
|
||||
// No node sampling
|
||||
cs.Init(n, feature_weights, 1.0f, 0.5f, 0.5f);
|
||||
cs.Init(&ctx, n, feature_weights, 1.0f, 0.5f, 0.5f);
|
||||
auto set0 = cs.GetFeatureSet(0);
|
||||
ASSERT_EQ(set0->Size(), 32);
|
||||
|
||||
@@ -24,7 +26,7 @@ TEST(ColumnSampler, Test) {
|
||||
ASSERT_EQ(set2->Size(), 32);
|
||||
|
||||
// Node sampling
|
||||
cs.Init(n, feature_weights, 0.5f, 1.0f, 0.5f);
|
||||
cs.Init(&ctx, n, feature_weights, 0.5f, 1.0f, 0.5f);
|
||||
auto set3 = cs.GetFeatureSet(0);
|
||||
ASSERT_EQ(set3->Size(), 32);
|
||||
|
||||
@@ -34,24 +36,25 @@ TEST(ColumnSampler, Test) {
|
||||
ASSERT_EQ(set4->Size(), 32);
|
||||
|
||||
// No level or node sampling, should be the same at different depth
|
||||
cs.Init(n, feature_weights, 1.0f, 1.0f, 0.5f);
|
||||
cs.Init(&ctx, n, feature_weights, 1.0f, 1.0f, 0.5f);
|
||||
ASSERT_EQ(cs.GetFeatureSet(0)->HostVector(),
|
||||
cs.GetFeatureSet(1)->HostVector());
|
||||
|
||||
cs.Init(n, feature_weights, 1.0f, 1.0f, 1.0f);
|
||||
cs.Init(&ctx, n, feature_weights, 1.0f, 1.0f, 1.0f);
|
||||
auto set5 = cs.GetFeatureSet(0);
|
||||
ASSERT_EQ(set5->Size(), n);
|
||||
cs.Init(n, feature_weights, 1.0f, 1.0f, 1.0f);
|
||||
cs.Init(&ctx, n, feature_weights, 1.0f, 1.0f, 1.0f);
|
||||
auto set6 = cs.GetFeatureSet(0);
|
||||
ASSERT_EQ(set5->HostVector(), set6->HostVector());
|
||||
|
||||
// Should always be a minimum of one feature
|
||||
cs.Init(n, feature_weights, 1e-16f, 1e-16f, 1e-16f);
|
||||
cs.Init(&ctx, n, feature_weights, 1e-16f, 1e-16f, 1e-16f);
|
||||
ASSERT_EQ(cs.GetFeatureSet(0)->Size(), 1);
|
||||
}
|
||||
|
||||
// Test if different threads using the same seed produce the same result
|
||||
TEST(ColumnSampler, ThreadSynchronisation) {
|
||||
Context ctx;
|
||||
const int64_t num_threads = 100;
|
||||
int n = 128;
|
||||
size_t iterations = 10;
|
||||
@@ -63,7 +66,7 @@ TEST(ColumnSampler, ThreadSynchronisation) {
|
||||
{
|
||||
for (auto j = 0ull; j < iterations; j++) {
|
||||
ColumnSampler cs(j);
|
||||
cs.Init(n, feature_weights, 0.5f, 0.5f, 0.5f);
|
||||
cs.Init(&ctx, n, feature_weights, 0.5f, 0.5f, 0.5f);
|
||||
for (auto level = 0ull; level < levels; level++) {
|
||||
auto result = cs.GetFeatureSet(level)->ConstHostVector();
|
||||
#pragma omp single
|
||||
@@ -80,11 +83,12 @@ TEST(ColumnSampler, ThreadSynchronisation) {
|
||||
|
||||
TEST(ColumnSampler, WeightedSampling) {
|
||||
auto test_basic = [](int first) {
|
||||
Context ctx;
|
||||
std::vector<float> feature_weights(2);
|
||||
feature_weights[0] = std::abs(first - 1.0f);
|
||||
feature_weights[1] = first - 0.0f;
|
||||
ColumnSampler cs{0};
|
||||
cs.Init(2, feature_weights, 1.0, 1.0, 0.5);
|
||||
cs.Init(&ctx, 2, feature_weights, 1.0, 1.0, 0.5);
|
||||
auto feature_sets = cs.GetFeatureSet(0);
|
||||
auto const &h_feat_set = feature_sets->HostVector();
|
||||
ASSERT_EQ(h_feat_set.size(), 1);
|
||||
@@ -100,7 +104,8 @@ TEST(ColumnSampler, WeightedSampling) {
|
||||
SimpleRealUniformDistribution<float> dist(.0f, 12.0f);
|
||||
std::generate(feature_weights.begin(), feature_weights.end(), [&]() { return dist(&rng); });
|
||||
ColumnSampler cs{0};
|
||||
cs.Init(kCols, feature_weights, 0.5f, 1.0f, 1.0f);
|
||||
Context ctx;
|
||||
cs.Init(&ctx, kCols, feature_weights, 0.5f, 1.0f, 1.0f);
|
||||
std::vector<bst_feature_t> features(kCols);
|
||||
std::iota(features.begin(), features.end(), 0);
|
||||
std::vector<float> freq(kCols, 0);
|
||||
@@ -135,7 +140,8 @@ TEST(ColumnSampler, WeightedMultiSampling) {
|
||||
}
|
||||
ColumnSampler cs{0};
|
||||
float bytree{0.5}, bylevel{0.5}, bynode{0.5};
|
||||
cs.Init(feature_weights.size(), feature_weights, bytree, bylevel, bynode);
|
||||
Context ctx;
|
||||
cs.Init(&ctx, feature_weights.size(), feature_weights, bytree, bylevel, bynode);
|
||||
auto feature_set = cs.GetFeatureSet(0);
|
||||
size_t n_sampled = kCols * bytree * bylevel * bynode;
|
||||
ASSERT_EQ(feature_set->Size(), n_sampled);
|
||||
|
||||
@@ -9,12 +9,14 @@
|
||||
#include "../../../../src/tree/hist/evaluate_splits.h"
|
||||
#include "../test_evaluate_splits.h"
|
||||
#include "../../helpers.h"
|
||||
#include "xgboost/context.h" // Context
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
void TestEvaluateSplits(bool force_read_by_column) {
|
||||
Context ctx;
|
||||
ctx.nthread = 4;
|
||||
int static constexpr kRows = 8, kCols = 16;
|
||||
int32_t n_threads = std::min(omp_get_max_threads(), 4);
|
||||
auto sampler = std::make_shared<common::ColumnSampler>();
|
||||
|
||||
TrainParam param;
|
||||
@@ -22,7 +24,7 @@ void TestEvaluateSplits(bool force_read_by_column) {
|
||||
|
||||
auto dmat = RandomDataGenerator(kRows, kCols, 0).Seed(3).GenerateDMatrix();
|
||||
|
||||
auto evaluator = HistEvaluator<CPUExpandEntry>{param, dmat->Info(), n_threads, sampler};
|
||||
auto evaluator = HistEvaluator<CPUExpandEntry>{&ctx, param, dmat->Info(), sampler};
|
||||
common::HistCollection hist;
|
||||
std::vector<GradientPair> row_gpairs = {
|
||||
{1.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f}, {2.27f, 0.28f},
|
||||
@@ -86,13 +88,15 @@ TEST(HistEvaluator, Evaluate) {
|
||||
}
|
||||
|
||||
TEST(HistEvaluator, Apply) {
|
||||
Context ctx;
|
||||
ctx.nthread = 4;
|
||||
RegTree tree;
|
||||
int static constexpr kNRows = 8, kNCols = 16;
|
||||
TrainParam param;
|
||||
param.UpdateAllowUnknown(Args{{"min_child_weight", "0"}, {"reg_lambda", "0.0"}});
|
||||
auto dmat = RandomDataGenerator(kNRows, kNCols, 0).Seed(3).GenerateDMatrix();
|
||||
auto sampler = std::make_shared<common::ColumnSampler>();
|
||||
auto evaluator_ = HistEvaluator<CPUExpandEntry>{param, dmat->Info(), 4, sampler};
|
||||
auto evaluator_ = HistEvaluator<CPUExpandEntry>{&ctx, param, dmat->Info(), sampler};
|
||||
|
||||
CPUExpandEntry entry{0, 0, 10.0f};
|
||||
entry.split.left_sum = GradStats{0.4, 0.6f};
|
||||
@@ -115,10 +119,11 @@ TEST(HistEvaluator, Apply) {
|
||||
}
|
||||
|
||||
TEST_F(TestPartitionBasedSplit, CPUHist) {
|
||||
Context ctx;
|
||||
// check the evaluator is returning the optimal split
|
||||
std::vector<FeatureType> ft{FeatureType::kCategorical};
|
||||
auto sampler = std::make_shared<common::ColumnSampler>();
|
||||
HistEvaluator<CPUExpandEntry> evaluator{param_, info_, AllThreadsForTest(), sampler};
|
||||
HistEvaluator<CPUExpandEntry> evaluator{&ctx, param_, info_, sampler};
|
||||
evaluator.InitRoot(GradStats{total_gpair_});
|
||||
RegTree tree;
|
||||
std::vector<CPUExpandEntry> entries(1);
|
||||
@@ -128,6 +133,7 @@ TEST_F(TestPartitionBasedSplit, CPUHist) {
|
||||
|
||||
namespace {
|
||||
auto CompareOneHotAndPartition(bool onehot) {
|
||||
Context ctx;
|
||||
int static constexpr kRows = 128, kCols = 1;
|
||||
std::vector<FeatureType> ft(kCols, FeatureType::kCategorical);
|
||||
|
||||
@@ -147,8 +153,7 @@ auto CompareOneHotAndPartition(bool onehot) {
|
||||
RandomDataGenerator(kRows, kCols, 0).Seed(3).Type(ft).MaxCategory(n_cats).GenerateDMatrix();
|
||||
|
||||
auto sampler = std::make_shared<common::ColumnSampler>();
|
||||
auto evaluator =
|
||||
HistEvaluator<CPUExpandEntry>{param, dmat->Info(), AllThreadsForTest(), sampler};
|
||||
auto evaluator = HistEvaluator<CPUExpandEntry>{&ctx, param, dmat->Info(), sampler};
|
||||
std::vector<CPUExpandEntry> entries(1);
|
||||
|
||||
for (auto const &gmat : dmat->GetBatches<GHistIndexMatrix>({32, param.sparse_threshold})) {
|
||||
@@ -198,8 +203,8 @@ TEST_F(TestCategoricalSplitWithMissing, HistEvaluator) {
|
||||
MetaInfo info;
|
||||
info.num_col_ = 1;
|
||||
info.feature_types = {FeatureType::kCategorical};
|
||||
auto evaluator =
|
||||
HistEvaluator<CPUExpandEntry>{param_, info, AllThreadsForTest(), sampler};
|
||||
Context ctx;
|
||||
auto evaluator = HistEvaluator<CPUExpandEntry>{&ctx, param_, info, sampler};
|
||||
evaluator.InitRoot(GradStats{parent_sum_});
|
||||
|
||||
std::vector<CPUExpandEntry> entries(1);
|
||||
|
||||
Reference in New Issue
Block a user