From 6c403187eccb5d00c7b2fcbd37e9293ccbd549c2 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 12 Jul 2024 01:07:12 +0800 Subject: [PATCH] Fix column split race condition. (#10572) --- src/tree/common_row_partitioner.h | 48 ++++++++++++++--- tests/cpp/tree/test_approx.cc | 23 ++++++++ tests/cpp/tree/test_column_split.h | 79 ++++++++++++++++++++++++++++ tests/cpp/tree/test_histmaker.cc | 79 ++-------------------------- tests/cpp/tree/test_quantile_hist.cc | 69 +++++++----------------- 5 files changed, 166 insertions(+), 132 deletions(-) create mode 100644 tests/cpp/tree/test_column_split.h diff --git a/src/tree/common_row_partitioner.h b/src/tree/common_row_partitioner.h index cd267673b..ff75000df 100644 --- a/src/tree/common_row_partitioner.h +++ b/src/tree/common_row_partitioner.h @@ -36,10 +36,11 @@ class ColumnSplitHelper { common::PartitionBuilder* partition_builder, common::RowSetCollection* row_set_collection) : partition_builder_{partition_builder}, row_set_collection_{row_set_collection} { - decision_storage_.resize(num_row); - decision_bits_ = BitVector(common::Span(decision_storage_)); - missing_storage_.resize(num_row); - missing_bits_ = BitVector(common::Span(missing_storage_)); + auto n_bytes = BitVector::ComputeStorageSize(num_row); + decision_storage_.resize(n_bytes); + decision_bits_ = BitVector{common::Span{decision_storage_}}; + missing_storage_.resize(n_bytes); + missing_bits_ = BitVector{common::Span{missing_storage_}}; } template @@ -51,14 +52,43 @@ class ColumnSplitHelper { // we first collect all the decisions and whether the feature is missing into bit vectors. std::fill(decision_storage_.begin(), decision_storage_.end(), 0); std::fill(missing_storage_.begin(), missing_storage_.end(), 0); - common::ParallelFor2d(space, n_threads, [&](size_t node_in_set, common::Range1d r) { - const int32_t nid = nodes[node_in_set].nid; + + this->tloc_decision_.resize(decision_storage_.size() * n_threads); + this->tloc_missing_.resize(decision_storage_.size() * n_threads); + std::fill_n(this->tloc_decision_.data(), this->tloc_decision_.size(), 0); + std::fill_n(this->tloc_missing_.data(), this->tloc_missing_.size(), 0); + + // Make thread-local storage. + using T = decltype(decision_storage_)::value_type; + auto make_tloc = [&](std::vector& storage, std::int32_t tidx) { + auto span = common::Span{storage}; + auto n = decision_storage_.size(); + auto bitvec = BitVector{span.subspan(n * tidx, n)}; + return bitvec; + }; + + common::ParallelFor2d(space, n_threads, [&](std::size_t node_in_set, common::Range1d r) { + bst_node_t const nid = nodes[node_in_set].nid; + auto tidx = omp_get_thread_num(); + auto decision = make_tloc(this->tloc_decision_, tidx); + auto missing = make_tloc(this->tloc_missing_, tidx); bst_bin_t split_cond = column_matrix.IsInitialized() ? split_conditions[node_in_set] : 0; partition_builder_->MaskRows( node_in_set, nodes, r, split_cond, gmat, column_matrix, *p_tree, - (*row_set_collection_)[nid].begin(), &decision_bits_, &missing_bits_); + (*row_set_collection_)[nid].begin(), &decision, &missing); }); + // Reduce thread local + auto decision = make_tloc(this->tloc_decision_, 0); + auto missing = make_tloc(this->tloc_missing_, 0); + for (std::int32_t tidx = 1; tidx < n_threads; ++tidx) { + decision |= make_tloc(this->tloc_decision_, tidx); + missing |= make_tloc(this->tloc_missing_, tidx); + } + CHECK_EQ(decision_storage_.size(), decision.NumValues()); + std::copy_n(decision.Data(), decision_storage_.size(), decision_storage_.data()); + std::copy_n(missing.Data(), missing_storage_.size(), missing_storage_.data()); + // Then aggregate the bit vectors across all the workers. auto rc = collective::Success() << [&] { return collective::Allreduce(ctx, &decision_storage_, collective::Op::kBitwiseOR); @@ -85,6 +115,10 @@ class ColumnSplitHelper { BitVector decision_bits_{}; std::vector missing_storage_{}; BitVector missing_bits_{}; + + std::vector tloc_decision_; + std::vector tloc_missing_; + common::PartitionBuilder* partition_builder_; common::RowSetCollection* row_set_collection_; }; diff --git a/tests/cpp/tree/test_approx.cc b/tests/cpp/tree/test_approx.cc index d647d3a97..8f28bfa21 100644 --- a/tests/cpp/tree/test_approx.cc +++ b/tests/cpp/tree/test_approx.cc @@ -6,6 +6,7 @@ #include "../../../src/tree/common_row_partitioner.h" #include "../collective/test_worker.h" // for TestDistributedGlobal #include "../helpers.h" +#include "test_column_split.h" // for TestColumnSplit #include "test_partitioner.h" namespace xgboost::tree { @@ -154,4 +155,26 @@ TEST(Approx, PartitionerColSplit) { mid_partitioner); }); } + +namespace { +class TestApproxColSplit : public ::testing::TestWithParam> { + public: + void Run() { + auto [categorical, sparsity] = GetParam(); + TestColumnSplit(1u, categorical, "grow_histmaker", sparsity); + } +}; +} // namespace + +TEST_P(TestApproxColSplit, Basic) { this->Run(); } + +INSTANTIATE_TEST_SUITE_P(ColumnSplit, TestApproxColSplit, ::testing::ValuesIn([]() { + std::vector> params; + for (auto categorical : {true, false}) { + for (auto sparsity : {0.0f, 0.6f}) { + params.emplace_back(categorical, sparsity); + } + } + return params; + }())); } // namespace xgboost::tree diff --git a/tests/cpp/tree/test_column_split.h b/tests/cpp/tree/test_column_split.h new file mode 100644 index 000000000..b03597f38 --- /dev/null +++ b/tests/cpp/tree/test_column_split.h @@ -0,0 +1,79 @@ +/** + * Copyright 2023-2024, XGBoost Contributors + */ +#pragma once + +#include // for FeatureType, DMatrix +#include // for RegTree +#include // for TreeUpdater + +#include // for size_t +#include // for shared_ptr +#include // for vector + +#include "../../../src/tree/param.h" // for TrainParam +#include "../collective/test_worker.h" // for TestDistributedGlobal +#include "../helpers.h" // for RandomDataGenerator + +namespace xgboost::tree { +inline std::shared_ptr GenerateCatDMatrix(std::size_t rows, std::size_t cols, + float sparsity, bool categorical) { + if (categorical) { + std::vector ft(cols); + for (size_t i = 0; i < ft.size(); ++i) { + ft[i] = (i % 3 == 0) ? FeatureType::kNumerical : FeatureType::kCategorical; + } + return RandomDataGenerator(rows, cols, 0.6f).Seed(3).Type(ft).MaxCategory(17).GenerateDMatrix(); + } else { + return RandomDataGenerator{rows, cols, 0.6f}.Seed(3).GenerateDMatrix(); + } +} + +inline void TestColumnSplit(bst_target_t n_targets, bool categorical, std::string name, + float sparsity) { + auto constexpr kRows = 32; + auto constexpr kCols = 16; + + RegTree expected_tree{n_targets, static_cast(kCols)}; + ObjInfo task{ObjInfo::kRegression}; + Context ctx; + { + auto p_dmat = GenerateCatDMatrix(kRows, kCols, sparsity, categorical); + auto gpair = GenerateRandomGradients(&ctx, kRows, n_targets); + std::unique_ptr updater{TreeUpdater::Create(name, &ctx, &task)}; + std::vector> position(1); + TrainParam param; + param.Init(Args{}); + updater->Configure(Args{}); + updater->Update(¶m, &gpair, p_dmat.get(), position, {&expected_tree}); + } + + auto verify = [&] { + Context ctx; + auto p_dmat = GenerateCatDMatrix(kRows, kCols, sparsity, categorical); + auto gpair = GenerateRandomGradients(&ctx, kRows, n_targets); + + ObjInfo task{ObjInfo::kRegression}; + std::unique_ptr updater{TreeUpdater::Create(name, &ctx, &task)}; + std::vector> position(1); + + std::unique_ptr sliced{ + p_dmat->SliceCol(collective::GetWorldSize(), collective::GetRank())}; + + RegTree tree{n_targets, static_cast(kCols)}; + TrainParam param; + param.Init(Args{}); + updater->Configure(Args{}); + updater->Update(¶m, &gpair, sliced.get(), position, {&tree}); + + Json json{Object{}}; + tree.SaveModel(&json); + Json expected_json{Object{}}; + expected_tree.SaveModel(&expected_json); + ASSERT_EQ(json, expected_json); + }; + + auto constexpr kWorldSize = 2; + collective::TestDistributedGlobal(kWorldSize, [&] { verify(); }); +} +} // namespace xgboost::tree diff --git a/tests/cpp/tree/test_histmaker.cc b/tests/cpp/tree/test_histmaker.cc index b8b9e46ca..888790aa7 100644 --- a/tests/cpp/tree/test_histmaker.cc +++ b/tests/cpp/tree/test_histmaker.cc @@ -1,32 +1,19 @@ /** - * Copyright 2019-2023 by XGBoost Contributors + * Copyright 2019-2024, XGBoost Contributors */ #include #include #include -#include "../../../src/tree/param.h" // for TrainParam -#include "../collective/test_worker.h" // for TestDistributedGlobal +#include "../../../src/tree/param.h" // for TrainParam #include "../helpers.h" +#include "test_column_split.h" // for GenerateCatDMatrix namespace xgboost::tree { -std::shared_ptr GenerateDMatrix(std::size_t rows, std::size_t cols, - bool categorical = false) { - if (categorical) { - std::vector ft(cols); - for (size_t i = 0; i < ft.size(); ++i) { - ft[i] = (i % 3 == 0) ? FeatureType::kNumerical : FeatureType::kCategorical; - } - return RandomDataGenerator(rows, cols, 0.6f).Seed(3).Type(ft).MaxCategory(17).GenerateDMatrix(); - } else { - return RandomDataGenerator{rows, cols, 0.6f}.Seed(3).GenerateDMatrix(); - } -} - TEST(GrowHistMaker, InteractionConstraint) { auto constexpr kRows = 32; auto constexpr kCols = 16; - auto p_dmat = GenerateDMatrix(kRows, kCols); + auto p_dmat = GenerateCatDMatrix(kRows, kCols, 0.0, false); Context ctx; linalg::Matrix gpair({kRows}, ctx.Device()); @@ -69,62 +56,4 @@ TEST(GrowHistMaker, InteractionConstraint) { ASSERT_NE(tree[tree[0].RightChild()].SplitIndex(), 0); } } - -namespace { -void VerifyColumnSplit(int32_t rows, bst_feature_t cols, bool categorical, - RegTree const& expected_tree) { - Context ctx; - auto p_dmat = GenerateDMatrix(rows, cols, categorical); - linalg::Matrix gpair({rows}, ctx.Device()); - gpair.Data()->Copy(GenerateRandomGradients(rows)); - - - ObjInfo task{ObjInfo::kRegression}; - std::unique_ptr updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)}; - std::vector> position(1); - - std::unique_ptr sliced{ - p_dmat->SliceCol(collective::GetWorldSize(), collective::GetRank())}; - - RegTree tree{1u, cols}; - TrainParam param; - param.Init(Args{}); - updater->Configure(Args{}); - updater->Update(¶m, &gpair, sliced.get(), position, {&tree}); - - Json json{Object{}}; - tree.SaveModel(&json); - Json expected_json{Object{}}; - expected_tree.SaveModel(&expected_json); - ASSERT_EQ(json, expected_json); -} - -void TestColumnSplit(bool categorical) { - auto constexpr kRows = 32; - auto constexpr kCols = 16; - - RegTree expected_tree{1u, kCols}; - ObjInfo task{ObjInfo::kRegression}; - { - Context ctx; - auto p_dmat = GenerateDMatrix(kRows, kCols, categorical); - linalg::Matrix gpair({kRows}, ctx.Device()); - gpair.Data()->Copy(GenerateRandomGradients(kRows)); - std::unique_ptr updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)}; - std::vector> position(1); - TrainParam param; - param.Init(Args{}); - updater->Configure(Args{}); - updater->Update(¶m, &gpair, p_dmat.get(), position, {&expected_tree}); - } - - auto constexpr kWorldSize = 2; - collective::TestDistributedGlobal( - kWorldSize, [&] { VerifyColumnSplit(kRows, kCols, categorical, expected_tree); }); -} -} // anonymous namespace - -TEST(GrowHistMaker, ColumnSplitNumerical) { TestColumnSplit(false); } - -TEST(GrowHistMaker, ColumnSplitCategorical) { TestColumnSplit(true); } } // namespace xgboost::tree diff --git a/tests/cpp/tree/test_quantile_hist.cc b/tests/cpp/tree/test_quantile_hist.cc index 29ae02f8d..74fd6ec5f 100644 --- a/tests/cpp/tree/test_quantile_hist.cc +++ b/tests/cpp/tree/test_quantile_hist.cc @@ -11,9 +11,9 @@ #include "../../../src/tree/common_row_partitioner.h" #include "../../../src/tree/hist/expand_entry.h" // for MultiExpandEntry, CPUExpandEntry -#include "../../../src/tree/param.h" #include "../collective/test_worker.h" // for TestDistributedGlobal #include "../helpers.h" +#include "test_column_split.h" // for TestColumnSplit #include "test_partitioner.h" #include "xgboost/data.h" @@ -208,57 +208,26 @@ TEST(QuantileHist, PartitionerColSplit) { TestColumnSplitPartitioner(3); } namespace { -void VerifyColumnSplit(Context const* ctx, bst_idx_t rows, bst_feature_t cols, bst_target_t n_targets, - RegTree const& expected_tree) { - auto Xy = RandomDataGenerator{rows, cols, 0}.GenerateDMatrix(true); - linalg::Matrix gpair = GenerateRandomGradients(ctx, rows, n_targets); - - ObjInfo task{ObjInfo::kRegression}; - std::unique_ptr updater{TreeUpdater::Create("grow_quantile_histmaker", ctx, &task)}; - std::vector> position(1); - - std::unique_ptr sliced{Xy->SliceCol(collective::GetWorldSize(), collective::GetRank())}; - - RegTree tree{n_targets, cols}; - TrainParam param; - param.Init(Args{}); - updater->Configure(Args{}); - updater->Update(¶m, &gpair, sliced.get(), position, {&tree}); - - Json json{Object{}}; - tree.SaveModel(&json); - Json expected_json{Object{}}; - expected_tree.SaveModel(&expected_json); - ASSERT_EQ(json, expected_json); -} - -void TestColumnSplit(bst_target_t n_targets) { - auto constexpr kRows = 32; - auto constexpr kCols = 16; - - RegTree expected_tree{n_targets, kCols}; - ObjInfo task{ObjInfo::kRegression}; - Context ctx; - { - auto Xy = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true); - auto gpair = GenerateRandomGradients(&ctx, kRows, n_targets); - std::unique_ptr updater{ - TreeUpdater::Create("grow_quantile_histmaker", &ctx, &task)}; - std::vector> position(1); - TrainParam param; - param.Init(Args{}); - updater->Configure(Args{}); - updater->Update(¶m, &gpair, Xy.get(), position, {&expected_tree}); +class TestHistColSplit : public ::testing::TestWithParam> { + public: + void Run() { + auto [n_targets, categorical, sparsity] = GetParam(); + TestColumnSplit(n_targets, categorical, "grow_quantile_histmaker", sparsity); } - - auto constexpr kWorldSize = 2; - collective::TestDistributedGlobal(kWorldSize, [&] { - VerifyColumnSplit(&ctx, kRows, kCols, n_targets, std::cref(expected_tree)); - }); -} +}; } // anonymous namespace -TEST(QuantileHist, ColumnSplit) { TestColumnSplit(1); } +TEST_P(TestHistColSplit, Basic) { this->Run(); } -TEST(QuantileHist, ColumnSplitMultiTarget) { TestColumnSplit(3); } +INSTANTIATE_TEST_SUITE_P(ColumnSplit, TestHistColSplit, ::testing::ValuesIn([]() { + std::vector> params; + for (auto categorical : {true, false}) { + for (auto sparsity : {0.0f, 0.6f}) { + for (bst_target_t n_targets : {1u, 3u}) { + params.emplace_back(n_targets, categorical, sparsity); + } + } + } + return params; + }())); } // namespace xgboost::tree