Small cleanup for histogram routines. (#9427)

* Small cleanup for histogram routines.

- Extract hist train param from GPU hist.
- Make histogram const after construction.
- Unify parameter names.
This commit is contained in:
Jiaming Yuan
2023-08-02 18:28:26 +08:00
committed by GitHub
parent c2b85ab68a
commit e93a274823
17 changed files with 182 additions and 111 deletions

View File

@@ -105,13 +105,13 @@ void TestBuildHist(bool use_shared_memory_histograms) {
gpair.SetDevice(0);
thrust::host_vector<common::CompressedByteT> h_gidx_buffer (page->gidx_buffer.HostVector());
maker.row_partitioner.reset(new RowPartitioner(0, kNRows));
maker.row_partitioner = std::make_unique<RowPartitioner>(0, kNRows);
maker.hist.Init(0, page->Cuts().TotalBins());
maker.hist.AllocateHistograms({0});
maker.gpair = gpair.DeviceSpan();
maker.quantiser.reset(new GradientQuantiser(maker.gpair));
maker.quantiser = std::make_unique<GradientQuantiser>(maker.gpair);
maker.page = page.get();
maker.InitFeatureGroupsOnce();
@@ -246,6 +246,7 @@ void UpdateTree(Context const* ctx, HostDeviceVector<GradientPair>* gpair, DMatr
ObjInfo task{ObjInfo::kRegression};
tree::GPUHistMaker hist_maker{ctx, &task};
hist_maker.Configure(Args{});
std::vector<HostDeviceVector<bst_node_t>> position(1);
hist_maker.Update(&param, gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position},
@@ -397,14 +398,14 @@ TEST(GpuHist, ConfigIO) {
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_gpu_hist", &ctx, &task)};
updater->Configure(Args{});
Json j_updater { Object() };
Json j_updater{Object{}};
updater->SaveConfig(&j_updater);
ASSERT_TRUE(IsA<Object>(j_updater["gpu_hist_train_param"]));
ASSERT_TRUE(IsA<Object>(j_updater["hist_train_param"]));
updater->LoadConfig(j_updater);
Json j_updater_roundtrip { Object() };
Json j_updater_roundtrip{Object{}};
updater->SaveConfig(&j_updater_roundtrip);
ASSERT_TRUE(IsA<Object>(j_updater_roundtrip["gpu_hist_train_param"]));
ASSERT_TRUE(IsA<Object>(j_updater_roundtrip["hist_train_param"]));
ASSERT_EQ(j_updater, j_updater_roundtrip);
}

View File

@@ -39,6 +39,7 @@ TEST(GrowHistMaker, InteractionConstraint) {
param.UpdateAllowUnknown(
Args{{"interaction_constraints", "[[0, 1]]"}, {"num_feature", std::to_string(kCols)}});
std::vector<HostDeviceVector<bst_node_t>> position(1);
updater->Configure(Args{});
updater->Update(&param, p_gradients.get(), p_dmat.get(), position, {&tree});
ASSERT_EQ(tree.NumExtraNodes(), 4);
@@ -55,6 +56,7 @@ TEST(GrowHistMaker, InteractionConstraint) {
std::vector<HostDeviceVector<bst_node_t>> position(1);
TrainParam param;
param.Init(Args{});
updater->Configure(Args{});
updater->Update(&param, p_gradients.get(), p_dmat.get(), position, {&tree});
ASSERT_EQ(tree.NumExtraNodes(), 10);
@@ -81,6 +83,7 @@ void VerifyColumnSplit(int32_t rows, bst_feature_t cols, bool categorical,
RegTree tree{1u, cols};
TrainParam param;
param.Init(Args{});
updater->Configure(Args{});
updater->Update(&param, p_gradients.get(), sliced.get(), position, {&tree});
Json json{Object{}};
@@ -104,6 +107,7 @@ void TestColumnSplit(bool categorical) {
std::vector<HostDeviceVector<bst_node_t>> position(1);
TrainParam param;
param.Init(Args{});
updater->Configure(Args{});
updater->Update(&param, p_gradients.get(), p_dmat.get(), position, {&expected_tree});
}

View File

@@ -73,6 +73,7 @@ class TestPredictionCache : public ::testing::Test {
tree::TrainParam param;
param.UpdateAllowUnknown(Args{{"max_bin", "64"}});
updater->Configure(Args{});
std::vector<HostDeviceVector<bst_node_t>> position(1);
updater->Update(&param, &gpair, Xy_.get(), position, trees);
HostDeviceVector<float> out_prediction_cached;

View File

@@ -13,7 +13,6 @@
#include "../../../src/tree/common_row_partitioner.h"
#include "../../../src/tree/hist/expand_entry.h" // for MultiExpandEntry, CPUExpandEntry
#include "../../../src/tree/param.h"
#include "../../../src/tree/split_evaluator.h"
#include "../helpers.h"
#include "test_partitioner.h"
#include "xgboost/data.h"
@@ -49,7 +48,7 @@ void TestPartitioner(bst_target_t n_targets) {
auto min_value = gmat.cut.MinValues()[split_ind];
RegTree tree{n_targets, n_features};
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
if constexpr (std::is_same<ExpandEntry, CPUExpandEntry>::value) {
if constexpr (std::is_same_v<ExpandEntry, CPUExpandEntry>) {
GetSplit(&tree, min_value, &candidates);
} else {
GetMultiSplitForTest(&tree, min_value, &candidates);
@@ -217,6 +216,7 @@ void VerifyColumnSplit(bst_row_t rows, bst_feature_t cols, bst_target_t n_target
RegTree tree{n_targets, cols};
TrainParam param;
param.Init(Args{});
updater->Configure(Args{});
updater->Update(&param, p_gradients.get(), sliced.get(), position, {&tree});
Json json{Object{}};
@@ -241,6 +241,7 @@ void TestColumnSplit(bst_target_t n_targets) {
std::vector<HostDeviceVector<bst_node_t>> position(1);
TrainParam param;
param.Init(Args{});
updater->Configure(Args{});
updater->Update(&param, p_gradients.get(), Xy.get(), position, {&expected_tree});
}

View File

@@ -1459,6 +1459,7 @@ class TestWithDask:
tree_method: str,
) -> None:
params["tree_method"] = tree_method
params["debug_synchronize"] = True
params = dataset.set_params(params)
# It doesn't make sense to distribute a completely
# empty dataset.