Unify the partitioner for hist and approx.

Co-authored-by: dmitry.razdoburdin <drazdobu@jfldaal005.jf.intel.com>
Co-authored-by: jiamingy <jm.yuan@outlook.com>
This commit is contained in:
Dmitry Razdoburdin
2022-10-19 20:49:20 +02:00
committed by GitHub
parent c69af90319
commit 5bd849f1b5
13 changed files with 358 additions and 450 deletions

View File

@@ -5,8 +5,8 @@
#include <xgboost/base.h>
#include "../../../../src/common/hist_util.h"
#include "../../../../src/tree/common_row_partitioner.h"
#include "../../../../src/tree/hist/evaluate_splits.h"
#include "../../../../src/tree/updater_quantile_hist.h"
#include "../test_evaluate_splits.h"
#include "../../helpers.h"

View File

@@ -4,7 +4,7 @@
#include <gtest/gtest.h>
#include "../../../src/common/numeric.h"
#include "../../../src/tree/updater_approx.h"
#include "../../../src/tree/common_row_partitioner.h"
#include "../helpers.h"
#include "test_partitioner.h"
@@ -12,13 +12,13 @@ namespace xgboost {
namespace tree {
TEST(Approx, Partitioner) {
size_t n_samples = 1024, n_features = 1, base_rowid = 0;
ApproxRowPartitioner partitioner{n_samples, base_rowid};
GenericParameter ctx;
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
ASSERT_EQ(partitioner.base_rowid, base_rowid);
ASSERT_EQ(partitioner.Size(), 1);
ASSERT_EQ(partitioner.Partitions()[0].Size(), n_samples);
auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
GenericParameter ctx;
ctx.InitAllowUnknown(Args{});
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
@@ -32,7 +32,7 @@ TEST(Approx, Partitioner) {
{
auto min_value = page.cut.MinValues()[split_ind];
RegTree tree;
ApproxRowPartitioner partitioner{n_samples, base_rowid};
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
GetSplit(&tree, min_value, &candidates);
partitioner.UpdatePosition(&ctx, page, candidates, &tree);
ASSERT_EQ(partitioner.Size(), 3);
@@ -40,7 +40,7 @@ TEST(Approx, Partitioner) {
ASSERT_EQ(partitioner[2].Size(), n_samples);
}
{
ApproxRowPartitioner partitioner{n_samples, base_rowid};
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
auto ptr = page.cut.Ptrs()[split_ind + 1];
float split_value = page.cut.Values().at(ptr / 2);
RegTree tree;
@@ -65,14 +65,15 @@ TEST(Approx, Partitioner) {
}
}
}
namespace {
void TestLeafPartition(size_t n_samples) {
size_t const n_features = 2, base_rowid = 0;
GenericParameter ctx;
common::RowSetCollection row_set;
ApproxRowPartitioner partitioner{n_samples, base_rowid};
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
GenericParameter ctx;
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
RegTree tree;
std::vector<float> hess(n_samples, 0);
@@ -81,11 +82,9 @@ void TestLeafPartition(size_t n_samples) {
size_t const kSampleFactor{3};
return i % kSampleFactor != 0;
};
size_t n{0};
for (size_t i = 0; i < hess.size(); ++i) {
if (not_sampled(i)) {
hess[i] = 1.0f;
++n;
}
}

View File

@@ -12,8 +12,7 @@ TEST(GrowHistMaker, InteractionConstraint) {
size_t constexpr kRows = 32;
size_t constexpr kCols = 16;
GenericParameter param;
param.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
Context ctx;
auto p_dmat = RandomDataGenerator{kRows, kCols, 0.6f}.Seed(3).GenerateDMatrix();
@@ -35,7 +34,7 @@ TEST(GrowHistMaker, InteractionConstraint) {
tree.param.num_feature = kCols;
std::unique_ptr<TreeUpdater> updater{
TreeUpdater::Create("grow_histmaker", &param, ObjInfo{ObjInfo::kRegression})};
TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
updater->Configure(Args{
{"interaction_constraints", "[[0, 1]]"},
{"num_feature", std::to_string(kCols)}});
@@ -54,7 +53,7 @@ TEST(GrowHistMaker, InteractionConstraint) {
tree.param.num_feature = kCols;
std::unique_ptr<TreeUpdater> updater{
TreeUpdater::Create("grow_histmaker", &param, ObjInfo{ObjInfo::kRegression})};
TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
updater->Configure(Args{{"num_feature", std::to_string(kCols)}});
std::vector<HostDeviceVector<bst_node_t>> position(1);
updater->Update(&gradients, p_dmat.get(), position, {&tree});

View File

@@ -11,7 +11,7 @@
#include "../../../src/tree/param.h"
#include "../../../src/tree/split_evaluator.h"
#include "../../../src/tree/updater_quantile_hist.h"
#include "../../../src/tree/common_row_partitioner.h"
#include "../helpers.h"
#include "test_partitioner.h"
#include "xgboost/data.h"
@@ -23,7 +23,7 @@ TEST(QuantileHist, Partitioner) {
GenericParameter ctx;
ctx.InitAllowUnknown(Args{});
HistRowPartitioner partitioner{n_samples, base_rowid, ctx.Threads()};
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
ASSERT_EQ(partitioner.base_rowid, base_rowid);
ASSERT_EQ(partitioner.Size(), 1);
ASSERT_EQ(partitioner.Partitions()[0].Size(), n_samples);
@@ -41,7 +41,7 @@ TEST(QuantileHist, Partitioner) {
{
auto min_value = gmat.cut.MinValues()[split_ind];
RegTree tree;
HistRowPartitioner partitioner{n_samples, base_rowid, ctx.Threads()};
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
GetSplit(&tree, min_value, &candidates);
partitioner.UpdatePosition<false, true>(&ctx, gmat, column_indices, candidates, &tree);
ASSERT_EQ(partitioner.Size(), 3);
@@ -49,7 +49,7 @@ TEST(QuantileHist, Partitioner) {
ASSERT_EQ(partitioner[2].Size(), n_samples);
}
{
HistRowPartitioner partitioner{n_samples, base_rowid, ctx.Threads()};
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
auto ptr = gmat.cut.Ptrs()[split_ind + 1];
float split_value = gmat.cut.Values().at(ptr / 2);
RegTree tree;

View File

@@ -1,6 +1,6 @@
#include <xgboost/tree_updater.h>
#include <xgboost/tree_model.h>
#include <gtest/gtest.h>
#include <xgboost/tree_model.h>
#include <xgboost/tree_updater.h>
#include "../helpers.h"
@@ -21,9 +21,10 @@ class UpdaterTreeStatTest : public ::testing::Test {
}
void RunTest(std::string updater) {
auto tparam = CreateEmptyGenericParam(0);
Context ctx(updater == "grow_gpu_hist" ? CreateEmptyGenericParam(0)
: CreateEmptyGenericParam(Context::kCpuId));
auto up = std::unique_ptr<TreeUpdater>{
TreeUpdater::Create(updater, &tparam, ObjInfo{ObjInfo::kRegression})};
TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kRegression})};
up->Configure(Args{});
RegTree tree;
tree.param.num_feature = kCols;
@@ -41,22 +42,14 @@ class UpdaterTreeStatTest : public ::testing::Test {
};
#if defined(XGBOOST_USE_CUDA)
TEST_F(UpdaterTreeStatTest, GpuHist) {
this->RunTest("grow_gpu_hist");
}
TEST_F(UpdaterTreeStatTest, GpuHist) { this->RunTest("grow_gpu_hist"); }
#endif // defined(XGBOOST_USE_CUDA)
TEST_F(UpdaterTreeStatTest, Hist) {
this->RunTest("grow_quantile_histmaker");
}
TEST_F(UpdaterTreeStatTest, Hist) { this->RunTest("grow_quantile_histmaker"); }
TEST_F(UpdaterTreeStatTest, Exact) {
this->RunTest("grow_colmaker");
}
TEST_F(UpdaterTreeStatTest, Exact) { this->RunTest("grow_colmaker"); }
TEST_F(UpdaterTreeStatTest, Approx) {
this->RunTest("grow_histmaker");
}
TEST_F(UpdaterTreeStatTest, Approx) { this->RunTest("grow_histmaker"); }
class UpdaterEtaTest : public ::testing::Test {
protected:
@@ -74,14 +67,15 @@ class UpdaterEtaTest : public ::testing::Test {
}
void RunTest(std::string updater) {
auto tparam = CreateEmptyGenericParam(0);
GenericParameter ctx(updater == "grow_gpu_hist" ? CreateEmptyGenericParam(0)
: CreateEmptyGenericParam(Context::kCpuId));
float eta = 0.4;
auto up_0 = std::unique_ptr<TreeUpdater>{
TreeUpdater::Create(updater, &tparam, ObjInfo{ObjInfo::kClassification})};
TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kClassification})};
up_0->Configure(Args{{"eta", std::to_string(eta)}});
auto up_1 = std::unique_ptr<TreeUpdater>{
TreeUpdater::Create(updater, &tparam, ObjInfo{ObjInfo::kClassification})};
TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kClassification})};
up_1->Configure(Args{{"eta", "1.0"}});
for (size_t iter = 0; iter < 4; ++iter) {
@@ -130,7 +124,7 @@ class TestMinSplitLoss : public ::testing::Test {
gpair_ = GenerateRandomGradients(kRows);
}
int32_t Update(std::string updater, float gamma) {
std::int32_t Update(std::string updater, float gamma) {
Args args{{"max_depth", "1"},
{"max_leaves", "0"},
@@ -146,9 +140,12 @@ class TestMinSplitLoss : public ::testing::Test {
// test gamma
{"gamma", std::to_string(gamma)}};
GenericParameter generic_param(CreateEmptyGenericParam(0));
std::cout << "updater:" << updater << std::endl;
GenericParameter ctx(updater == "grow_gpu_hist" ? CreateEmptyGenericParam(0)
: CreateEmptyGenericParam(Context::kCpuId));
std::cout << ctx.gpu_id << std::endl;
auto up = std::unique_ptr<TreeUpdater>{
TreeUpdater::Create(updater, &generic_param, ObjInfo{ObjInfo::kRegression})};
TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kRegression})};
up->Configure(args);
RegTree tree;