Unify the row partitioner for the hist and approx tree methods.

Co-authored-by: dmitry.razdoburdin <drazdobu@jfldaal005.jf.intel.com>
Co-authored-by: jiamingy <jm.yuan@outlook.com>
This commit is contained in:
Dmitry Razdoburdin
2022-10-19 20:49:20 +02:00
committed by GitHub
parent c69af90319
commit 5bd849f1b5
13 changed files with 358 additions and 450 deletions

View File

@@ -4,7 +4,7 @@
#include <gtest/gtest.h>
#include "../../../src/common/numeric.h"
#include "../../../src/tree/updater_approx.h"
#include "../../../src/tree/common_row_partitioner.h"
#include "../helpers.h"
#include "test_partitioner.h"
@@ -12,13 +12,13 @@ namespace xgboost {
namespace tree {
TEST(Approx, Partitioner) {
size_t n_samples = 1024, n_features = 1, base_rowid = 0;
ApproxRowPartitioner partitioner{n_samples, base_rowid};
GenericParameter ctx;
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
ASSERT_EQ(partitioner.base_rowid, base_rowid);
ASSERT_EQ(partitioner.Size(), 1);
ASSERT_EQ(partitioner.Partitions()[0].Size(), n_samples);
auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
GenericParameter ctx;
ctx.InitAllowUnknown(Args{});
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
@@ -32,7 +32,7 @@ TEST(Approx, Partitioner) {
{
auto min_value = page.cut.MinValues()[split_ind];
RegTree tree;
ApproxRowPartitioner partitioner{n_samples, base_rowid};
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
GetSplit(&tree, min_value, &candidates);
partitioner.UpdatePosition(&ctx, page, candidates, &tree);
ASSERT_EQ(partitioner.Size(), 3);
@@ -40,7 +40,7 @@ TEST(Approx, Partitioner) {
ASSERT_EQ(partitioner[2].Size(), n_samples);
}
{
ApproxRowPartitioner partitioner{n_samples, base_rowid};
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
auto ptr = page.cut.Ptrs()[split_ind + 1];
float split_value = page.cut.Values().at(ptr / 2);
RegTree tree;
@@ -65,14 +65,15 @@ TEST(Approx, Partitioner) {
}
}
}
namespace {
void TestLeafPartition(size_t n_samples) {
size_t const n_features = 2, base_rowid = 0;
GenericParameter ctx;
common::RowSetCollection row_set;
ApproxRowPartitioner partitioner{n_samples, base_rowid};
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
GenericParameter ctx;
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
RegTree tree;
std::vector<float> hess(n_samples, 0);
@@ -81,11 +82,9 @@ void TestLeafPartition(size_t n_samples) {
size_t const kSampleFactor{3};
return i % kSampleFactor != 0;
};
size_t n{0};
for (size_t i = 0; i < hess.size(); ++i) {
if (not_sampled(i)) {
hess[i] = 1.0f;
++n;
}
}