Refactor gpu_hist split evaluation (#5610)

* Refactor

* Rewrite evaluate splits

* Add more tests
This commit is contained in:
Rory Mitchell
2020-04-30 08:58:12 +12:00
committed by GitHub
parent dfcdfabf1f
commit b9649e7b8e
9 changed files with 678 additions and 355 deletions

View File

@@ -0,0 +1,222 @@
#include <gtest/gtest.h>
#include "../../../../src/tree/gpu_hist/evaluate_splits.cuh"
#include "../../helpers.h"
#include "../../histogram_helpers.h"
namespace xgboost {
namespace tree {
TEST(GpuHist, EvaluateSingleSplit) {
thrust::device_vector<DeviceSplitCandidate> out_splits(1);
GradientPair parent_sum(0.0, 1.0);
GPUTrainingParam param{};
thrust::device_vector<bst_feature_t> feature_set =
std::vector<bst_feature_t>{0, 1};
thrust::device_vector<uint32_t> feature_segments =
std::vector<bst_row_t>{0, 2, 4};
thrust::device_vector<float> feature_values =
std::vector<float>{1.0, 2.0, 11.0, 12.0};
thrust::device_vector<float> feature_min_values =
std::vector<float>{0.0, 0.0};
// Setup gradients so that second feature gets higher gain
thrust::device_vector<GradientPair> feature_histogram =
std::vector<GradientPair>{
{-0.5, 0.5}, {0.5, 0.5}, {-1.0, 0.5}, {1.0, 0.5}};
thrust::device_vector<int> monotonic_constraints(feature_set.size(), 0);
EvaluateSplitInputs<GradientPair> input{1,
parent_sum,
param,
dh::ToSpan(feature_set),
dh::ToSpan(feature_segments),
dh::ToSpan(feature_values),
dh::ToSpan(feature_min_values),
dh::ToSpan(feature_histogram),
ValueConstraint(),
dh::ToSpan(monotonic_constraints)};
EvaluateSingleSplit(dh::ToSpan(out_splits), input);
DeviceSplitCandidate result = out_splits[0];
EXPECT_EQ(result.findex, 1);
EXPECT_EQ(result.fvalue, 11.0);
EXPECT_FLOAT_EQ(result.left_sum.GetGrad() + result.right_sum.GetGrad(),
parent_sum.GetGrad());
EXPECT_FLOAT_EQ(result.left_sum.GetHess() + result.right_sum.GetHess(),
parent_sum.GetHess());
}
TEST(GpuHist, EvaluateSingleSplitMissing) {
thrust::device_vector<DeviceSplitCandidate> out_splits(1);
GradientPair parent_sum(1.0, 1.5);
GPUTrainingParam param{};
thrust::device_vector<bst_feature_t> feature_set =
std::vector<bst_feature_t>{0};
thrust::device_vector<uint32_t> feature_segments =
std::vector<bst_row_t>{0, 2};
thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0};
thrust::device_vector<float> feature_min_values = std::vector<float>{0.0};
thrust::device_vector<GradientPair> feature_histogram =
std::vector<GradientPair>{{-0.5, 0.5}, {0.5, 0.5}};
thrust::device_vector<int> monotonic_constraints(feature_set.size(), 0);
EvaluateSplitInputs<GradientPair> input{1,
parent_sum,
param,
dh::ToSpan(feature_set),
dh::ToSpan(feature_segments),
dh::ToSpan(feature_values),
dh::ToSpan(feature_min_values),
dh::ToSpan(feature_histogram),
ValueConstraint(),
dh::ToSpan(monotonic_constraints)};
EvaluateSingleSplit(dh::ToSpan(out_splits), input);
DeviceSplitCandidate result = out_splits[0];
EXPECT_EQ(result.findex, 0);
EXPECT_EQ(result.fvalue, 1.0);
EXPECT_EQ(result.dir, kRightDir);
EXPECT_EQ(result.left_sum, GradientPair(-0.5, 0.5));
EXPECT_EQ(result.right_sum, GradientPair(1.5, 1.0));
}
TEST(GpuHist, EvaluateSingleSplitEmpty) {
DeviceSplitCandidate nonzeroed;
nonzeroed.findex = 1;
nonzeroed.loss_chg = 1.0;
thrust::device_vector<DeviceSplitCandidate> out_split(1);
out_split[0] = nonzeroed;
EvaluateSingleSplit(dh::ToSpan(out_split),
EvaluateSplitInputs<GradientPair>{});
DeviceSplitCandidate result = out_split[0];
EXPECT_EQ(result.findex, -1);
EXPECT_LT(result.loss_chg, 0.0f);
}
// Feature 0 has a better split, but the algorithm must select feature 1
TEST(GpuHist, EvaluateSingleSplitFeatureSampling) {
thrust::device_vector<DeviceSplitCandidate> out_splits(1);
GradientPair parent_sum(0.0, 1.0);
GPUTrainingParam param{};
thrust::device_vector<bst_feature_t> feature_set =
std::vector<bst_feature_t>{1};
thrust::device_vector<uint32_t> feature_segments =
std::vector<bst_row_t>{0, 2, 4};
thrust::device_vector<float> feature_values =
std::vector<float>{1.0, 2.0, 11.0, 12.0};
thrust::device_vector<float> feature_min_values =
std::vector<float>{0.0, 10.0};
thrust::device_vector<GradientPair> feature_histogram =
std::vector<GradientPair>{
{-10.0, 0.5}, {10.0, 0.5}, {-0.5, 0.5}, {0.5, 0.5}};
thrust::device_vector<int> monotonic_constraints(2, 0);
EvaluateSplitInputs<GradientPair> input{1,
parent_sum,
param,
dh::ToSpan(feature_set),
dh::ToSpan(feature_segments),
dh::ToSpan(feature_values),
dh::ToSpan(feature_min_values),
dh::ToSpan(feature_histogram),
ValueConstraint(),
dh::ToSpan(monotonic_constraints)};
EvaluateSingleSplit(dh::ToSpan(out_splits), input);
DeviceSplitCandidate result = out_splits[0];
EXPECT_EQ(result.findex, 1);
EXPECT_EQ(result.fvalue, 11.0);
EXPECT_EQ(result.left_sum, GradientPair(-0.5, 0.5));
EXPECT_EQ(result.right_sum, GradientPair(0.5, 0.5));
}
// Features 0 and 1 have identical gain, the algorithm must select 0
TEST(GpuHist, EvaluateSingleSplitBreakTies) {
thrust::device_vector<DeviceSplitCandidate> out_splits(1);
GradientPair parent_sum(0.0, 1.0);
GPUTrainingParam param{};
thrust::device_vector<bst_feature_t> feature_set =
std::vector<bst_feature_t>{0, 1};
thrust::device_vector<uint32_t> feature_segments =
std::vector<bst_row_t>{0, 2, 4};
thrust::device_vector<float> feature_values =
std::vector<float>{1.0, 2.0, 11.0, 12.0};
thrust::device_vector<float> feature_min_values =
std::vector<float>{0.0, 10.0};
thrust::device_vector<GradientPair> feature_histogram =
std::vector<GradientPair>{
{-0.5, 0.5}, {0.5, 0.5}, {-0.5, 0.5}, {0.5, 0.5}};
thrust::device_vector<int> monotonic_constraints(2, 0);
EvaluateSplitInputs<GradientPair> input{1,
parent_sum,
param,
dh::ToSpan(feature_set),
dh::ToSpan(feature_segments),
dh::ToSpan(feature_values),
dh::ToSpan(feature_min_values),
dh::ToSpan(feature_histogram),
ValueConstraint(),
dh::ToSpan(monotonic_constraints)};
EvaluateSingleSplit(dh::ToSpan(out_splits), input);
DeviceSplitCandidate result = out_splits[0];
EXPECT_EQ(result.findex, 0);
EXPECT_EQ(result.fvalue, 1.0);
}
TEST(GpuHist, EvaluateSplits) {
thrust::device_vector<DeviceSplitCandidate> out_splits(2);
GradientPair parent_sum(0.0, 1.0);
GPUTrainingParam param{};
thrust::device_vector<bst_feature_t> feature_set =
std::vector<bst_feature_t>{0, 1};
thrust::device_vector<uint32_t> feature_segments =
std::vector<bst_row_t>{0, 2, 4};
thrust::device_vector<float> feature_values =
std::vector<float>{1.0, 2.0, 11.0, 12.0};
thrust::device_vector<float> feature_min_values =
std::vector<float>{0.0, 0.0};
thrust::device_vector<GradientPair> feature_histogram_left =
std::vector<GradientPair>{
{-0.5, 0.5}, {0.5, 0.5}, {-1.0, 0.5}, {1.0, 0.5}};
thrust::device_vector<GradientPair> feature_histogram_right =
std::vector<GradientPair>{
{-1.0, 0.5}, {1.0, 0.5}, {-0.5, 0.5}, {0.5, 0.5}};
thrust::device_vector<int> monotonic_constraints(feature_set.size(), 0);
EvaluateSplitInputs<GradientPair> input_left{
1,
parent_sum,
param,
dh::ToSpan(feature_set),
dh::ToSpan(feature_segments),
dh::ToSpan(feature_values),
dh::ToSpan(feature_min_values),
dh::ToSpan(feature_histogram_left),
ValueConstraint(),
dh::ToSpan(monotonic_constraints)};
EvaluateSplitInputs<GradientPair> input_right{
2,
parent_sum,
param,
dh::ToSpan(feature_set),
dh::ToSpan(feature_segments),
dh::ToSpan(feature_values),
dh::ToSpan(feature_min_values),
dh::ToSpan(feature_histogram_right),
ValueConstraint(),
dh::ToSpan(monotonic_constraints)};
EvaluateSplits(dh::ToSpan(out_splits), input_left, input_right);
DeviceSplitCandidate result_left = out_splits[0];
EXPECT_EQ(result_left.findex, 1);
EXPECT_EQ(result_left.fvalue, 11.0);
DeviceSplitCandidate result_right = out_splits[1];
EXPECT_EQ(result_right.findex, 0);
EXPECT_EQ(result_right.fvalue, 1.0);
}
} // namespace tree
} // namespace xgboost

View File

@@ -41,9 +41,9 @@ void VerifySampling(size_t page_size,
EXPECT_EQ(sample.page->n_rows, kRows);
EXPECT_EQ(sample.gpair.size(), kRows);
} else {
EXPECT_NEAR(sample.sample_rows, sample_rows, kRows * 0.016);
EXPECT_NEAR(sample.page->n_rows, sample_rows, kRows * 0.016f);
EXPECT_NEAR(sample.gpair.size(), sample_rows, kRows * 0.016f);
EXPECT_NEAR(sample.sample_rows, sample_rows, kRows * 0.03);
EXPECT_NEAR(sample.page->n_rows, sample_rows, kRows * 0.03f);
EXPECT_NEAR(sample.gpair.size(), sample_rows, kRows * 0.03f);
}
GradientPair sum_sampled_gpair{};

View File

@@ -82,8 +82,6 @@ void TestBuildHist(bool use_shared_memory_histograms) {
BatchParam batch_param{};
GPUHistMakerDevice<GradientSumT> maker(0, page.get(), kNRows, param, kNCols, kNCols,
true, batch_param);
maker.InitHistogram();
xgboost::SimpleLCG gen;
xgboost::SimpleRealUniformDistribution<bst_float> dist(0.0f, 1.0f);
HostDeviceVector<GradientPair> gpair(kNRows);
@@ -150,7 +148,7 @@ HistogramCutsWrapper GetHostCutMatrix () {
}
// TODO(trivialfis): This test is over simplified.
TEST(GpuHist, EvaluateSplits) {
TEST(GpuHist, EvaluateRootSplit) {
constexpr int kNRows = 16;
constexpr int kNCols = 8;
@@ -158,16 +156,16 @@ TEST(GpuHist, EvaluateSplits) {
std::vector<std::pair<std::string, std::string>> args {
{"max_depth", "1"},
{"max_leaves", "0"},
{"max_leaves", "0"},
// Disable all other parameters.
{"colsample_bynode", "1"},
{"colsample_bylevel", "1"},
{"colsample_bytree", "1"},
{"min_child_weight", "0.01"},
{"reg_alpha", "0"},
{"reg_lambda", "0"},
{"max_delta_step", "0"}
// Disable all other parameters.
{"colsample_bynode", "1"},
{"colsample_bylevel", "1"},
{"colsample_bytree", "1"},
{"min_child_weight", "0.01"},
{"reg_alpha", "0"},
{"reg_lambda", "0"},
{"max_delta_step", "0"}
};
param.Init(args);
for (size_t i = 0; i < kNCols; ++i) {
@@ -180,9 +178,9 @@ TEST(GpuHist, EvaluateSplits) {
auto page = BuildEllpackPage(kNRows, kNCols);
BatchParam batch_param{};
GPUHistMakerDevice<GradientPairPrecise>
maker(0, page.get(), kNRows, param, kNCols, kNCols, true, batch_param);
maker(0, page.get(), kNRows, param, kNCols, kNCols, true, batch_param);
// Initialize GPUHistMakerDevice::node_sum_gradients
maker.host_node_sum_gradients = {{6.4f, 12.8f}};
maker.node_sum_gradients = {};
// Initialize GPUHistMakerDevice::cut
auto cmat = GetHostCutMatrix();
@@ -205,13 +203,13 @@ TEST(GpuHist, EvaluateSplits) {
ASSERT_EQ(maker.hist.Data().size(), hist.size());
thrust::copy(hist.begin(), hist.end(),
maker.hist.Data().begin());
maker.hist.Data().begin());
maker.column_sampler.Init(kNCols,
param.colsample_bynode,
param.colsample_bylevel,
param.colsample_bytree,
false);
param.colsample_bynode,
param.colsample_bylevel,
param.colsample_bytree,
false);
RegTree tree;
MetaInfo info;
@@ -222,12 +220,10 @@ TEST(GpuHist, EvaluateSplits) {
maker.node_value_constraints[0].lower_bound = -1.0;
maker.node_value_constraints[0].upper_bound = 1.0;
std::vector<DeviceSplitCandidate> res = maker.EvaluateSplits({0, 0 }, tree, kNCols);
DeviceSplitCandidate res = maker.EvaluateRootSplit({6.4f, 12.8f});
ASSERT_EQ(res[0].findex, 7);
ASSERT_EQ(res[1].findex, 7);
ASSERT_NEAR(res[0].fvalue, 0.26, xgboost::kRtEps);
ASSERT_NEAR(res[1].fvalue, 0.26, xgboost::kRtEps);
ASSERT_EQ(res.findex, 7);
ASSERT_NEAR(res.fvalue, 0.26, xgboost::kRtEps);
}
void TestHistogramIndexImpl() {