Refactor gpu_hist split evaluation (#5610)
* Refactor * Rewrite evaluate splits * Add more tests
This commit is contained in:
222
tests/cpp/tree/gpu_hist/test_evaluate_splits.cu
Normal file
222
tests/cpp/tree/gpu_hist/test_evaluate_splits.cu
Normal file
@@ -0,0 +1,222 @@
|
||||
#include <gtest/gtest.h>
|
||||
#include "../../../../src/tree/gpu_hist/evaluate_splits.cuh"
|
||||
#include "../../helpers.h"
|
||||
#include "../../histogram_helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
|
||||
// Verifies that EvaluateSingleSplit selects the best split among two candidate
// features and that the reported left/right gradient sums add back up to the
// parent sum.
TEST(GpuHist, EvaluateSingleSplit) {
  thrust::device_vector<DeviceSplitCandidate> out_splits(1);
  GradientPair parent_sum(0.0f, 1.0f);
  GPUTrainingParam param{};

  thrust::device_vector<bst_feature_t> feature_set =
      std::vector<bst_feature_t>{0, 1};
  // The host staging vector uses the same element type as the device vector,
  // so the upload involves no implicit per-element integer conversion.
  // Presumably these are per-feature bin offsets (feature 0 -> bins [0, 2),
  // feature 1 -> bins [2, 4)) -- confirm against evaluate_splits.cuh.
  thrust::device_vector<uint32_t> feature_segments =
      std::vector<uint32_t>{0, 2, 4};
  thrust::device_vector<float> feature_values =
      std::vector<float>{1.0f, 2.0f, 11.0f, 12.0f};
  thrust::device_vector<float> feature_min_values =
      std::vector<float>{0.0f, 0.0f};
  // Set up gradients so that the second feature gets the higher gain.
  thrust::device_vector<GradientPair> feature_histogram =
      std::vector<GradientPair>{
          {-0.5f, 0.5f}, {0.5f, 0.5f}, {-1.0f, 0.5f}, {1.0f, 0.5f}};
  // One zero-valued constraint entry per feature in feature_set.
  thrust::device_vector<int> monotonic_constraints(feature_set.size(), 0);
  // NOTE(review): the leading `1` is presumably the node index -- confirm
  // against the EvaluateSplitInputs field order.
  EvaluateSplitInputs<GradientPair> input{1,
                                          parent_sum,
                                          param,
                                          dh::ToSpan(feature_set),
                                          dh::ToSpan(feature_segments),
                                          dh::ToSpan(feature_values),
                                          dh::ToSpan(feature_min_values),
                                          dh::ToSpan(feature_histogram),
                                          ValueConstraint(),
                                          dh::ToSpan(monotonic_constraints)};
  EvaluateSingleSplit(dh::ToSpan(out_splits), input);

  // Copy the single candidate back to the host for inspection.
  DeviceSplitCandidate result = out_splits[0];
  EXPECT_EQ(result.findex, 1);
  EXPECT_EQ(result.fvalue, 11.0f);
  // Whatever the chosen direction, the two children must partition the parent.
  EXPECT_FLOAT_EQ(result.left_sum.GetGrad() + result.right_sum.GetGrad(),
                  parent_sum.GetGrad());
  EXPECT_FLOAT_EQ(result.left_sum.GetHess() + result.right_sum.GetHess(),
                  parent_sum.GetHess());
}
|
||||
|
||||
// Verifies the split direction and child sums when the histogram does not
// account for the whole parent sum.
//
// The two histogram bins sum to (0.0, 1.0) while parent_sum is (1.0, 1.5); the
// remaining (1.0, 0.5) is presumably the contribution of rows whose feature
// value is missing. The expectations below place that remainder on the right.
TEST(GpuHist, EvaluateSingleSplitMissing) {
  thrust::device_vector<DeviceSplitCandidate> out_splits(1);
  GradientPair parent_sum(1.0f, 1.5f);
  GPUTrainingParam param{};

  thrust::device_vector<bst_feature_t> feature_set =
      std::vector<bst_feature_t>{0};
  // The host staging vector uses the same element type as the device vector,
  // so the upload involves no implicit per-element integer conversion.
  thrust::device_vector<uint32_t> feature_segments =
      std::vector<uint32_t>{0, 2};
  thrust::device_vector<float> feature_values =
      std::vector<float>{1.0f, 2.0f};
  thrust::device_vector<float> feature_min_values = std::vector<float>{0.0f};
  thrust::device_vector<GradientPair> feature_histogram =
      std::vector<GradientPair>{{-0.5f, 0.5f}, {0.5f, 0.5f}};
  thrust::device_vector<int> monotonic_constraints(feature_set.size(), 0);
  // NOTE(review): the leading `1` is presumably the node index -- confirm
  // against the EvaluateSplitInputs field order.
  EvaluateSplitInputs<GradientPair> input{1,
                                          parent_sum,
                                          param,
                                          dh::ToSpan(feature_set),
                                          dh::ToSpan(feature_segments),
                                          dh::ToSpan(feature_values),
                                          dh::ToSpan(feature_min_values),
                                          dh::ToSpan(feature_histogram),
                                          ValueConstraint(),
                                          dh::ToSpan(monotonic_constraints)};
  EvaluateSingleSplit(dh::ToSpan(out_splits), input);

  // Copy the single candidate back to the host for inspection.
  DeviceSplitCandidate result = out_splits[0];
  EXPECT_EQ(result.findex, 0);
  EXPECT_EQ(result.fvalue, 1.0f);
  EXPECT_EQ(result.dir, kRightDir);
  // Left child: first bin only. Right child: second bin plus the remainder,
  // (0.5, 0.5) + (1.0, 0.5) = (1.5, 1.0).
  EXPECT_EQ(result.left_sum, GradientPair(-0.5f, 0.5f));
  EXPECT_EQ(result.right_sum, GradientPair(1.5f, 1.0f));
}
|
||||
|
||||
// With value-initialised (empty) inputs, EvaluateSingleSplit must still
// overwrite whatever was in the output slot: the candidate read back is
// expected to carry findex == -1 and a negative loss_chg.
TEST(GpuHist, EvaluateSingleSplitEmpty) {
  // Seed the output slot with a plausible-looking candidate so that a no-op
  // evaluation would be detected by the assertions below.
  DeviceSplitCandidate stale_candidate;
  stale_candidate.findex = 1;
  stale_candidate.loss_chg = 1.0;

  thrust::device_vector<DeviceSplitCandidate> device_out(1);
  device_out[0] = stale_candidate;

  EvaluateSplitInputs<GradientPair> empty_inputs{};
  EvaluateSingleSplit(dh::ToSpan(device_out), empty_inputs);

  // Read the slot back to the host.
  DeviceSplitCandidate evaluated = device_out[0];
  EXPECT_EQ(evaluated.findex, -1);
  EXPECT_LT(evaluated.loss_chg, 0.0f);
}
|
||||
|
||||
// Feature 0 has a better split, but the algorithm must select feature 1
|
||||
TEST(GpuHist, EvaluateSingleSplitFeatureSampling) {
|
||||
thrust::device_vector<DeviceSplitCandidate> out_splits(1);
|
||||
GradientPair parent_sum(0.0, 1.0);
|
||||
GPUTrainingParam param{};
|
||||
|
||||
thrust::device_vector<bst_feature_t> feature_set =
|
||||
std::vector<bst_feature_t>{1};
|
||||
thrust::device_vector<uint32_t> feature_segments =
|
||||
std::vector<bst_row_t>{0, 2, 4};
|
||||
thrust::device_vector<float> feature_values =
|
||||
std::vector<float>{1.0, 2.0, 11.0, 12.0};
|
||||
thrust::device_vector<float> feature_min_values =
|
||||
std::vector<float>{0.0, 10.0};
|
||||
thrust::device_vector<GradientPair> feature_histogram =
|
||||
std::vector<GradientPair>{
|
||||
{-10.0, 0.5}, {10.0, 0.5}, {-0.5, 0.5}, {0.5, 0.5}};
|
||||
thrust::device_vector<int> monotonic_constraints(2, 0);
|
||||
EvaluateSplitInputs<GradientPair> input{1,
|
||||
parent_sum,
|
||||
param,
|
||||
dh::ToSpan(feature_set),
|
||||
dh::ToSpan(feature_segments),
|
||||
dh::ToSpan(feature_values),
|
||||
dh::ToSpan(feature_min_values),
|
||||
dh::ToSpan(feature_histogram),
|
||||
ValueConstraint(),
|
||||
dh::ToSpan(monotonic_constraints)};
|
||||
EvaluateSingleSplit(dh::ToSpan(out_splits), input);
|
||||
|
||||
DeviceSplitCandidate result = out_splits[0];
|
||||
EXPECT_EQ(result.findex, 1);
|
||||
EXPECT_EQ(result.fvalue, 11.0);
|
||||
EXPECT_EQ(result.left_sum, GradientPair(-0.5, 0.5));
|
||||
EXPECT_EQ(result.right_sum, GradientPair(0.5, 0.5));
|
||||
}
|
||||
|
||||
// Features 0 and 1 have identical gain, the algorithm must select 0
|
||||
TEST(GpuHist, EvaluateSingleSplitBreakTies) {
|
||||
thrust::device_vector<DeviceSplitCandidate> out_splits(1);
|
||||
GradientPair parent_sum(0.0, 1.0);
|
||||
GPUTrainingParam param{};
|
||||
|
||||
thrust::device_vector<bst_feature_t> feature_set =
|
||||
std::vector<bst_feature_t>{0, 1};
|
||||
thrust::device_vector<uint32_t> feature_segments =
|
||||
std::vector<bst_row_t>{0, 2, 4};
|
||||
thrust::device_vector<float> feature_values =
|
||||
std::vector<float>{1.0, 2.0, 11.0, 12.0};
|
||||
thrust::device_vector<float> feature_min_values =
|
||||
std::vector<float>{0.0, 10.0};
|
||||
thrust::device_vector<GradientPair> feature_histogram =
|
||||
std::vector<GradientPair>{
|
||||
{-0.5, 0.5}, {0.5, 0.5}, {-0.5, 0.5}, {0.5, 0.5}};
|
||||
thrust::device_vector<int> monotonic_constraints(2, 0);
|
||||
EvaluateSplitInputs<GradientPair> input{1,
|
||||
parent_sum,
|
||||
param,
|
||||
dh::ToSpan(feature_set),
|
||||
dh::ToSpan(feature_segments),
|
||||
dh::ToSpan(feature_values),
|
||||
dh::ToSpan(feature_min_values),
|
||||
dh::ToSpan(feature_histogram),
|
||||
ValueConstraint(),
|
||||
dh::ToSpan(monotonic_constraints)};
|
||||
EvaluateSingleSplit(dh::ToSpan(out_splits), input);
|
||||
|
||||
DeviceSplitCandidate result = out_splits[0];
|
||||
EXPECT_EQ(result.findex, 0);
|
||||
EXPECT_EQ(result.fvalue, 1.0);
|
||||
}
|
||||
|
||||
// Verifies that EvaluateSplits evaluates two nodes in one call and writes an
// independent result for each: out_splits[0] for input_left and out_splits[1]
// for input_right.
TEST(GpuHist, EvaluateSplits) {
  thrust::device_vector<DeviceSplitCandidate> out_splits(2);
  GradientPair parent_sum(0.0f, 1.0f);
  GPUTrainingParam param{};

  thrust::device_vector<bst_feature_t> feature_set =
      std::vector<bst_feature_t>{0, 1};
  // The host staging vector uses the same element type as the device vector,
  // so the upload involves no implicit per-element integer conversion.
  thrust::device_vector<uint32_t> feature_segments =
      std::vector<uint32_t>{0, 2, 4};
  thrust::device_vector<float> feature_values =
      std::vector<float>{1.0f, 2.0f, 11.0f, 12.0f};
  thrust::device_vector<float> feature_min_values =
      std::vector<float>{0.0f, 0.0f};
  // The two histograms are mirror images: the larger gradients sit in the
  // last two bins for the left node and in the first two bins for the right
  // node, so the nodes are expected to pick different features.
  thrust::device_vector<GradientPair> feature_histogram_left =
      std::vector<GradientPair>{
          {-0.5f, 0.5f}, {0.5f, 0.5f}, {-1.0f, 0.5f}, {1.0f, 0.5f}};
  thrust::device_vector<GradientPair> feature_histogram_right =
      std::vector<GradientPair>{
          {-1.0f, 0.5f}, {1.0f, 0.5f}, {-0.5f, 0.5f}, {0.5f, 0.5f}};
  thrust::device_vector<int> monotonic_constraints(feature_set.size(), 0);
  // NOTE(review): the leading `1` / `2` are presumably node indices -- confirm
  // against the EvaluateSplitInputs field order.
  EvaluateSplitInputs<GradientPair> input_left{
      1,
      parent_sum,
      param,
      dh::ToSpan(feature_set),
      dh::ToSpan(feature_segments),
      dh::ToSpan(feature_values),
      dh::ToSpan(feature_min_values),
      dh::ToSpan(feature_histogram_left),
      ValueConstraint(),
      dh::ToSpan(monotonic_constraints)};
  EvaluateSplitInputs<GradientPair> input_right{
      2,
      parent_sum,
      param,
      dh::ToSpan(feature_set),
      dh::ToSpan(feature_segments),
      dh::ToSpan(feature_values),
      dh::ToSpan(feature_min_values),
      dh::ToSpan(feature_histogram_right),
      ValueConstraint(),
      dh::ToSpan(monotonic_constraints)};
  EvaluateSplits(dh::ToSpan(out_splits), input_left, input_right);

  // Slot 0 holds the result for input_left.
  DeviceSplitCandidate result_left = out_splits[0];
  EXPECT_EQ(result_left.findex, 1);
  EXPECT_EQ(result_left.fvalue, 11.0f);

  // Slot 1 holds the result for input_right.
  DeviceSplitCandidate result_right = out_splits[1];
  EXPECT_EQ(result_right.findex, 0);
  EXPECT_EQ(result_right.fvalue, 1.0f);
}
|
||||
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
@@ -41,9 +41,9 @@ void VerifySampling(size_t page_size,
|
||||
EXPECT_EQ(sample.page->n_rows, kRows);
|
||||
EXPECT_EQ(sample.gpair.size(), kRows);
|
||||
} else {
|
||||
EXPECT_NEAR(sample.sample_rows, sample_rows, kRows * 0.016);
|
||||
EXPECT_NEAR(sample.page->n_rows, sample_rows, kRows * 0.016f);
|
||||
EXPECT_NEAR(sample.gpair.size(), sample_rows, kRows * 0.016f);
|
||||
EXPECT_NEAR(sample.sample_rows, sample_rows, kRows * 0.03);
|
||||
EXPECT_NEAR(sample.page->n_rows, sample_rows, kRows * 0.03f);
|
||||
EXPECT_NEAR(sample.gpair.size(), sample_rows, kRows * 0.03f);
|
||||
}
|
||||
|
||||
GradientPair sum_sampled_gpair{};
|
||||
|
||||
@@ -82,8 +82,6 @@ void TestBuildHist(bool use_shared_memory_histograms) {
|
||||
BatchParam batch_param{};
|
||||
GPUHistMakerDevice<GradientSumT> maker(0, page.get(), kNRows, param, kNCols, kNCols,
|
||||
true, batch_param);
|
||||
maker.InitHistogram();
|
||||
|
||||
xgboost::SimpleLCG gen;
|
||||
xgboost::SimpleRealUniformDistribution<bst_float> dist(0.0f, 1.0f);
|
||||
HostDeviceVector<GradientPair> gpair(kNRows);
|
||||
@@ -150,7 +148,7 @@ HistogramCutsWrapper GetHostCutMatrix () {
|
||||
}
|
||||
|
||||
// TODO(trivialfis): This test is oversimplified.
|
||||
TEST(GpuHist, EvaluateSplits) {
|
||||
TEST(GpuHist, EvaluateRootSplit) {
|
||||
constexpr int kNRows = 16;
|
||||
constexpr int kNCols = 8;
|
||||
|
||||
@@ -158,16 +156,16 @@ TEST(GpuHist, EvaluateSplits) {
|
||||
|
||||
std::vector<std::pair<std::string, std::string>> args {
|
||||
{"max_depth", "1"},
|
||||
{"max_leaves", "0"},
|
||||
{"max_leaves", "0"},
|
||||
|
||||
// Disable all other parameters.
|
||||
{"colsample_bynode", "1"},
|
||||
{"colsample_bylevel", "1"},
|
||||
{"colsample_bytree", "1"},
|
||||
{"min_child_weight", "0.01"},
|
||||
{"reg_alpha", "0"},
|
||||
{"reg_lambda", "0"},
|
||||
{"max_delta_step", "0"}
|
||||
// Disable all other parameters.
|
||||
{"colsample_bynode", "1"},
|
||||
{"colsample_bylevel", "1"},
|
||||
{"colsample_bytree", "1"},
|
||||
{"min_child_weight", "0.01"},
|
||||
{"reg_alpha", "0"},
|
||||
{"reg_lambda", "0"},
|
||||
{"max_delta_step", "0"}
|
||||
};
|
||||
param.Init(args);
|
||||
for (size_t i = 0; i < kNCols; ++i) {
|
||||
@@ -180,9 +178,9 @@ TEST(GpuHist, EvaluateSplits) {
|
||||
auto page = BuildEllpackPage(kNRows, kNCols);
|
||||
BatchParam batch_param{};
|
||||
GPUHistMakerDevice<GradientPairPrecise>
|
||||
maker(0, page.get(), kNRows, param, kNCols, kNCols, true, batch_param);
|
||||
maker(0, page.get(), kNRows, param, kNCols, kNCols, true, batch_param);
|
||||
// Initialize GPUHistMakerDevice::node_sum_gradients
|
||||
maker.host_node_sum_gradients = {{6.4f, 12.8f}};
|
||||
maker.node_sum_gradients = {};
|
||||
|
||||
// Initialize GPUHistMakerDevice::cut
|
||||
auto cmat = GetHostCutMatrix();
|
||||
@@ -205,13 +203,13 @@ TEST(GpuHist, EvaluateSplits) {
|
||||
|
||||
ASSERT_EQ(maker.hist.Data().size(), hist.size());
|
||||
thrust::copy(hist.begin(), hist.end(),
|
||||
maker.hist.Data().begin());
|
||||
maker.hist.Data().begin());
|
||||
|
||||
maker.column_sampler.Init(kNCols,
|
||||
param.colsample_bynode,
|
||||
param.colsample_bylevel,
|
||||
param.colsample_bytree,
|
||||
false);
|
||||
param.colsample_bynode,
|
||||
param.colsample_bylevel,
|
||||
param.colsample_bytree,
|
||||
false);
|
||||
|
||||
RegTree tree;
|
||||
MetaInfo info;
|
||||
@@ -222,12 +220,10 @@ TEST(GpuHist, EvaluateSplits) {
|
||||
maker.node_value_constraints[0].lower_bound = -1.0;
|
||||
maker.node_value_constraints[0].upper_bound = 1.0;
|
||||
|
||||
std::vector<DeviceSplitCandidate> res = maker.EvaluateSplits({0, 0 }, tree, kNCols);
|
||||
DeviceSplitCandidate res = maker.EvaluateRootSplit({6.4f, 12.8f});
|
||||
|
||||
ASSERT_EQ(res[0].findex, 7);
|
||||
ASSERT_EQ(res[1].findex, 7);
|
||||
ASSERT_NEAR(res[0].fvalue, 0.26, xgboost::kRtEps);
|
||||
ASSERT_NEAR(res[1].fvalue, 0.26, xgboost::kRtEps);
|
||||
ASSERT_EQ(res.findex, 7);
|
||||
ASSERT_NEAR(res.fvalue, 0.26, xgboost::kRtEps);
|
||||
}
|
||||
|
||||
void TestHistogramIndexImpl() {
|
||||
|
||||
@@ -4,6 +4,7 @@ import pytest
|
||||
|
||||
import numpy as np
|
||||
import xgboost as xgb
|
||||
|
||||
sys.path.append("tests/python")
|
||||
import testing as tm
|
||||
from test_predict import run_threaded_predict # noqa
|
||||
@@ -34,12 +35,13 @@ class TestGPUPredict(unittest.TestCase):
|
||||
param = {
|
||||
"objective": "binary:logistic",
|
||||
"predictor": "gpu_predictor",
|
||||
'eval_metric': 'auc',
|
||||
'tree_method': 'gpu_hist'
|
||||
'eval_metric': 'logloss',
|
||||
'tree_method': 'gpu_hist',
|
||||
'max_depth': 1
|
||||
}
|
||||
bst = xgb.train(param, dtrain, iterations, evals=watchlist,
|
||||
evals_result=res)
|
||||
assert self.non_decreasing(res["train"]["auc"])
|
||||
assert self.non_increasing(res["train"]["logloss"])
|
||||
gpu_pred_train = bst.predict(dtrain, output_margin=True)
|
||||
gpu_pred_test = bst.predict(dtest, output_margin=True)
|
||||
gpu_pred_val = bst.predict(dval, output_margin=True)
|
||||
@@ -57,8 +59,8 @@ class TestGPUPredict(unittest.TestCase):
|
||||
np.testing.assert_allclose(cpu_pred_test, gpu_pred_test,
|
||||
rtol=1e-6)
|
||||
|
||||
def non_decreasing(self, L):
|
||||
return all((x - y) < 0.001 for x, y in zip(L, L[1:]))
|
||||
def non_increasing(self, L):
|
||||
return all((y - x) < 0.001 for x, y in zip(L, L[1:]))
|
||||
|
||||
# Test case for a bug where multiple batch predictions made on a
|
||||
# test set produce incorrect results
|
||||
|
||||
Reference in New Issue
Block a user