Deterministic GPU histogram. (#5361)
* Use pre-rounding based method to obtain reproducible floating point summation. * GPU Hist for regression and classification are bit-by-bit reproducible. * Add doc. * Switch to thrust reduce for `node_sum_gradient`.
This commit is contained in:
@@ -76,6 +76,20 @@ void TestDeviceSketch(bool use_external_memory) {
|
||||
ASSERT_LT(fabs(hmat_cpu.Values()[i] - hmat_gpu.Values()[i]), eps * nrows);
|
||||
}
|
||||
|
||||
// Deterministic
|
||||
size_t constexpr kRounds { 100 };
|
||||
for (size_t r = 0; r < kRounds; ++r) {
|
||||
HistogramCuts new_sketch;
|
||||
DeviceSketch(device, max_bin, gpu_batch_nrows, dmat->get(), &new_sketch);
|
||||
ASSERT_EQ(hmat_gpu.Values().size(), new_sketch.Values().size());
|
||||
for (size_t i = 0; i < hmat_gpu.Values().size(); ++i) {
|
||||
ASSERT_EQ(hmat_gpu.Values()[i], new_sketch.Values()[i]);
|
||||
}
|
||||
for (size_t i = 0; i < hmat_gpu.MinValues().size(); ++i) {
|
||||
ASSERT_EQ(hmat_gpu.MinValues()[i], new_sketch.MinValues()[i]);
|
||||
}
|
||||
}
|
||||
|
||||
delete dmat;
|
||||
}
|
||||
|
||||
|
||||
@@ -224,9 +224,10 @@ inline GenericParameter CreateEmptyGenericParam(int gpu_id) {
|
||||
return tparam;
|
||||
}
|
||||
|
||||
inline HostDeviceVector<GradientPair> GenerateRandomGradients(const size_t n_rows) {
|
||||
inline HostDeviceVector<GradientPair> GenerateRandomGradients(const size_t n_rows,
|
||||
float lower= 0.0f, float upper = 1.0f) {
|
||||
xgboost::SimpleLCG gen;
|
||||
xgboost::SimpleRealUniformDistribution<bst_float> dist(0.0f, 1.0f);
|
||||
xgboost::SimpleRealUniformDistribution<bst_float> dist(lower, upper);
|
||||
std::vector<GradientPair> h_gpair(n_rows);
|
||||
for (auto &gpair : h_gpair) {
|
||||
bst_float grad = dist(&gen);
|
||||
@@ -288,6 +289,5 @@ inline std::unique_ptr<EllpackPageImpl> BuildEllpackPage(
|
||||
return page;
|
||||
}
|
||||
#endif
|
||||
|
||||
} // namespace xgboost
|
||||
#endif
|
||||
|
||||
@@ -605,6 +605,10 @@ TEST_F(MultiClassesSerializationTest, GPU_Hist) {
|
||||
{"seed", "0"},
|
||||
{"nthread", "1"},
|
||||
{"max_depth", std::to_string(kClasses)},
|
||||
// Somehow rebuilding the cache can generate slightly
|
||||
// different result (1e-7) with CPU predictor for some
|
||||
// entries.
|
||||
{"predictor", "gpu_predictor"},
|
||||
{"enable_experimental_json_serialization", "1"},
|
||||
{"tree_method", "gpu_hist"}},
|
||||
fmap_, *pp_dmat_);
|
||||
|
||||
69
tests/cpp/tree/gpu_hist/test_histogram.cu
Normal file
69
tests/cpp/tree/gpu_hist/test_histogram.cu
Normal file
@@ -0,0 +1,69 @@
|
||||
#include <gtest/gtest.h>

#include <cmath>

#include "../../helpers.h"
#include "../../../../src/tree/gpu_hist/row_partitioner.cuh"
#include "../../../../src/tree/gpu_hist/histogram.cuh"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
|
||||
template <typename Gradient>
|
||||
void TestDeterminsticHistogram() {
|
||||
size_t constexpr kBins = 24, kCols = 8, kRows = 32768, kRounds = 16;
|
||||
float constexpr kLower = -1e-2, kUpper = 1e2;
|
||||
|
||||
auto pp_m = CreateDMatrix(kRows, kCols, 0.5);
|
||||
auto& matrix = **pp_m;
|
||||
BatchParam batch_param{0, static_cast<int32_t>(kBins), 0, 0};
|
||||
|
||||
for (auto const& batch : matrix.GetBatches<EllpackPage>(batch_param)) {
|
||||
auto* page = batch.Impl();
|
||||
|
||||
tree::RowPartitioner row_partitioner(0, kRows);
|
||||
auto ridx = row_partitioner.GetRows(0);
|
||||
|
||||
dh::device_vector<Gradient> histogram(kBins * kCols);
|
||||
auto d_histogram = dh::ToSpan(histogram);
|
||||
auto gpair = GenerateRandomGradients(kRows, kLower, kUpper);
|
||||
gpair.SetDevice(0);
|
||||
|
||||
auto rounding = CreateRoundingFactor<Gradient>(gpair.DeviceSpan());
|
||||
BuildGradientHistogram(page->matrix, gpair.DeviceSpan(), ridx,
|
||||
d_histogram, rounding, true);
|
||||
|
||||
for (size_t i = 0; i < kRounds; ++i) {
|
||||
dh::device_vector<Gradient> new_histogram(kBins * kCols);
|
||||
auto d_histogram = dh::ToSpan(new_histogram);
|
||||
|
||||
auto rounding = CreateRoundingFactor<Gradient>(gpair.DeviceSpan());
|
||||
BuildGradientHistogram(page->matrix, gpair.DeviceSpan(), ridx,
|
||||
d_histogram, rounding, true);
|
||||
|
||||
for (size_t j = 0; j < new_histogram.size(); ++j) {
|
||||
ASSERT_EQ(((Gradient)new_histogram[j]).GetGrad(),
|
||||
((Gradient)histogram[j]).GetGrad());
|
||||
ASSERT_EQ(((Gradient)new_histogram[j]).GetHess(),
|
||||
((Gradient)histogram[j]).GetHess());
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
auto gpair = GenerateRandomGradients(kRows, kLower, kUpper);
|
||||
gpair.SetDevice(0);
|
||||
dh::device_vector<Gradient> baseline(kBins * kCols);
|
||||
BuildGradientHistogram(page->matrix, gpair.DeviceSpan(), ridx,
|
||||
dh::ToSpan(baseline), rounding, true);
|
||||
for (size_t i = 0; i < baseline.size(); ++i) {
|
||||
EXPECT_NEAR(((Gradient)baseline[i]).GetGrad(), ((Gradient)histogram[i]).GetGrad(),
|
||||
((Gradient)baseline[i]).GetGrad() * 1e-3);
|
||||
}
|
||||
}
|
||||
}
|
||||
delete pp_m;
|
||||
}
|
||||
|
||||
// Instantiate the determinism check for both gradient-sum precisions used by
// the GPU hist tree method.
TEST(Histogram, GPUDeterminstic) {
  // Single-precision gradient pairs.
  TestDeterminsticHistogram<GradientPair>();
  // Double-precision gradient pairs.
  TestDeterminsticHistogram<GradientPairPrecise>();
}
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
@@ -83,7 +83,8 @@ void TestBuildHist(bool use_shared_memory_histograms) {
|
||||
param.Init(args);
|
||||
auto page = BuildEllpackPage(kNRows, kNCols);
|
||||
BatchParam batch_param{};
|
||||
GPUHistMakerDevice<GradientSumT> maker(0, page.get(), kNRows, param, kNCols, kNCols, batch_param);
|
||||
GPUHistMakerDevice<GradientSumT> maker(0, page.get(), kNRows, param, kNCols, kNCols,
|
||||
true, batch_param);
|
||||
maker.InitHistogram();
|
||||
|
||||
xgboost::SimpleLCG gen;
|
||||
@@ -187,7 +188,7 @@ TEST(GpuHist, EvaluateSplits) {
|
||||
auto page = BuildEllpackPage(kNRows, kNCols);
|
||||
BatchParam batch_param{};
|
||||
GPUHistMakerDevice<GradientPairPrecise>
|
||||
maker(0, page.get(), kNRows, param, kNCols, kNCols, batch_param);
|
||||
maker(0, page.get(), kNRows, param, kNCols, kNCols, true, batch_param);
|
||||
// Initialize GPUHistMakerDevice::node_sum_gradients
|
||||
maker.node_sum_gradients = {{6.4f, 12.8f}};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user