Use quantised gradients in gpu_hist histograms (#8246)

This commit is contained in:
Rory Mitchell
2022-09-26 17:35:35 +02:00
committed by GitHub
parent 4056974e37
commit 8f77677193
14 changed files with 394 additions and 336 deletions

View File

@@ -29,7 +29,7 @@ TEST(GpuHist, DeviceHistogram) {
constexpr size_t kNBins = 128;
constexpr int kNNodes = 4;
constexpr size_t kStopGrowing = kNNodes * kNBins * 2u;
DeviceHistogramStorage<GradientPairPrecise, kStopGrowing> histogram;
DeviceHistogramStorage<kStopGrowing> histogram;
histogram.Init(0, kNBins);
for (int i = 0; i < kNNodes; ++i) {
histogram.AllocateHistograms({i});
@@ -107,32 +107,27 @@ void TestBuildHist(bool use_shared_memory_histograms) {
maker.row_partitioner.reset(new RowPartitioner(0, kNRows));
maker.hist.AllocateHistograms({0});
maker.gpair = gpair.DeviceSpan();
maker.histogram_rounding = CreateRoundingFactor<GradientSumT>(maker.gpair);
maker.histogram_rounding.reset(new GradientQuantizer(maker.gpair));
BuildGradientHistogram(
page->GetDeviceAccessor(0), maker.feature_groups->DeviceAccessor(0),
gpair.DeviceSpan(), maker.row_partitioner->GetRows(0),
maker.hist.GetNodeHistogram(0), maker.histogram_rounding,
maker.hist.GetNodeHistogram(0), *maker.histogram_rounding,
!use_shared_memory_histograms);
DeviceHistogramStorage<GradientSumT>& d_hist = maker.hist;
DeviceHistogramStorage<>& d_hist = maker.hist;
auto node_histogram = d_hist.GetNodeHistogram(0);
// d_hist.data stored in float, not gradient pair
thrust::host_vector<GradientSumT> h_result (d_hist.Data().size() / 2);
size_t data_size =
sizeof(GradientSumT) /
(sizeof(GradientSumT) / sizeof(typename GradientSumT::ValueT));
data_size *= d_hist.Data().size();
dh::safe_cuda(cudaMemcpy(h_result.data(), node_histogram.data(), data_size,
thrust::host_vector<GradientPairInt64> h_result (node_histogram.size());
dh::safe_cuda(cudaMemcpy(h_result.data(), node_histogram.data(), node_histogram.size_bytes(),
cudaMemcpyDeviceToHost));
std::vector<GradientPairPrecise> solution = GetHostHistGpair();
std::cout << std::fixed;
for (size_t i = 0; i < h_result.size(); ++i) {
ASSERT_FALSE(std::isnan(h_result[i].GetGrad()));
EXPECT_NEAR(h_result[i].GetGrad(), solution[i].GetGrad(), 0.01f);
EXPECT_NEAR(h_result[i].GetHess(), solution[i].GetHess(), 0.01f);
auto result = maker.histogram_rounding->ToFloatingPoint(h_result[i]);
EXPECT_NEAR(result.GetGrad(), solution[i].GetGrad(), 0.01f);
EXPECT_NEAR(result.GetHess(), solution[i].GetHess(), 0.01f);
}
}
@@ -161,6 +156,12 @@ HistogramCutsWrapper GetHostCutMatrix () {
return cmat;
}
// Test helper: returns a GradientQuantizer constructed from a one-element
// device vector holding the gradient pair (1000, 1000).
// Tests using this quantizer should not exceed a gradient sum of 1000.
inline GradientQuantizer DummyRoundingFactor() {
  thrust::device_vector<GradientPair> upper_bound(1, GradientPair{1000.f, 1000.f});
  return GradientQuantizer(dh::ToSpan(upper_bound));
}
// TODO(trivialfis): This test is over simplified.
TEST(GpuHist, EvaluateRootSplit) {
constexpr int kNRows = 16;
@@ -209,10 +210,12 @@ TEST(GpuHist, EvaluateRootSplit) {
// Each row of hist_gpair represents gpairs for one feature.
// Each entry represents a bin.
std::vector<GradientPairPrecise> hist_gpair = GetHostHistGpair();
std::vector<bst_float> hist;
maker.histogram_rounding.reset(new GradientQuantizer(DummyRoundingFactor()));
std::vector<int64_t> hist;
for (auto pair : hist_gpair) {
hist.push_back(pair.GetGrad());
hist.push_back(pair.GetHess());
auto grad = maker.histogram_rounding->ToFixedPoint({float(pair.GetGrad()),float(pair.GetHess())});
hist.push_back(grad.GetQuantisedGrad());
hist.push_back(grad.GetQuantisedHess());
}
ASSERT_EQ(maker.hist.Data().size(), hist.size());