Cache GPU histogram kernel configuration. (#10538)
This commit is contained in:
@@ -1,11 +1,10 @@
|
||||
/**
|
||||
* Copyright 2020-2023, XGBoost Contributors
|
||||
* Copyright 2020-2024, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "../../../../src/common/categorical.h"
|
||||
#include "../../../../src/tree/gpu_hist/histogram.cuh"
|
||||
#include "../../../../src/tree/gpu_hist/row_partitioner.cuh"
|
||||
#include "../../../../src/tree/param.h" // TrainParam
|
||||
@@ -13,7 +12,7 @@
|
||||
#include "../../helpers.h"
|
||||
|
||||
namespace xgboost::tree {
|
||||
void TestDeterministicHistogram(bool is_dense, int shm_size) {
|
||||
void TestDeterministicHistogram(bool is_dense, int shm_size, bool force_global) {
|
||||
Context ctx = MakeCUDACtx(0);
|
||||
size_t constexpr kBins = 256, kCols = 120, kRows = 16384, kRounds = 16;
|
||||
float constexpr kLower = -1e-2, kUpper = 1e2;
|
||||
@@ -25,35 +24,37 @@ void TestDeterministicHistogram(bool is_dense, int shm_size) {
|
||||
for (auto const& batch : matrix->GetBatches<EllpackPage>(&ctx, batch_param)) {
|
||||
auto* page = batch.Impl();
|
||||
|
||||
tree::RowPartitioner row_partitioner(FstCU(), kRows);
|
||||
tree::RowPartitioner row_partitioner(ctx.Device(), kRows);
|
||||
auto ridx = row_partitioner.GetRows(0);
|
||||
|
||||
int num_bins = kBins * kCols;
|
||||
bst_bin_t num_bins = kBins * kCols;
|
||||
dh::device_vector<GradientPairInt64> histogram(num_bins);
|
||||
auto d_histogram = dh::ToSpan(histogram);
|
||||
auto gpair = GenerateRandomGradients(kRows, kLower, kUpper);
|
||||
gpair.SetDevice(FstCU());
|
||||
gpair.SetDevice(ctx.Device());
|
||||
|
||||
FeatureGroups feature_groups(page->Cuts(), page->is_dense, shm_size,
|
||||
sizeof(GradientPairInt64));
|
||||
FeatureGroups feature_groups(page->Cuts(), page->is_dense, shm_size, sizeof(GradientPairInt64));
|
||||
|
||||
auto quantiser = GradientQuantiser(&ctx, gpair.DeviceSpan(), MetaInfo());
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(FstCU()),
|
||||
feature_groups.DeviceAccessor(FstCU()), gpair.DeviceSpan(), ridx,
|
||||
DeviceHistogramBuilder builder;
|
||||
builder.Reset(&ctx, feature_groups.DeviceAccessor(ctx.Device()), force_global);
|
||||
builder.BuildHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(ctx.Device()),
|
||||
feature_groups.DeviceAccessor(ctx.Device()), gpair.DeviceSpan(), ridx,
|
||||
d_histogram, quantiser);
|
||||
|
||||
std::vector<GradientPairInt64> histogram_h(num_bins);
|
||||
dh::safe_cuda(cudaMemcpy(histogram_h.data(), d_histogram.data(),
|
||||
num_bins * sizeof(GradientPairInt64),
|
||||
cudaMemcpyDeviceToHost));
|
||||
num_bins * sizeof(GradientPairInt64), cudaMemcpyDeviceToHost));
|
||||
|
||||
for (size_t i = 0; i < kRounds; ++i) {
|
||||
for (std::size_t i = 0; i < kRounds; ++i) {
|
||||
dh::device_vector<GradientPairInt64> new_histogram(num_bins);
|
||||
auto d_new_histogram = dh::ToSpan(new_histogram);
|
||||
|
||||
auto quantiser = GradientQuantiser(&ctx, gpair.DeviceSpan(), MetaInfo());
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(FstCU()),
|
||||
feature_groups.DeviceAccessor(FstCU()), gpair.DeviceSpan(), ridx,
|
||||
DeviceHistogramBuilder builder;
|
||||
builder.Reset(&ctx, feature_groups.DeviceAccessor(ctx.Device()), force_global);
|
||||
builder.BuildHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(ctx.Device()),
|
||||
feature_groups.DeviceAccessor(ctx.Device()), gpair.DeviceSpan(), ridx,
|
||||
d_new_histogram, quantiser);
|
||||
|
||||
std::vector<GradientPairInt64> new_histogram_h(num_bins);
|
||||
@@ -68,14 +69,16 @@ void TestDeterministicHistogram(bool is_dense, int shm_size) {
|
||||
|
||||
{
|
||||
auto gpair = GenerateRandomGradients(kRows, kLower, kUpper);
|
||||
gpair.SetDevice(FstCU());
|
||||
gpair.SetDevice(ctx.Device());
|
||||
|
||||
// Use a single feature group to compute the baseline.
|
||||
FeatureGroups single_group(page->Cuts());
|
||||
|
||||
dh::device_vector<GradientPairInt64> baseline(num_bins);
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(FstCU()),
|
||||
single_group.DeviceAccessor(FstCU()), gpair.DeviceSpan(), ridx,
|
||||
DeviceHistogramBuilder builder;
|
||||
builder.Reset(&ctx, single_group.DeviceAccessor(ctx.Device()), force_global);
|
||||
builder.BuildHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(ctx.Device()),
|
||||
single_group.DeviceAccessor(ctx.Device()), gpair.DeviceSpan(), ridx,
|
||||
dh::ToSpan(baseline), quantiser);
|
||||
|
||||
std::vector<GradientPairInt64> baseline_h(num_bins);
|
||||
@@ -96,7 +99,9 @@ TEST(Histogram, GPUDeterministic) {
|
||||
std::vector<int> shm_sizes{48 * 1024, 64 * 1024, 160 * 1024};
|
||||
for (bool is_dense : is_dense_array) {
|
||||
for (int shm_size : shm_sizes) {
|
||||
TestDeterministicHistogram(is_dense, shm_size);
|
||||
for (bool force_global : {true, false}) {
|
||||
TestDeterministicHistogram(is_dense, shm_size, force_global);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -136,7 +141,9 @@ void TestGPUHistogramCategorical(size_t num_categories) {
|
||||
for (auto const &batch : cat_m->GetBatches<EllpackPage>(&ctx, batch_param)) {
|
||||
auto* page = batch.Impl();
|
||||
FeatureGroups single_group(page->Cuts());
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(ctx.Device()),
|
||||
DeviceHistogramBuilder builder;
|
||||
builder.Reset(&ctx, single_group.DeviceAccessor(ctx.Device()), false);
|
||||
builder.BuildHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(ctx.Device()),
|
||||
single_group.DeviceAccessor(ctx.Device()), gpair.DeviceSpan(), ridx,
|
||||
dh::ToSpan(cat_hist), quantiser);
|
||||
}
|
||||
@@ -150,7 +157,9 @@ void TestGPUHistogramCategorical(size_t num_categories) {
|
||||
for (auto const &batch : encode_m->GetBatches<EllpackPage>(&ctx, batch_param)) {
|
||||
auto* page = batch.Impl();
|
||||
FeatureGroups single_group(page->Cuts());
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(ctx.Device()),
|
||||
DeviceHistogramBuilder builder;
|
||||
builder.Reset(&ctx, single_group.DeviceAccessor(ctx.Device()), false);
|
||||
builder.BuildHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(ctx.Device()),
|
||||
single_group.DeviceAccessor(ctx.Device()), gpair.DeviceSpan(), ridx,
|
||||
dh::ToSpan(encode_hist), quantiser);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user