Fix memory usage of device sketching (#5407)
This commit is contained in:
@@ -6,7 +6,6 @@
|
||||
|
||||
|
||||
#include <thrust/device_vector.h>
|
||||
#include <thrust/iterator/counting_iterator.h>
|
||||
|
||||
#include "xgboost/c_api.h"
|
||||
|
||||
@@ -20,6 +19,7 @@
|
||||
#include "../../../src/common/math.h"
|
||||
#include "../../../src/data/simple_dmatrix.h"
|
||||
#include "test_hist_util.h"
|
||||
#include "../../../include/xgboost/logging.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
@@ -39,7 +39,7 @@ TEST(hist_util, DeviceSketch) {
|
||||
std::vector<float> x = {1.0, 2.0, 3.0, 4.0, 5.0};
|
||||
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
|
||||
|
||||
auto device_cuts = DeviceSketch(0, dmat.get(), num_bins, 0);
|
||||
auto device_cuts = DeviceSketch(0, dmat.get(), num_bins);
|
||||
HistogramCuts host_cuts;
|
||||
DenseCuts builder(&host_cuts);
|
||||
builder.Build(dmat.get(), num_bins);
|
||||
@@ -49,6 +49,59 @@ TEST(hist_util, DeviceSketch) {
|
||||
EXPECT_EQ(device_cuts.MinValues(), host_cuts.MinValues());
|
||||
}
|
||||
|
||||
// Duplicate this function from hist_util.cu so we don't have to expose it in
|
||||
// header
|
||||
size_t RequiredSampleCutsTest(int max_bins, size_t num_rows) {
|
||||
constexpr int kFactor = 8;
|
||||
double eps = 1.0 / (kFactor * max_bins);
|
||||
size_t dummy_nlevel;
|
||||
size_t num_cuts;
|
||||
WQuantileSketch<bst_float, bst_float>::LimitSizeLevel(
|
||||
num_rows, eps, &dummy_nlevel, &num_cuts);
|
||||
return std::min(num_cuts, num_rows);
|
||||
}
|
||||
|
||||
TEST(hist_util, DeviceSketchMemory) {
|
||||
int num_columns = 100;
|
||||
int num_rows = 1000;
|
||||
int num_bins = 256;
|
||||
auto x = GenerateRandom(num_rows, num_columns);
|
||||
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
|
||||
|
||||
dh::GlobalMemoryLogger().Clear();
|
||||
ConsoleLogger::Configure({{"verbosity", "3"}});
|
||||
auto device_cuts = DeviceSketch(0, dmat.get(), num_bins);
|
||||
ConsoleLogger::Configure({{"verbosity", "0"}});
|
||||
|
||||
size_t bytes_num_elements = num_rows * num_columns*sizeof(Entry);
|
||||
size_t bytes_cuts = RequiredSampleCutsTest(num_bins, num_rows) * num_columns *
|
||||
sizeof(DenseCuts::WQSketch::Entry);
|
||||
size_t bytes_constant = 1000;
|
||||
EXPECT_LE(dh::GlobalMemoryLogger().PeakMemory(),
|
||||
bytes_num_elements + bytes_cuts + bytes_constant);
|
||||
}
|
||||
|
||||
TEST(hist_util, DeviceSketchMemoryWeights) {
|
||||
int num_columns = 100;
|
||||
int num_rows = 1000;
|
||||
int num_bins = 256;
|
||||
auto x = GenerateRandom(num_rows, num_columns);
|
||||
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
|
||||
dmat->Info().weights_.HostVector() = GenerateRandomWeights(num_rows);
|
||||
|
||||
dh::GlobalMemoryLogger().Clear();
|
||||
ConsoleLogger::Configure({{"verbosity", "3"}});
|
||||
auto device_cuts = DeviceSketch(0, dmat.get(), num_bins);
|
||||
ConsoleLogger::Configure({{"verbosity", "0"}});
|
||||
|
||||
size_t bytes_num_elements =
|
||||
num_rows * num_columns * (sizeof(Entry) + sizeof(float));
|
||||
size_t bytes_cuts = RequiredSampleCutsTest(num_bins, num_rows) * num_columns *
|
||||
sizeof(DenseCuts::WQSketch::Entry);
|
||||
EXPECT_LE(dh::GlobalMemoryLogger().PeakMemory(),
|
||||
size_t((bytes_num_elements + bytes_cuts) * 1.05));
|
||||
}
|
||||
|
||||
TEST(hist_util, DeviceSketchDeterminism) {
|
||||
int num_rows = 500;
|
||||
int num_columns = 5;
|
||||
@@ -72,7 +125,7 @@ TEST(hist_util, DeviceSketchDeterminism) {
|
||||
for (auto num_categories : categorical_sizes) {
|
||||
auto x = GenerateRandomCategoricalSingleColumn(n, num_categories);
|
||||
auto dmat = GetDMatrixFromData(x, n, 1);
|
||||
auto cuts = DeviceSketch(0, dmat.get(), num_bins, 0);
|
||||
auto cuts = DeviceSketch(0, dmat.get(), num_bins);
|
||||
ValidateCuts(cuts, dmat.get(), num_bins);
|
||||
}
|
||||
}
|
||||
@@ -86,7 +139,7 @@ TEST(hist_util, DeviceSketchMultipleColumns) {
|
||||
auto x = GenerateRandom(num_rows, num_columns);
|
||||
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
|
||||
for (auto num_bins : bin_sizes) {
|
||||
auto cuts = DeviceSketch(0, dmat.get(), num_bins, 0);
|
||||
auto cuts = DeviceSketch(0, dmat.get(), num_bins);
|
||||
ValidateCuts(cuts, dmat.get(), num_bins);
|
||||
}
|
||||
}
|
||||
@@ -102,7 +155,7 @@ TEST(hist_util, DeviceSketchMultipleColumnsWeights) {
|
||||
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
|
||||
dmat->Info().weights_.HostVector() = GenerateRandomWeights(num_rows);
|
||||
for (auto num_bins : bin_sizes) {
|
||||
auto cuts = DeviceSketch(0, dmat.get(), num_bins, 0);
|
||||
auto cuts = DeviceSketch(0, dmat.get(), num_bins);
|
||||
ValidateCuts(cuts, dmat.get(), num_bins);
|
||||
}
|
||||
}
|
||||
@@ -131,7 +184,7 @@ TEST(hist_util, DeviceSketchMultipleColumnsExternal) {
|
||||
auto dmat =
|
||||
GetExternalMemoryDMatrixFromData(x, num_rows, num_columns, 100, temp);
|
||||
for (auto num_bins : bin_sizes) {
|
||||
auto cuts = DeviceSketch(0, dmat.get(), num_bins, 0);
|
||||
auto cuts = DeviceSketch(0, dmat.get(), num_bins);
|
||||
ValidateCuts(cuts, dmat.get(), num_bins);
|
||||
}
|
||||
}
|
||||
@@ -159,6 +212,29 @@ TEST(hist_util, AdapterDeviceSketch)
|
||||
EXPECT_EQ(device_cuts.MinValues(), host_cuts.MinValues());
|
||||
}
|
||||
|
||||
TEST(hist_util, AdapterDeviceSketchMemory) {
|
||||
int num_columns = 100;
|
||||
int num_rows = 1000;
|
||||
int num_bins = 256;
|
||||
auto x = GenerateRandom(num_rows, num_columns);
|
||||
auto x_device = thrust::device_vector<float>(x);
|
||||
auto adapter = AdapterFromData(x_device, num_rows, num_columns);
|
||||
|
||||
dh::GlobalMemoryLogger().Clear();
|
||||
ConsoleLogger::Configure({{"verbosity", "3"}});
|
||||
auto cuts = AdapterDeviceSketch(&adapter, num_bins,
|
||||
std::numeric_limits<float>::quiet_NaN());
|
||||
ConsoleLogger::Configure({{"verbosity", "0"}});
|
||||
|
||||
size_t bytes_num_elements = num_rows * num_columns * sizeof(Entry);
|
||||
size_t bytes_num_columns = (num_columns + 1) * sizeof(size_t);
|
||||
size_t bytes_cuts = RequiredSampleCutsTest(num_bins, num_rows) * num_columns *
|
||||
sizeof(DenseCuts::WQSketch::Entry);
|
||||
size_t bytes_constant = 1000;
|
||||
EXPECT_LE(dh::GlobalMemoryLogger().PeakMemory(),
|
||||
bytes_num_elements + bytes_cuts + bytes_num_columns + bytes_constant);
|
||||
}
|
||||
|
||||
TEST(hist_util, AdapterDeviceSketchCategorical) {
|
||||
int categorical_sizes[] = {2, 6, 8, 12};
|
||||
int num_bins = 256;
|
||||
|
||||
@@ -14,10 +14,10 @@
|
||||
namespace xgboost {
|
||||
|
||||
TEST(EllpackPage, EmptyDMatrix) {
|
||||
constexpr int kNRows = 0, kNCols = 0, kMaxBin = 256, kGpuBatchNRows = 64;
|
||||
constexpr int kNRows = 0, kNCols = 0, kMaxBin = 256;
|
||||
constexpr float kSparsity = 0;
|
||||
auto dmat = *CreateDMatrix(kNRows, kNCols, kSparsity);
|
||||
auto& page = *dmat->GetBatches<EllpackPage>({0, kMaxBin, kGpuBatchNRows}).begin();
|
||||
auto& page = *dmat->GetBatches<EllpackPage>({0, kMaxBin}).begin();
|
||||
auto impl = page.Impl();
|
||||
ASSERT_EQ(impl->row_stride, 0);
|
||||
ASSERT_EQ(impl->cuts_.TotalBins(), 0);
|
||||
@@ -101,7 +101,7 @@ TEST(EllpackPage, Copy) {
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
std::unique_ptr<DMatrix>
|
||||
dmat(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
|
||||
BatchParam param{0, 256, 0, kPageSize};
|
||||
BatchParam param{0, 256, kPageSize};
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
|
||||
|
||||
// Create an empty result page.
|
||||
@@ -147,7 +147,7 @@ TEST(EllpackPage, Compact) {
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
std::unique_ptr<DMatrix>
|
||||
dmat(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
|
||||
BatchParam param{0, 256, 0, kPageSize};
|
||||
BatchParam param{0, 256, kPageSize};
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
|
||||
|
||||
// Create an empty result page.
|
||||
|
||||
@@ -33,7 +33,7 @@ TEST(SparsePageDMatrix, MultipleEllpackPages) {
|
||||
// Loop over the batches and count the records
|
||||
int64_t batch_count = 0;
|
||||
int64_t row_count = 0;
|
||||
for (const auto& batch : dmat->GetBatches<EllpackPage>({0, 256, 0, 7UL})) {
|
||||
for (const auto& batch : dmat->GetBatches<EllpackPage>({0, 256, 7UL})) {
|
||||
EXPECT_LT(batch.Size(), dmat->Info().num_row_);
|
||||
batch_count++;
|
||||
row_count += batch.Size();
|
||||
@@ -57,7 +57,7 @@ TEST(SparsePageDMatrix, EllpackPageContent) {
|
||||
std::unique_ptr<DMatrix>
|
||||
dmat_ext(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
|
||||
|
||||
BatchParam param{0, 2, 0, 0};
|
||||
BatchParam param{0, 2, 0};
|
||||
auto impl = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
|
||||
EXPECT_EQ(impl->base_rowid, 0);
|
||||
EXPECT_EQ(impl->n_rows, kRows);
|
||||
@@ -107,7 +107,7 @@ TEST(SparsePageDMatrix, MultipleEllpackPageContent) {
|
||||
std::unique_ptr<DMatrix>
|
||||
dmat_ext(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
|
||||
|
||||
BatchParam param{0, kMaxBins, 0, kPageSize};
|
||||
BatchParam param{0, kMaxBins, kPageSize};
|
||||
auto impl = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
|
||||
EXPECT_EQ(impl->base_rowid, 0);
|
||||
EXPECT_EQ(impl->n_rows, kRows);
|
||||
@@ -148,7 +148,7 @@ TEST(SparsePageDMatrix, EllpackPageMultipleLoops) {
|
||||
std::unique_ptr<DMatrix>
|
||||
dmat_ext(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
|
||||
|
||||
BatchParam param{0, kMaxBins, 0, kPageSize};
|
||||
BatchParam param{0, kMaxBins, kPageSize};
|
||||
auto impl = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
|
||||
|
||||
size_t current_row = 0;
|
||||
|
||||
@@ -27,7 +27,7 @@ void VerifySampling(size_t page_size,
|
||||
}
|
||||
gpair.SetDevice(0);
|
||||
|
||||
BatchParam param{0, 256, 0, page_size};
|
||||
BatchParam param{0, 256, page_size};
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
|
||||
if (page_size != 0) {
|
||||
EXPECT_NE(page->n_rows, kRows);
|
||||
@@ -82,7 +82,7 @@ TEST(GradientBasedSampler, NoSampling_ExternalMemory) {
|
||||
auto gpair = GenerateRandomGradients(kRows);
|
||||
gpair.SetDevice(0);
|
||||
|
||||
BatchParam param{0, 256, 0, kPageSize};
|
||||
BatchParam param{0, 256, kPageSize};
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
|
||||
EXPECT_NE(page->n_rows, kRows);
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ void TestDeterminsticHistogram() {
|
||||
|
||||
auto pp_m = CreateDMatrix(kRows, kCols, 0.5);
|
||||
auto& matrix = **pp_m;
|
||||
BatchParam batch_param{0, static_cast<int32_t>(kBins), 0, 0};
|
||||
BatchParam batch_param{0, static_cast<int32_t>(kBins), 0};
|
||||
|
||||
for (auto const& batch : matrix.GetBatches<EllpackPage>(batch_param)) {
|
||||
auto* page = batch.Impl();
|
||||
|
||||
@@ -341,7 +341,7 @@ void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
|
||||
// Loop over the batches and count the records
|
||||
int64_t batch_count = 0;
|
||||
int64_t row_count = 0;
|
||||
for (const auto& batch : dmat->GetBatches<EllpackPage>({0, max_bin, 0, gpu_page_size})) {
|
||||
for (const auto& batch : dmat->GetBatches<EllpackPage>({0, max_bin, gpu_page_size})) {
|
||||
EXPECT_LT(batch.Size(), dmat->Info().num_row_);
|
||||
batch_count++;
|
||||
row_count += batch.Size();
|
||||
|
||||
Reference in New Issue
Block a user