Fix memory usage of device sketching (#5407)

This commit is contained in:
Rory Mitchell
2020-03-14 13:43:24 +13:00
committed by GitHub
parent bb8c8df39d
commit b745b7acce
13 changed files with 153 additions and 73 deletions

View File

@@ -6,7 +6,6 @@
#include <thrust/device_vector.h>
#include <thrust/iterator/counting_iterator.h>
#include "xgboost/c_api.h"
@@ -20,6 +19,7 @@
#include "../../../src/common/math.h"
#include "../../../src/data/simple_dmatrix.h"
#include "test_hist_util.h"
#include "../../../include/xgboost/logging.h"
namespace xgboost {
namespace common {
@@ -39,7 +39,7 @@ TEST(hist_util, DeviceSketch) {
std::vector<float> x = {1.0, 2.0, 3.0, 4.0, 5.0};
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
auto device_cuts = DeviceSketch(0, dmat.get(), num_bins, 0);
auto device_cuts = DeviceSketch(0, dmat.get(), num_bins);
HistogramCuts host_cuts;
DenseCuts builder(&host_cuts);
builder.Build(dmat.get(), num_bins);
@@ -49,6 +49,59 @@ TEST(hist_util, DeviceSketch) {
EXPECT_EQ(device_cuts.MinValues(), host_cuts.MinValues());
}
// Duplicate this function from hist_util.cu so we don't have to expose it in
// header
size_t RequiredSampleCutsTest(int max_bins, size_t num_rows) {
constexpr int kFactor = 8;
double eps = 1.0 / (kFactor * max_bins);
size_t dummy_nlevel;
size_t num_cuts;
WQuantileSketch<bst_float, bst_float>::LimitSizeLevel(
num_rows, eps, &dummy_nlevel, &num_cuts);
return std::min(num_cuts, num_rows);
}
TEST(hist_util, DeviceSketchMemory) {
int num_columns = 100;
int num_rows = 1000;
int num_bins = 256;
auto x = GenerateRandom(num_rows, num_columns);
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
dh::GlobalMemoryLogger().Clear();
ConsoleLogger::Configure({{"verbosity", "3"}});
auto device_cuts = DeviceSketch(0, dmat.get(), num_bins);
ConsoleLogger::Configure({{"verbosity", "0"}});
size_t bytes_num_elements = num_rows * num_columns*sizeof(Entry);
size_t bytes_cuts = RequiredSampleCutsTest(num_bins, num_rows) * num_columns *
sizeof(DenseCuts::WQSketch::Entry);
size_t bytes_constant = 1000;
EXPECT_LE(dh::GlobalMemoryLogger().PeakMemory(),
bytes_num_elements + bytes_cuts + bytes_constant);
}
TEST(hist_util, DeviceSketchMemoryWeights) {
int num_columns = 100;
int num_rows = 1000;
int num_bins = 256;
auto x = GenerateRandom(num_rows, num_columns);
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
dmat->Info().weights_.HostVector() = GenerateRandomWeights(num_rows);
dh::GlobalMemoryLogger().Clear();
ConsoleLogger::Configure({{"verbosity", "3"}});
auto device_cuts = DeviceSketch(0, dmat.get(), num_bins);
ConsoleLogger::Configure({{"verbosity", "0"}});
size_t bytes_num_elements =
num_rows * num_columns * (sizeof(Entry) + sizeof(float));
size_t bytes_cuts = RequiredSampleCutsTest(num_bins, num_rows) * num_columns *
sizeof(DenseCuts::WQSketch::Entry);
EXPECT_LE(dh::GlobalMemoryLogger().PeakMemory(),
size_t((bytes_num_elements + bytes_cuts) * 1.05));
}
TEST(hist_util, DeviceSketchDeterminism) {
int num_rows = 500;
int num_columns = 5;
@@ -72,7 +125,7 @@ TEST(hist_util, DeviceSketchDeterminism) {
for (auto num_categories : categorical_sizes) {
auto x = GenerateRandomCategoricalSingleColumn(n, num_categories);
auto dmat = GetDMatrixFromData(x, n, 1);
auto cuts = DeviceSketch(0, dmat.get(), num_bins, 0);
auto cuts = DeviceSketch(0, dmat.get(), num_bins);
ValidateCuts(cuts, dmat.get(), num_bins);
}
}
@@ -86,7 +139,7 @@ TEST(hist_util, DeviceSketchMultipleColumns) {
auto x = GenerateRandom(num_rows, num_columns);
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
for (auto num_bins : bin_sizes) {
auto cuts = DeviceSketch(0, dmat.get(), num_bins, 0);
auto cuts = DeviceSketch(0, dmat.get(), num_bins);
ValidateCuts(cuts, dmat.get(), num_bins);
}
}
@@ -102,7 +155,7 @@ TEST(hist_util, DeviceSketchMultipleColumnsWeights) {
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
dmat->Info().weights_.HostVector() = GenerateRandomWeights(num_rows);
for (auto num_bins : bin_sizes) {
auto cuts = DeviceSketch(0, dmat.get(), num_bins, 0);
auto cuts = DeviceSketch(0, dmat.get(), num_bins);
ValidateCuts(cuts, dmat.get(), num_bins);
}
}
@@ -131,7 +184,7 @@ TEST(hist_util, DeviceSketchMultipleColumnsExternal) {
auto dmat =
GetExternalMemoryDMatrixFromData(x, num_rows, num_columns, 100, temp);
for (auto num_bins : bin_sizes) {
auto cuts = DeviceSketch(0, dmat.get(), num_bins, 0);
auto cuts = DeviceSketch(0, dmat.get(), num_bins);
ValidateCuts(cuts, dmat.get(), num_bins);
}
}
@@ -159,6 +212,29 @@ TEST(hist_util, AdapterDeviceSketch)
EXPECT_EQ(device_cuts.MinValues(), host_cuts.MinValues());
}
TEST(hist_util, AdapterDeviceSketchMemory) {
int num_columns = 100;
int num_rows = 1000;
int num_bins = 256;
auto x = GenerateRandom(num_rows, num_columns);
auto x_device = thrust::device_vector<float>(x);
auto adapter = AdapterFromData(x_device, num_rows, num_columns);
dh::GlobalMemoryLogger().Clear();
ConsoleLogger::Configure({{"verbosity", "3"}});
auto cuts = AdapterDeviceSketch(&adapter, num_bins,
std::numeric_limits<float>::quiet_NaN());
ConsoleLogger::Configure({{"verbosity", "0"}});
size_t bytes_num_elements = num_rows * num_columns * sizeof(Entry);
size_t bytes_num_columns = (num_columns + 1) * sizeof(size_t);
size_t bytes_cuts = RequiredSampleCutsTest(num_bins, num_rows) * num_columns *
sizeof(DenseCuts::WQSketch::Entry);
size_t bytes_constant = 1000;
EXPECT_LE(dh::GlobalMemoryLogger().PeakMemory(),
bytes_num_elements + bytes_cuts + bytes_num_columns + bytes_constant);
}
TEST(hist_util, AdapterDeviceSketchCategorical) {
int categorical_sizes[] = {2, 6, 8, 12};
int num_bins = 256;

View File

@@ -14,10 +14,10 @@
namespace xgboost {
TEST(EllpackPage, EmptyDMatrix) {
constexpr int kNRows = 0, kNCols = 0, kMaxBin = 256, kGpuBatchNRows = 64;
constexpr int kNRows = 0, kNCols = 0, kMaxBin = 256;
constexpr float kSparsity = 0;
auto dmat = *CreateDMatrix(kNRows, kNCols, kSparsity);
auto& page = *dmat->GetBatches<EllpackPage>({0, kMaxBin, kGpuBatchNRows}).begin();
auto& page = *dmat->GetBatches<EllpackPage>({0, kMaxBin}).begin();
auto impl = page.Impl();
ASSERT_EQ(impl->row_stride, 0);
ASSERT_EQ(impl->cuts_.TotalBins(), 0);
@@ -101,7 +101,7 @@ TEST(EllpackPage, Copy) {
dmlc::TemporaryDirectory tmpdir;
std::unique_ptr<DMatrix>
dmat(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
BatchParam param{0, 256, 0, kPageSize};
BatchParam param{0, 256, kPageSize};
auto page = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
// Create an empty result page.
@@ -147,7 +147,7 @@ TEST(EllpackPage, Compact) {
dmlc::TemporaryDirectory tmpdir;
std::unique_ptr<DMatrix>
dmat(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
BatchParam param{0, 256, 0, kPageSize};
BatchParam param{0, 256, kPageSize};
auto page = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
// Create an empty result page.

View File

@@ -33,7 +33,7 @@ TEST(SparsePageDMatrix, MultipleEllpackPages) {
// Loop over the batches and count the records
int64_t batch_count = 0;
int64_t row_count = 0;
for (const auto& batch : dmat->GetBatches<EllpackPage>({0, 256, 0, 7UL})) {
for (const auto& batch : dmat->GetBatches<EllpackPage>({0, 256, 7UL})) {
EXPECT_LT(batch.Size(), dmat->Info().num_row_);
batch_count++;
row_count += batch.Size();
@@ -57,7 +57,7 @@ TEST(SparsePageDMatrix, EllpackPageContent) {
std::unique_ptr<DMatrix>
dmat_ext(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
BatchParam param{0, 2, 0, 0};
BatchParam param{0, 2, 0};
auto impl = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
EXPECT_EQ(impl->base_rowid, 0);
EXPECT_EQ(impl->n_rows, kRows);
@@ -107,7 +107,7 @@ TEST(SparsePageDMatrix, MultipleEllpackPageContent) {
std::unique_ptr<DMatrix>
dmat_ext(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
BatchParam param{0, kMaxBins, 0, kPageSize};
BatchParam param{0, kMaxBins, kPageSize};
auto impl = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
EXPECT_EQ(impl->base_rowid, 0);
EXPECT_EQ(impl->n_rows, kRows);
@@ -148,7 +148,7 @@ TEST(SparsePageDMatrix, EllpackPageMultipleLoops) {
std::unique_ptr<DMatrix>
dmat_ext(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
BatchParam param{0, kMaxBins, 0, kPageSize};
BatchParam param{0, kMaxBins, kPageSize};
auto impl = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
size_t current_row = 0;

View File

@@ -27,7 +27,7 @@ void VerifySampling(size_t page_size,
}
gpair.SetDevice(0);
BatchParam param{0, 256, 0, page_size};
BatchParam param{0, 256, page_size};
auto page = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
if (page_size != 0) {
EXPECT_NE(page->n_rows, kRows);
@@ -82,7 +82,7 @@ TEST(GradientBasedSampler, NoSampling_ExternalMemory) {
auto gpair = GenerateRandomGradients(kRows);
gpair.SetDevice(0);
BatchParam param{0, 256, 0, kPageSize};
BatchParam param{0, 256, kPageSize};
auto page = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
EXPECT_NE(page->n_rows, kRows);

View File

@@ -13,7 +13,7 @@ void TestDeterminsticHistogram() {
auto pp_m = CreateDMatrix(kRows, kCols, 0.5);
auto& matrix = **pp_m;
BatchParam batch_param{0, static_cast<int32_t>(kBins), 0, 0};
BatchParam batch_param{0, static_cast<int32_t>(kBins), 0};
for (auto const& batch : matrix.GetBatches<EllpackPage>(batch_param)) {
auto* page = batch.Impl();

View File

@@ -341,7 +341,7 @@ void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
// Loop over the batches and count the records
int64_t batch_count = 0;
int64_t row_count = 0;
for (const auto& batch : dmat->GetBatches<EllpackPage>({0, max_bin, 0, gpu_page_size})) {
for (const auto& batch : dmat->GetBatches<EllpackPage>({0, max_bin, gpu_page_size})) {
EXPECT_LT(batch.Size(), dmat->Info().num_row_);
batch_count++;
row_count += batch.Size();