GPU binning and compression. (#3319)
* GPU binning and compression: binning and index compression are now performed inside the DeviceShard constructor. If a DMatrix has multiple row batches, it is first converted into a single row batch.
This commit is contained in:
committed by
Rory Mitchell
parent
3f7696ff53
commit
286dccb8e8
73
tests/cpp/common/test_gpu_compressed_iterator.cu
Normal file
73
tests/cpp/common/test_gpu_compressed_iterator.cu
Normal file
@@ -0,0 +1,73 @@
|
||||
#include "../../../src/common/compressed_iterator.h"
|
||||
#include "../../../src/common/device_helpers.cuh"
|
||||
#include "gtest/gtest.h"
|
||||
#include <algorithm>
|
||||
#include <thrust/device_vector.h>
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
|
||||
// Device functor: stores one raw symbol into the shared compressed buffer.
// Uses AtomicWriteSymbol because adjacent symbols may share bytes when the
// symbol width is not a multiple of 8 bits.
struct WriteSymbolFunction {
  CompressedBufferWriter cbw;    // writer configured for the alphabet size
  unsigned char* buffer_data_d;  // destination compressed buffer (device)
  int* input_data_d;             // raw symbols to compress (device)

  WriteSymbolFunction(CompressedBufferWriter writer, unsigned char* buffer,
                      int* input)
      : cbw(writer), buffer_data_d(buffer), input_data_d(input) {}

  // Compress the i-th input symbol into position i of the buffer.
  __device__ void operator()(size_t i) {
    cbw.AtomicWriteSymbol(buffer_data_d, input_data_d[i], i);
  }
};
|
||||
|
||||
// Device functor: decodes one symbol from the compressed buffer into the
// output array via the CompressedIterator's random-access operator[].
struct ReadSymbolFunction {
  CompressedIterator<int> ci;  // iterator over the compressed buffer
  int* output_data_d;          // decompressed symbols destination (device)

  ReadSymbolFunction(CompressedIterator<int> iter, int* output)
      : ci(iter), output_data_d(output) {}

  // Decode the i-th symbol.
  __device__ void operator()(size_t i) {
    int symbol = ci[i];
    output_data_d[i] = symbol;
  }
};
|
||||
|
||||
// Round-trip test: compress random symbols on the GPU, decompress them on
// the GPU, and check the result equals the original host input. Covers
// alphabet sizes from 1 bit up to the full 32-bit range.
TEST(CompressedIterator, TestGPU) {
  std::vector<int> test_cases = {1, 3, 426, 21, 64, 256, 100000, INT32_MAX};
  const int num_elements = 1000;
  const int repetitions = 1000;
  srand(9);  // fixed seed for reproducibility

  for (auto alphabet_size : test_cases) {
    for (int rep = 0; rep < repetitions; ++rep) {
      // Generate host-side random symbols in [0, alphabet_size).
      std::vector<int> input(num_elements);
      std::generate(input.begin(), input.end(),
                    [=]() { return rand() % alphabet_size; });

      CompressedBufferWriter cbw(alphabet_size);
      thrust::device_vector<int> input_d(input);
      thrust::device_vector<unsigned char> buffer_d(
          CompressedBufferWriter::CalculateBufferSize(input.size(),
                                                      alphabet_size));

      // Compress on the device, one thread per symbol.
      dh::LaunchN(0, input_d.size(),
                  WriteSymbolFunction(cbw, buffer_d.data().get(),
                                      input_d.data().get()));

      // Decompress on the device, one thread per symbol.
      CompressedIterator<int> ci(buffer_d.data().get(), alphabet_size);
      thrust::device_vector<int> output_d(input.size());
      dh::LaunchN(0, output_d.size(),
                  ReadSymbolFunction(ci, output_d.data().get()));

      // Copy back to the host and compare with the original data.
      std::vector<int> output(output_d.size());
      thrust::copy(output_d.begin(), output_d.end(), output.begin());
      ASSERT_TRUE(input == output);
    }
  }
}
|
||||
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
@@ -18,11 +18,19 @@ long GetFileSize(const std::string filename) {
|
||||
}
|
||||
|
||||
std::string CreateSimpleTestData() {
|
||||
return CreateBigTestData(6);
|
||||
}
|
||||
|
||||
std::string CreateBigTestData(size_t n_entries) {
|
||||
std::string tmp_file = TempFileName();
|
||||
std::ofstream fo;
|
||||
fo.open(tmp_file);
|
||||
fo << "0 0:0 1:10 2:20\n";
|
||||
fo << "1 0:0 3:30 4:40\n";
|
||||
const size_t entries_per_row = 3;
|
||||
size_t n_rows = (n_entries + entries_per_row - 1) / entries_per_row;
|
||||
for (size_t i = 0; i < n_rows; ++i) {
|
||||
const char* row = i % 2 == 0 ? " 0:0 1:10 2:20\n" : " 0:0 3:30 4:40\n";
|
||||
fo << i << row;
|
||||
}
|
||||
fo.close();
|
||||
return tmp_file;
|
||||
}
|
||||
|
||||
@@ -23,6 +23,8 @@ long GetFileSize(const std::string filename);
|
||||
|
||||
std::string CreateSimpleTestData();
|
||||
|
||||
std::string CreateBigTestData(size_t n_entries);
|
||||
|
||||
void CheckObjFunction(xgboost::ObjFunction * obj,
|
||||
std::vector<xgboost::bst_float> preds,
|
||||
std::vector<xgboost::bst_float> labels,
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
#include "../helpers.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#include "../../../src/data/sparse_page_source.h"
|
||||
#include "../../../src/gbm/gbtree_model.h"
|
||||
#include "../../../src/tree/updater_gpu_hist.cu"
|
||||
|
||||
@@ -24,8 +25,14 @@ TEST(gpu_hist_experimental, TestSparseShard) {
|
||||
gmat.Init(dmat.get());
|
||||
TrainParam p;
|
||||
p.max_depth = 6;
|
||||
DeviceShard shard(0, 0, gmat, 0, rows, hmat.row_ptr.back(),
|
||||
p);
|
||||
|
||||
dmlc::DataIter<RowBatch>* iter = dmat->RowIterator();
|
||||
iter->BeforeFirst();
|
||||
CHECK(iter->Next());
|
||||
const RowBatch& batch = iter->Value();
|
||||
DeviceShard shard(0, 0, 0, rows, hmat.row_ptr.back(), p);
|
||||
shard.Init(hmat, batch);
|
||||
CHECK(!iter->Next());
|
||||
|
||||
ASSERT_LT(shard.row_stride, columns);
|
||||
|
||||
@@ -59,8 +66,15 @@ TEST(gpu_hist_experimental, TestDenseShard) {
|
||||
gmat.Init(dmat.get());
|
||||
TrainParam p;
|
||||
p.max_depth = 6;
|
||||
DeviceShard shard(0, 0, gmat, 0, rows, hmat.row_ptr.back(),
|
||||
p);
|
||||
|
||||
dmlc::DataIter<RowBatch>* iter = dmat->RowIterator();
|
||||
iter->BeforeFirst();
|
||||
CHECK(iter->Next());
|
||||
const RowBatch& batch = iter->Value();
|
||||
|
||||
DeviceShard shard(0, 0, 0, rows, hmat.row_ptr.back(), p);
|
||||
shard.Init(hmat, batch);
|
||||
CHECK(!iter->Next());
|
||||
|
||||
ASSERT_EQ(shard.row_stride, columns);
|
||||
|
||||
@@ -75,4 +89,4 @@ TEST(gpu_hist_experimental, TestDenseShard) {
|
||||
}
|
||||
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost
|
||||
|
||||
Reference in New Issue
Block a user