GPU binning and compression. (#3319)

* GPU binning and compression.

- binning and index compression are done inside the DeviceShard constructor
- in case of a DMatrix with multiple row batches, it is first converted into a single row batch
This commit is contained in:
Andy Adinets
2018-06-05 07:15:13 +02:00
committed by Rory Mitchell
parent 3f7696ff53
commit 286dccb8e8
10 changed files with 302 additions and 67 deletions

View File

@@ -8,6 +8,10 @@
#include <cstddef>
#include <algorithm>
#ifdef __CUDACC__
#include "device_helpers.cuh"
#endif
namespace xgboost {
namespace common {
@@ -96,6 +100,23 @@ class CompressedBufferWriter {
}
}
}
#ifdef __CUDACC__
/*!
 * \brief Device-side write of one symbol into the compressed buffer.
 *
 * The symbol occupies bits [offset * symbol_bits_, (offset + 1) * symbol_bits_)
 * of the payload that begins detail::kPadding bytes into \p buffer. It is
 * aligned so that its least-significant bit falls on the last of those bit
 * positions, then merged into the buffer one byte at a time, walking from the
 * last affected byte back to the first.
 *
 * Each byte is merged through dh::AtomicOrByte. Because the merge is an OR,
 * the destination bits presumably have to be zero beforehand for the stored
 * value to equal \p symbol -- TODO confirm against the caller.
 *
 * \param buffer Start of the compressed buffer (including its padding).
 * \param symbol Value to store; expected to fit in symbol_bits_ bits.
 * \param offset Index of the symbol slot to write.
 */
__device__ void AtomicWriteSymbol
(CompressedByteT* buffer, uint64_t symbol, size_t offset) {
  // Inclusive bit range covered by this symbol.
  const size_t first_bit = offset * symbol_bits_;
  const size_t last_bit = (offset + 1) * symbol_bits_ - 1;

  // Inclusive byte range covered by this symbol.
  const ptrdiff_t first_byte = static_cast<ptrdiff_t>(first_bit / 8);
  ptrdiff_t cur_byte = static_cast<ptrdiff_t>(last_bit / 8);

  // Shift left so the symbol's lowest bit lines up with its position inside
  // the last byte (bit 7 of a byte is treated as the first bit of that byte).
  symbol <<= 7 - last_bit % 8;

  // The payload starts after the padding; AtomicOrByte addresses it by byte
  // index through an unsigned int pointer.
  unsigned int* payload =
      reinterpret_cast<unsigned int*>(buffer + detail::kPadding);

  // Emit the low 8 bits into the current byte, then move to the previous one.
  while (cur_byte >= first_byte) {
    dh::AtomicOrByte(payload, cur_byte, symbol & 0xff);
    symbol >>= 8;
    --cur_byte;
  }
}
#endif
template <typename IterT>
void Write(CompressedByteT *buffer, IterT input_begin, IterT input_end) {
uint64_t tmp = 0;