[GPU-Plugin] Add load balancing search to gpu_hist. Add compressed iterator. (#2504)

This commit is contained in:
Rory Mitchell
2017-07-11 22:36:39 +12:00
committed by GitHub
parent 64c8f6fa6d
commit 530f01e21c
9 changed files with 523 additions and 222 deletions

View File

@@ -0,0 +1,199 @@
/*!
* Copyright 2017 by Contributors
* \file compressed_iterator.h
*/
#pragma once
#include <xgboost/base.h>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include "dmlc/logging.h"
namespace xgboost {
namespace common {
/*! \brief Storage unit of a compressed buffer: buffers are addressed one raw
 *  byte at a time through pointers of this type. */
typedef unsigned char compressed_byte_t;
namespace detail {
/*!
 * \brief Switch on a single bit of a byte, leaving every other bit untouched.
 *
 * \param byte    Byte to modify in place.
 * \param bit_idx Index of the bit to set; 0 is the least significant bit.
 */
inline void SetBit(compressed_byte_t *byte, int bit_idx) {
  const int mask = 1 << bit_idx;
  *byte = static_cast<compressed_byte_t>(*byte | mask);
}
/*!
 * \brief Test a single bit of an integral value.
 *
 * The mask is built in a 64-bit unsigned type rather than with the `int`
 * literal `1`, so bit indices of 31 and above are handled correctly when T is
 * wider than `int`.
 *
 * \tparam T      Integral type of the value being inspected.
 * \param byte    Value to inspect.
 * \param bit_idx Index of the bit to test; 0 is the least significant bit.
 *
 * \return `byte` with every bit except `bit_idx` cleared, i.e. zero when the
 *         bit is unset and non-zero (the isolated bit) when it is set.
 */
template <typename T>
inline T CheckBit(const T &byte, int bit_idx) {
  const unsigned long long mask = 1ULL << bit_idx;
  return static_cast<T>(static_cast<unsigned long long>(byte) & mask);
}
/*!
 * \brief Switch off a single bit of a byte, leaving every other bit untouched.
 *
 * \param byte    Byte to modify in place.
 * \param bit_idx Index of the bit to clear; 0 is the least significant bit.
 */
inline void ClearBit(compressed_byte_t *byte, int bit_idx) {
  const int keep_mask = ~(1 << bit_idx);
  *byte = static_cast<compressed_byte_t>(*byte & keep_mask);
}
// Number of bytes reserved at the front of every compressed buffer, before the
// first symbol. CompressedIterator assembles each symbol from the byte holding
// the symbol's last bit plus the 4 bytes preceding it, so this padding keeps
// reads of the first symbols from running off the beginning of the array.
static const int padding = 4;
/*!
 * \brief Number of bits required to represent every value in
 *        [0, num_symbols), i.e. ceil(log2(num_symbols)).
 *
 * Computed with integer arithmetic only: the result is exact, and degenerate
 * alphabets do not go through log2(0) or log2 of a negative number.
 *
 * \param num_symbols Alphabet size.
 *
 * \return Bits per symbol; 0 when num_symbols <= 1 (a single symbol, or an
 *         empty/invalid alphabet, needs no bits).
 */
static int SymbolBits(int num_symbols) {
  if (num_symbols <= 1) {
    return 0;
  }
  // ceil(log2(n)) for n >= 2 equals the bit width of the largest symbol, n - 1.
  int bits = 0;
  for (unsigned int largest = static_cast<unsigned int>(num_symbols) - 1u;
       largest != 0u; largest >>= 1) {
    ++bits;
  }
  return bits;
}
} // namespace detail
/**
* \class CompressedBufferWriter
*
* \brief Writes bit compressed symbols to a memory buffer. Use
* CompressedIterator to read symbols back from buffer. Currently limited to a
* maximum symbol size of 28 bits.
*
* \author Rory
* \date 7/9/2017
*/
class CompressedBufferWriter {
private:
int symbol_bits_;
size_t offset_;
public:
explicit CompressedBufferWriter(int num_symbols) : offset_(0) {
symbol_bits_ = detail::SymbolBits(num_symbols);
}
/**
* \fn static size_t CompressedBufferWriter::CalculateBufferSize(int
* num_elements, int num_symbols)
*
* \brief Calculates number of bytes requiredm for a given number of elements
* and a symbol range.
*
* \author Rory
* \date 7/9/2017
*
* \param num_elements Number of elements.
* \param num_symbols Max number of symbols (alphabet size)
*
* \return The calculated buffer size.
*/
static size_t CalculateBufferSize(int num_elements, int num_symbols) {
const int bits_per_byte = 8;
int compressed_size = std::ceil(
static_cast<double>(detail::SymbolBits(num_symbols) * num_elements) /
bits_per_byte);
return compressed_size + detail::padding;
}
template <typename T>
void WriteSymbol(compressed_byte_t *buffer, T symbol, size_t offset) {
const int bits_per_byte = 8;
for (int i = 0; i < symbol_bits_; i++) {
size_t byte_idx = ((offset + 1) * symbol_bits_ - (i + 1)) / bits_per_byte;
byte_idx += detail::padding;
int bit_idx =
((bits_per_byte + i) - ((offset + 1) * symbol_bits_)) % bits_per_byte;
if (detail::CheckBit(symbol, i)) {
detail::SetBit(&buffer[byte_idx], bit_idx);
} else {
detail::ClearBit(&buffer[byte_idx], bit_idx);
}
}
}
template <typename iter_t>
void Write(compressed_byte_t *buffer, iter_t input_begin, iter_t input_end) {
uint64_t tmp = 0;
int stored_bits = 0;
const int max_stored_bits = 64 - symbol_bits_;
int buffer_position = detail::padding;
const int num_symbols = input_end - input_begin;
for (int i = 0; i < num_symbols; i++) {
typename std::iterator_traits<iter_t>::value_type symbol = input_begin[i];
if (stored_bits > max_stored_bits) {
// Eject only full bytes
int tmp_bytes = stored_bits / 8;
for (int j = 0; j < tmp_bytes; j++) {
buffer[buffer_position] = tmp >> (stored_bits - (j + 1) * 8);
buffer_position++;
}
stored_bits -= tmp_bytes * 8;
tmp &= (1 << stored_bits) - 1;
}
// Store symbol
tmp <<= symbol_bits_;
tmp |= symbol;
stored_bits += symbol_bits_;
}
// Eject all bytes
int tmp_bytes = std::ceil(static_cast<float>(stored_bits) / 8);
for (int j = 0; j < tmp_bytes; j++) {
int shift_bits = stored_bits - (j + 1) * 8;
if (shift_bits >= 0) {
buffer[buffer_position] = tmp >> shift_bits;
} else {
buffer[buffer_position] = tmp << std::abs(shift_bits);
}
buffer_position++;
}
}
};
/**
 * \class CompressedIterator
 *
 * \brief Read symbols from a bit compressed memory buffer. Usable on device and
 * host.
 *
 * The buffer is expected to start with detail::padding unused bytes followed
 * by fixed-width symbols stored most significant bit first. Dereferencing
 * returns the symbol by value; the buffer is never modified.
 *
 * \author Rory
 * \date 7/9/2017
 */
template <typename T>
class CompressedIterator {
 public:
  typedef CompressedIterator<T> self_type;  ///< My own type
  typedef ptrdiff_t
      difference_type;  ///< Type to express the result of subtracting
                        /// one iterator from another
  typedef T value_type;  ///< The type of the element the iterator can point to
  typedef value_type *pointer;  ///< The type of a pointer to an element the
                                /// iterator can point to
  typedef value_type reference;  ///< The type of a reference to an element the
                                 /// iterator can point to (returned by value)

 private:
  compressed_byte_t *buffer_;  ///< Start of the buffer, including padding
  int symbol_bits_;            ///< Number of bits occupied by each symbol
  size_t offset_;              ///< Element index this iterator points at

  /**
   * \brief Decodes the symbol stored at an absolute element index.
   *
   * \param index Zero-based element index within the buffer.
   *
   * \return The decoded symbol; 0 when symbols are zero bits wide.
   */
  XGBOOST_DEVICE value_type ReadAt(size_t index) const {
    // Zero-width symbols occupy no storage. Without this guard the "last bit"
    // index below would wrap around and the read would land far out of bounds.
    if (symbol_bits_ <= 0) {
      return static_cast<T>(0);
    }
    const size_t bits_per_byte = 8;
    // Stream position of the symbol's last (least significant) bit.
    const size_t last_bit_idx =
        (index + 1) * static_cast<size_t>(symbol_bits_) - 1;
    const size_t last_byte_idx =
        last_bit_idx / bits_per_byte + detail::padding;
    // Read 5 bytes ending at the byte that holds the last bit - the maximum we
    // will need. The leading padding keeps the 4 preceding bytes in bounds.
    uint64_t tmp = static_cast<uint64_t>(buffer_[last_byte_idx - 4]) << 32 |
                   static_cast<uint64_t>(buffer_[last_byte_idx - 3]) << 24 |
                   static_cast<uint64_t>(buffer_[last_byte_idx - 2]) << 16 |
                   static_cast<uint64_t>(buffer_[last_byte_idx - 1]) << 8 |
                   buffer_[last_byte_idx];
    // Drop the bits of the final byte that come after the symbol's last bit.
    const int bit_shift =
        static_cast<int>(bits_per_byte - 1 - last_bit_idx % bits_per_byte);
    tmp >>= bit_shift;
    // Mask off unneeded bits (64-bit shift, so wide symbols cannot overflow)
    const uint64_t mask = (static_cast<uint64_t>(1) << symbol_bits_) - 1;
    return static_cast<T>(tmp & mask);
  }

 public:
  /** \brief Creates an iterator with no buffer and zero-width symbols. */
  CompressedIterator() : buffer_(nullptr), symbol_bits_(0), offset_(0) {}

  /**
   * \brief Creates an iterator positioned at the first symbol of a buffer.
   *
   * \param buffer      Compressed buffer, including its leading padding bytes.
   * \param num_symbols Max number of symbols (alphabet size); must match the
   *                    value the buffer was written with.
   */
  CompressedIterator(compressed_byte_t *buffer, int num_symbols)
      : buffer_(buffer),
        symbol_bits_(detail::SymbolBits(num_symbols)),
        offset_(0) {}

  /** \brief Returns the symbol at the iterator's current position. */
  XGBOOST_DEVICE reference operator*() const { return ReadAt(offset_); }

  /**
   * \brief Returns the symbol `idx` elements away from the current position.
   *
   * \param idx Element offset relative to the iterator's position.
   */
  XGBOOST_DEVICE reference operator[](int idx) const {
    return ReadAt(offset_ + idx);
  }
};
} // namespace common
} // namespace xgboost