Cudf support. (#4745)
* Initial support for cudf integration. * Add two C APIs for consuming data and metainfo. * Add CopyFrom for SimpleCSRSource as a generic function to consume the data. * Add FromDeviceColumnar for consuming device data. * Add new MetaInfo::SetInfo for consuming label, weight etc.
This commit is contained in:
committed by
Rory Mitchell
parent
ab357dd41c
commit
9700776597
@@ -1,4 +1,4 @@
|
||||
// Copyright (c) 2014 by Contributors
|
||||
// Copyright (c) 2014-2019 by Contributors
|
||||
|
||||
#include <xgboost/data.h>
|
||||
#include <xgboost/learner.h>
|
||||
@@ -16,7 +16,7 @@
|
||||
#include <string>
|
||||
#include <memory>
|
||||
|
||||
#include "./c_api_error.h"
|
||||
#include "c_api_error.h"
|
||||
#include "../data/simple_csr_source.h"
|
||||
#include "../common/math.h"
|
||||
#include "../common/io.h"
|
||||
@@ -189,6 +189,16 @@ int XGDMatrixCreateFromDataIter(
|
||||
API_END();
|
||||
}
|
||||
|
||||
/*!
 * \brief Create a DMatrix from a JSON encoded list of array interfaces.
 *
 * \param c_json_strs Null terminated JSON string, forwarded to
 *                    data::SimpleCSRSource::CopyFrom for parsing.
 * \param out         Receives the handle of the newly created DMatrix.
 * \return Status code produced by the API_BEGIN / API_END wrappers.
 */
XGB_DLL int XGDMatrixCreateFromArrayInterfaces(
    char const* c_json_strs, DMatrixHandle* out) {
  API_BEGIN();
  // Constructing std::string from a null pointer is undefined behaviour, so
  // report it as a regular API error instead.
  CHECK(c_json_strs != nullptr) << "Array interface string must not be null.";
  std::string json_str {c_json_strs};
  std::unique_ptr<data::SimpleCSRSource> source (new data::SimpleCSRSource());
  source->CopyFrom(json_str);
  *out = new std::shared_ptr<DMatrix>(DMatrix::Create(std::move(source)));
  API_END();
}
|
||||
|
||||
XGB_DLL int XGDMatrixCreateFromCSREx(const size_t* indptr,
|
||||
const unsigned* indices,
|
||||
const bst_float* data,
|
||||
@@ -679,9 +689,9 @@ XGB_DLL int XGDMatrixSaveBinary(DMatrixHandle handle,
|
||||
}
|
||||
|
||||
XGB_DLL int XGDMatrixSetFloatInfo(DMatrixHandle handle,
|
||||
const char* field,
|
||||
const bst_float* info,
|
||||
xgboost::bst_ulong len) {
|
||||
const char* field,
|
||||
const bst_float* info,
|
||||
xgboost::bst_ulong len) {
|
||||
API_BEGIN();
|
||||
CHECK_HANDLE();
|
||||
static_cast<std::shared_ptr<DMatrix>*>(handle)
|
||||
@@ -689,10 +699,20 @@ XGB_DLL int XGDMatrixSetFloatInfo(DMatrixHandle handle,
|
||||
API_END();
|
||||
}
|
||||
|
||||
/*!
 * \brief Set a meta info field of a DMatrix from a JSON encoded array interface.
 *
 * \param handle          Handle to the DMatrix (validated by CHECK_HANDLE).
 * \param field           Name of the meta info field, forwarded to MetaInfo::SetInfo.
 * \param interface_c_str Null terminated JSON string describing the array.
 * \return Status code produced by the API_BEGIN / API_END wrappers.
 */
XGB_DLL int XGDMatrixSetInfoFromInterface(DMatrixHandle handle,
                                          char const* field,
                                          char const* interface_c_str) {
  API_BEGIN();
  CHECK_HANDLE();
  // Both strings are converted to std::string further down; a null pointer
  // there is undefined behaviour, so reject it up front.
  CHECK(field != nullptr) << "Meta info field name must not be null.";
  CHECK(interface_c_str != nullptr) << "Array interface string must not be null.";
  static_cast<std::shared_ptr<DMatrix>*>(handle)
      ->get()->Info().SetInfo(field, interface_c_str);
  API_END();
}
|
||||
|
||||
XGB_DLL int XGDMatrixSetUIntInfo(DMatrixHandle handle,
|
||||
const char* field,
|
||||
const unsigned* info,
|
||||
xgboost::bst_ulong len) {
|
||||
const char* field,
|
||||
const unsigned* info,
|
||||
xgboost::bst_ulong len) {
|
||||
API_BEGIN();
|
||||
CHECK_HANDLE();
|
||||
static_cast<std::shared_ptr<DMatrix>*>(handle)
|
||||
@@ -771,7 +791,7 @@ XGB_DLL int XGDMatrixNumCol(const DMatrixHandle handle,
|
||||
xgboost::bst_ulong *out) {
|
||||
API_BEGIN();
|
||||
CHECK_HANDLE();
|
||||
*out = static_cast<size_t>(
|
||||
*out = static_cast<xgboost::bst_ulong>(
|
||||
static_cast<std::shared_ptr<DMatrix>*>(handle)->get()->Info().num_col_);
|
||||
API_END();
|
||||
}
|
||||
|
||||
@@ -1,160 +0,0 @@
|
||||
#ifndef XGBOOST_COMMON_BITFIELD_CUH_
|
||||
#define XGBOOST_COMMON_BITFIELD_CUH_
|
||||
|
||||
#include <bitset>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <cinttypes>
|
||||
|
||||
#include "span.h"
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
/*!
 * \brief Atomically OR `val` into `*address` with a compare-and-swap retry loop.
 *
 * \return The value stored at `address` immediately before the successful update.
 */
__forceinline__ __device__ unsigned long long AtomicOr(unsigned long long* address,
                                                       unsigned long long val) {
  unsigned long long int observed = *address;  // NOLINT
  while (true) {
    unsigned long long int const expected = observed;  // NOLINT
    observed = atomicCAS(address, expected, val | expected);
    // atomicCAS returns what it found; equality means our swap went through.
    if (expected == observed) {
      break;
    }
  }
  return observed;
}
|
||||
|
||||
/*!
 * \brief Atomically AND `val` into `*address` with a compare-and-swap retry loop.
 *
 * \return The value stored at `address` immediately before the successful update.
 */
__forceinline__ __device__ unsigned long long AtomicAnd(unsigned long long* address,
                                                        unsigned long long val) {
  unsigned long long int observed = *address;  // NOLINT
  while (true) {
    unsigned long long int const expected = observed;  // NOLINT
    observed = atomicCAS(address, expected, val & expected);
    // atomicCAS returns what it found; equality means our swap went through.
    if (expected == observed) {
      break;
    }
  }
  return observed;
}
|
||||
|
||||
/*!
|
||||
* \brief A non-owning type with auxiliary methods defined for manipulating bits.
|
||||
*/
|
||||
struct BitField {
  using value_type = uint64_t;

  // Number of bits held by one storage word.
  static value_type constexpr kValueSize = sizeof(value_type) * 8;
  static value_type constexpr kOne = 1UL;  // force uint64_t
  static_assert(kValueSize == 64, "uint64_t should be of 64 bits.");

  // Location of a single bit: index of the storage word and offset inside it.
  struct Pos {
    value_type int_pos {0};
    value_type bit_pos {0};
  };

  // Non-owning view of the storage words.
  common::Span<value_type> bits_;

 public:
  BitField() = default;
  XGBOOST_DEVICE BitField(common::Span<value_type> bits) : bits_{bits} {}
  XGBOOST_DEVICE BitField(BitField const& other) : bits_{other.bits_} {}

  // Number of storage words reserved for `size` bits.
  static size_t ComputeStorageSize(size_t size) {
    if (size < kValueSize) {
      return 1;
    }
    Pos const where = ToBitPos(size);
    return where.int_pos + (where.bit_pos != 0 ? 2 : 1);
  }

  // Split a flat bit index into (word index, offset within the word).
  XGBOOST_DEVICE static Pos ToBitPos(value_type pos) {
    Pos where;
    if (pos != 0) {
      where.int_pos = pos / kValueSize;
      where.bit_pos = pos % kValueSize;
    }
    return where;
  }

  // Each calling thread ORs the single word selected by its global thread index.
  __device__ BitField& operator|=(BitField const& rhs) {
    auto tid = blockIdx.x * blockDim.x + threadIdx.x;
    size_t n_common = min(bits_.size(), rhs.bits_.size());
    if (!(tid < n_common)) {
      return *this;
    }
    bits_[tid] |= rhs.bits_[tid];
    return *this;
  }

  // Each calling thread ANDs the single word selected by its global thread index.
  __device__ BitField& operator&=(BitField const& rhs) {
    size_t n_common = min(bits_.size(), rhs.bits_.size());
    auto tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (!(tid < n_common)) {
      return *this;
    }
    bits_[tid] &= rhs.bits_[tid];
    return *this;
  }

  // Total number of bits the viewed storage can hold.
  XGBOOST_DEVICE size_t Size() const { return kValueSize * bits_.size(); }

  // Atomically set bit `pos`; offset 0 maps to the most significant bit of a word.
  __device__ void Set(value_type pos) {
    Pos const where = ToBitPos(pos);
    value_type& word = bits_[where.int_pos];
    value_type const mask = kOne << (kValueSize - where.bit_pos - kOne);
    static_assert(sizeof(unsigned long long int) == sizeof(value_type), "");
    AtomicOr(reinterpret_cast<unsigned long long*>(&word), mask);
  }

  // Atomically clear bit `pos`.
  __device__ void Clear(value_type pos) {
    Pos const where = ToBitPos(pos);
    value_type& word = bits_[where.int_pos];
    value_type const mask = ~(kOne << (kValueSize - where.bit_pos - kOne));
    static_assert(sizeof(unsigned long long int) == sizeof(value_type), "");
    AtomicAnd(reinterpret_cast<unsigned long long*>(&word), mask);
  }

  // Test the bit addressed by an already decomposed position.
  XGBOOST_DEVICE bool Check(Pos pos_v) const {
    value_type const word = bits_[pos_v.int_pos];
    value_type const probe = kOne << (kValueSize - pos_v.bit_pos - kOne);
    return static_cast<bool>(probe & word);
  }

  // Test the bit at flat index `pos`.
  XGBOOST_DEVICE bool Check(value_type pos) const {
    return Check(ToBitPos(pos));
  }

  // Print the storage size followed by one line of binary digits per word.
  friend std::ostream& operator<<(std::ostream& os, BitField field) {
    os << "Bits " << "storage size: " << field.bits_.size() << "\n";
    for (size_t word_idx = 0; word_idx < field.bits_.size(); ++word_idx) {
      std::bitset<BitField::kValueSize> as_bits(field.bits_[word_idx]);
      os << as_bits << "\n";
    }
    return os;
  }
};
|
||||
|
||||
// Debug helper: copies the words viewed by `field` into host memory through
// thrust and prints them to std::cout under the heading `name`.
inline void PrintDeviceBits(std::string name, BitField field) {
  using Word = BitField::value_type;

  std::cout << "Bits: " << name << std::endl;

  std::vector<Word> host_words(field.bits_.size());
  thrust::device_ptr<Word> const src_begin(field.bits_.data());
  thrust::device_ptr<Word> const src_end(field.bits_.data() + field.bits_.size());
  thrust::copy(src_begin, src_end, host_words.data());

  // Wrap the host copy in a BitField so the stream operator can be reused.
  BitField host_view;
  host_view.bits_ = {host_words.data(), host_words.data() + host_words.size()};
  std::cout << host_view;
}
|
||||
|
||||
// Debug helper: copies `list` into host memory through thrust and prints the
// values on one line, each followed by ", ", after printing `name`.
inline void PrintDeviceStorage(std::string name, common::Span<int32_t> list) {
  std::cout << name << std::endl;

  std::vector<int32_t> host_values(list.size());
  thrust::device_ptr<int32_t> const src_begin(list.data());
  thrust::device_ptr<int32_t> const src_end(list.data() + list.size());
  thrust::copy(src_begin, src_end, host_values.data());

  for (size_t i = 0; i < host_values.size(); ++i) {
    std::cout << host_values[i] << ", ";
  }
  std::cout << std::endl;
}
|
||||
|
||||
}
|
||||
|
||||
#endif // XGBOOST_COMMON_BITFIELD_CUH_
|
||||
248
src/common/bitfield.h
Normal file
248
src/common/bitfield.h
Normal file
@@ -0,0 +1,248 @@
|
||||
/*!
|
||||
* Copyright 2019 by Contributors
|
||||
* \file bitfield.h
|
||||
*/
|
||||
#ifndef XGBOOST_COMMON_BITFIELD_H_
|
||||
#define XGBOOST_COMMON_BITFIELD_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <bitset>
|
||||
#include <cinttypes>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "span.h"
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
#if defined(__CUDACC__)
|
||||
using BitFieldAtomicType = unsigned long long; // NOLINT
|
||||
|
||||
/*!
 * \brief Atomically OR `val` into `*address` with a compare-and-swap retry loop.
 *
 * \return The value stored at `address` immediately before the successful update.
 */
__forceinline__ __device__ BitFieldAtomicType AtomicOr(BitFieldAtomicType* address,
                                                       BitFieldAtomicType val) {
  BitFieldAtomicType observed = *address;  // NOLINT
  while (true) {
    BitFieldAtomicType const expected = observed;
    observed = atomicCAS(address, expected, val | expected);
    // atomicCAS returns what it found; equality means our swap went through.
    if (expected == observed) {
      break;
    }
  }
  return observed;
}
|
||||
|
||||
/*!
 * \brief Atomically AND `val` into `*address` with a compare-and-swap retry loop.
 *
 * \return The value stored at `address` immediately before the successful update.
 */
__forceinline__ __device__ BitFieldAtomicType AtomicAnd(BitFieldAtomicType* address,
                                                        BitFieldAtomicType val) {
  BitFieldAtomicType observed = *address;  // NOLINT
  while (true) {
    BitFieldAtomicType const expected = observed;
    observed = atomicCAS(address, expected, val & expected);
    // atomicCAS returns what it found; equality means our swap went through.
    if (expected == observed) {
      break;
    }
  }
  return observed;
}
|
||||
#endif // defined(__CUDACC__)
|
||||
|
||||
/*!
|
||||
* \brief A non-owning type with auxiliary methods defined for manipulating bits.
|
||||
*
|
||||
* \tparam Direction Whether the bits start from left or from right.
|
||||
*/
|
||||
template <typename VT, typename Direction>
struct BitFieldContainer {
  using value_type = VT;
  using pointer = value_type*;
  // Bit and word indices are carried in size_t rather than value_type.  With a
  // narrow storage type such as unsigned char, using value_type for indices
  // silently truncated every position above 255.
  using index_type = size_t;

  // Number of bits held by one storage word.
  static value_type constexpr kValueSize = sizeof(value_type) * 8;
  static value_type constexpr kOne = 1;  // force correct type.

  // Location of a single bit: index of the storage word and offset inside it.
  struct Pos {
    index_type int_pos {0};
    index_type bit_pos {0};
  };

  // Non-owning view of the storage words.
  common::Span<value_type> bits_;
  static_assert(!std::is_signed<VT>::value, "Must use unsiged type as underlying storage.");

  /*!
   * \brief Split a flat bit index into (word index, offset within the word).
   */
  XGBOOST_DEVICE static Pos ToBitPos(index_type pos) {
    Pos pos_v;
    if (pos == 0) {
      return pos_v;
    }
    pos_v.int_pos = pos / kValueSize;
    pos_v.bit_pos = pos % kValueSize;
    return pos_v;
  }

 public:
  BitFieldContainer() = default;
  XGBOOST_DEVICE BitFieldContainer(common::Span<value_type> bits) : bits_{bits} {}
  XGBOOST_DEVICE BitFieldContainer(BitFieldContainer const& other) : bits_{other.bits_} {}

  /*!
   * \brief Number of storage words reserved for `size` bits.
   *
   * Returns 1 when `size` is smaller than one word; otherwise one or two words
   * more than the number of completely filled words.
   */
  static size_t ComputeStorageSize(size_t size) {
    auto pos = ToBitPos(size);
    if (size < kValueSize) {
      return 1;
    }

    if (pos.bit_pos != 0) {
      return pos.int_pos + 2;
    } else {
      return pos.int_pos + 1;
    }
  }

#if defined(__CUDA_ARCH__)
  // Device version: each calling thread ORs the single word selected by its
  // global thread index.
  __device__ BitFieldContainer& operator|=(BitFieldContainer const& rhs) {
    auto tid = blockIdx.x * blockDim.x + threadIdx.x;
    size_t min_size = min(bits_.size(), rhs.bits_.size());
    if (tid < min_size) {
      bits_[tid] |= rhs.bits_[tid];
    }
    return *this;
  }
#else
  // Host version: ORs every word shared by both containers.
  BitFieldContainer& operator|=(BitFieldContainer const& rhs) {
    size_t min_size = std::min(bits_.size(), rhs.bits_.size());
    for (size_t i = 0; i < min_size; ++i) {
      bits_[i] |= rhs.bits_[i];
    }
    return *this;
  }
#endif  // #if defined(__CUDA_ARCH__)

#if defined(__CUDA_ARCH__)
  // Device version: each calling thread ANDs the single word selected by its
  // global thread index.
  __device__ BitFieldContainer& operator&=(BitFieldContainer const& rhs) {
    size_t min_size = min(bits_.size(), rhs.bits_.size());
    auto tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid < min_size) {
      bits_[tid] &= rhs.bits_[tid];
    }
    return *this;
  }
#else
  // Host version: ANDs every word shared by both containers.
  BitFieldContainer& operator&=(BitFieldContainer const& rhs) {
    size_t min_size = std::min(bits_.size(), rhs.bits_.size());
    for (size_t i = 0; i < min_size; ++i) {
      bits_[i] &= rhs.bits_[i];
    }
    return *this;
  }
#endif  // defined(__CUDA_ARCH__)

#if defined(__CUDA_ARCH__)
  // Device version: atomically set bit `pos`.  The static_assert restricts
  // this to storage words as wide as BitFieldAtomicType.
  __device__ void Set(index_type pos) {
    Pos pos_v = Direction::Shift(ToBitPos(pos));
    value_type& value = bits_[pos_v.int_pos];
    value_type set_bit = kOne << pos_v.bit_pos;
    static_assert(sizeof(BitFieldAtomicType) == sizeof(value_type), "");
    AtomicOr(reinterpret_cast<BitFieldAtomicType*>(&value), set_bit);
  }
  // Device version: atomically clear bit `pos`.
  __device__ void Clear(index_type pos) {
    Pos pos_v = Direction::Shift(ToBitPos(pos));
    value_type& value = bits_[pos_v.int_pos];
    value_type clear_bit = ~(kOne << pos_v.bit_pos);
    static_assert(sizeof(BitFieldAtomicType) == sizeof(value_type), "");
    AtomicAnd(reinterpret_cast<BitFieldAtomicType*>(&value), clear_bit);
  }
#else
  // Host version: set bit `pos` with a plain read-modify-write.
  void Set(index_type pos) {
    Pos pos_v = Direction::Shift(ToBitPos(pos));
    value_type& value = bits_[pos_v.int_pos];
    value_type set_bit = kOne << pos_v.bit_pos;
    value |= set_bit;
  }
  // Host version: clear bit `pos` with a plain read-modify-write.
  void Clear(index_type pos) {
    Pos pos_v = Direction::Shift(ToBitPos(pos));
    value_type& value = bits_[pos_v.int_pos];
    value_type clear_bit = ~(kOne << pos_v.bit_pos);
    value &= clear_bit;
  }
#endif  // defined(__CUDA_ARCH__)

  // Test the bit addressed by an already decomposed, not yet shifted, position.
  XGBOOST_DEVICE bool Check(Pos pos_v) const {
    pos_v = Direction::Shift(pos_v);
    value_type const value = bits_[pos_v.int_pos];
    value_type const test_bit = kOne << pos_v.bit_pos;
    value_type result = test_bit & value;
    return static_cast<bool>(result);
  }
  // Test the bit at flat index `pos`.
  XGBOOST_DEVICE bool Check(index_type pos) const {
    Pos pos_v = ToBitPos(pos);
    return Check(pos_v);
  }

  // Total number of bits the viewed storage can hold.
  XGBOOST_DEVICE size_t Size() const { return kValueSize * bits_.size(); }

  // Pointer to the first storage word.
  XGBOOST_DEVICE pointer Data() const { return bits_.data(); }

  // Print the storage size followed by one line of binary digits per word.
  friend std::ostream& operator<<(std::ostream& os, BitFieldContainer<VT, Direction> field) {
    os << "Bits " << "storage size: " << field.bits_.size() << "\n";
    for (typename common::Span<value_type>::index_type i = 0; i < field.bits_.size(); ++i) {
      std::bitset<BitFieldContainer<VT, Direction>::kValueSize> bset(field.bits_[i]);
      os << bset << "\n";
    }
    return os;
  }
};
|
||||
|
||||
// Bits start from left most bits (most significant bit).
|
||||
template <typename VT>
struct LBitsPolicy : public BitFieldContainer<VT, LBitsPolicy<VT>> {
  using Container = BitFieldContainer<VT, LBitsPolicy<VT>>;
  using Pos = typename Container::Pos;
  using value_type = typename Container::value_type;

  // Mirror the in-word offset so that offset 0 addresses the highest bit of
  // the storage word.  The word index is left untouched.
  XGBOOST_DEVICE static Pos Shift(Pos pos) {
    Pos mirrored = pos;
    mirrored.bit_pos = Container::kValueSize - pos.bit_pos - Container::kOne;
    return mirrored;
  }
};
|
||||
|
||||
// Bits start from right most bit (least significant bit) of each entry, but integer index
|
||||
// is from left to right.
|
||||
template <typename VT>
struct RBitsPolicy : public BitFieldContainer<VT, RBitsPolicy<VT>> {
  using Container = BitFieldContainer<VT, RBitsPolicy<VT>>;
  using Pos = typename Container::Pos;
  using value_type = typename Container::value_type;

  // Identity mapping: the in-word offset is used directly as the shift amount.
  XGBOOST_DEVICE static Pos Shift(Pos pos) {
    Pos const unchanged = pos;
    return unchanged;
  }
};
|
||||
|
||||
// Format: <Direction>BitField<size of underlying type>, underlying type must be unsigned.
|
||||
using LBitField64 = BitFieldContainer<uint64_t, LBitsPolicy<uint64_t>>;
|
||||
using RBitField8 = BitFieldContainer<unsigned char, RBitsPolicy<unsigned char>>;
|
||||
|
||||
#if defined(__CUDACC__)
|
||||
|
||||
// Debug helper: copies the words viewed by `field` into host memory through
// thrust and prints them to std::cout under the heading `name`.
template <typename V, typename D>
inline void PrintDeviceBits(std::string name, BitFieldContainer<V, D> field) {
  using Field = BitFieldContainer<V, D>;
  using Word = typename Field::value_type;

  std::cout << "Bits: " << name << std::endl;

  std::vector<Word> host_words(field.bits_.size());
  thrust::device_ptr<Word> const src_begin(field.bits_.data());
  thrust::device_ptr<Word> const src_end(field.bits_.data() + field.bits_.size());
  thrust::copy(src_begin, src_end, host_words.data());

  // Wrap the host copy in a container so the stream operator can be reused.
  Field host_view;
  host_view.bits_ = {host_words.data(), host_words.data() + host_words.size()};
  std::cout << host_view;
}
|
||||
|
||||
// Debug helper: copies `list` into host memory through thrust and prints the
// values on one line, each followed by ", ", after printing `name`.
inline void PrintDeviceStorage(std::string name, common::Span<int32_t> list) {
  std::cout << name << std::endl;

  std::vector<int32_t> host_values(list.size());
  thrust::device_ptr<int32_t> const src_begin(list.data());
  thrust::device_ptr<int32_t> const src_end(list.data() + list.size());
  thrust::copy(src_begin, src_end, host_values.data());

  for (size_t i = 0; i < host_values.size(); ++i) {
    std::cout << host_values[i] << ", ";
  }
  std::cout << std::endl;
}
|
||||
|
||||
#endif // defined(__CUDACC__)
|
||||
} // namespace xgboost
|
||||
|
||||
#endif // XGBOOST_COMMON_BITFIELD_H_
|
||||
@@ -51,12 +51,18 @@ inline ncclResult_t ThrowOnNcclError(ncclResult_t code, const char *file,
|
||||
}
|
||||
#endif
|
||||
|
||||
inline void CudaCheckPointerDevice(void* ptr) {
|
||||
/*!
 * \brief Query the device ordinal that CUDA reports for `ptr`.
 *
 * \param ptr Pointer passed to cudaPointerGetAttributes.
 * \return The `device` field of the returned attributes.  A failing query is
 *         reported through dh::safe_cuda.
 */
inline int32_t CudaGetPointerDevice(void* ptr) {
  cudaPointerAttributes attr;
  dh::safe_cuda(cudaPointerGetAttributes(&attr, ptr));
  return attr.device;
}
|
||||
|
||||
/*!
 * \brief Verify that `ptr` belongs to the currently selected CUDA device.
 *
 * Fails through CHECK_EQ when the device reported for the pointer differs from
 * the one returned by cudaGetDevice.
 */
inline void CudaCheckPointerDevice(void* ptr) {
  auto ptr_device = CudaGetPointerDevice(ptr);
  int cur_device = -1;
  // Query the current device exactly once and check the returned status.
  dh::safe_cuda(cudaGetDevice(&cur_device));
  CHECK_EQ(ptr_device, cur_device) << "pointer device: " << ptr_device
                                   << ", current device: " << cur_device;
}
|
||||
|
||||
@@ -365,8 +365,9 @@ struct HostDeviceVectorImpl {
|
||||
void Shard(const GPUDistribution& distribution) {
|
||||
if (distribution_ == distribution) { return; }
|
||||
CHECK(distribution_.IsEmpty())
|
||||
<< "This: " << distribution_.Devices().Size() << ", "
|
||||
<< "Others: " << distribution.Devices().Size();
|
||||
<< "Data resides on different GPUs: " << "ID: "
|
||||
<< *(distribution_.Devices().begin()) << " and ID: "
|
||||
<< *(distribution.Devices().begin());
|
||||
distribution_ = distribution;
|
||||
InitShards();
|
||||
}
|
||||
@@ -412,6 +413,7 @@ struct HostDeviceVectorImpl {
|
||||
perm_h_.Grant(access);
|
||||
return;
|
||||
}
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
if (data_h_.size() != size_d_) { data_h_.resize(size_d_); }
|
||||
dh::ExecuteIndexShards(&shards_, [&](int idx, DeviceShard& shard) {
|
||||
shard.LazySyncHost(access);
|
||||
|
||||
82
src/data/columnar.h
Normal file
82
src/data/columnar.h
Normal file
@@ -0,0 +1,82 @@
|
||||
/*!
|
||||
* Copyright 2019 by Contributors
|
||||
* \file columnar.h
|
||||
* \brief Basic structure holding a reference to arrow columnar data format.
|
||||
*/
|
||||
#ifndef XGBOOST_DATA_COLUMNAR_H_
|
||||
#define XGBOOST_DATA_COLUMNAR_H_
|
||||
|
||||
#include <cinttypes>
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
#include "xgboost/data.h"
|
||||
#include "xgboost/json.h"
|
||||
#include "../common/span.h"
|
||||
#include "../common/bitfield.h"
|
||||
|
||||
namespace xgboost {
|
||||
// Non-owning description of one foreign column: a view of its float values, a
// validity bit field and two element counts.
struct Columnar {
|
||||
using mask_type = unsigned char;
|
||||
using index_type = int32_t;
|
||||
|
||||
// View of the column values.
common::Span<float> data;
|
||||
// Validity bits stored in unsigned char words (RBitField8).
// NOTE(review): presumably a set bit marks a non-null entry -- confirm with the consumer.
RBitField8 valid;
|
||||
// Number of elements in `data`.
int32_t size;
|
||||
// Number of null entries reported for this column.
int32_t null_count;
|
||||
};
|
||||
|
||||
// Common errors in parsing columnar format.
|
||||
struct ColumnarErrors {
  // Message for a stride check that failed.
  static char const* Contigious() {
    return "Memory should be contigious.";
  }
  // Message for a malformed `typestr` field.
  static char const* TypestrFormat() {
    return "`typestr` should be of format <endian><type><size>.";
  }
  // Not supported in Apache Arrow.
  static char const* BigEndian() {
    return "Big endian is not supported.";
  }
  /*!
   * \brief Message stating that only `d` dimensional arrays are accepted.
   *
   * The text lives in a per-thread buffer, so concurrent callers no longer
   * write to one shared string.  The returned pointer stays valid until the
   * calling thread invokes Dimension() again.
   */
  static char const* Dimension(int32_t d) {
    static thread_local std::string str;
    str.clear();
    str += "Only ";
    str += std::to_string(d);
    str += " dimensional array is valid.";
    return str.c_str();
  }
  // Message for an unsupported interface version.
  static char const* Version() {
    return "Only version 1 of __cuda_array_interface__ is being supported.";
  }
  // Message asking for float32 input.
  static char const* toFloat() {
    return "Please convert the input into float32 first.";
  }
  // Message asking for unsigned 32 bit group input.
  static char const* toUInt() {
    return "Please convert the Group into unsigned 32 bit integers first.";
  }
  /*!
   * \brief Message fragment " should be of <type> type."; callers stream the
   *        subject name in front of it.
   *
   * The text lives in a per-thread buffer, so concurrent callers no longer
   * write to one shared string.  The returned pointer stays valid until the
   * calling thread invokes ofType() again.
   */
  static char const* ofType(std::string type) {
    static thread_local std::string str;
    str.clear();
    str += " should be of ";
    str += type;
    str += " type.";
    return str.c_str();
  }
};
|
||||
|
||||
template <typename PtrType>
|
||||
PtrType GetPtrFromArrayData(std::map<std::string, Json> const& obj) {
|
||||
if (obj.find("data") == obj.cend()) {
|
||||
LOG(FATAL) << "Empty data passed in.";
|
||||
}
|
||||
auto p_data = reinterpret_cast<PtrType>(static_cast<size_t>(
|
||||
get<Integer const>(
|
||||
get<Array const>(
|
||||
obj.at("data"))
|
||||
.at(0))));
|
||||
return p_data;
|
||||
}
|
||||
|
||||
} // namespace xgboost
|
||||
#endif // XGBOOST_DATA_COLUMNAR_H_
|
||||
@@ -1,11 +1,12 @@
|
||||
/*!
|
||||
* Copyright 2015 by Contributors
|
||||
* Copyright 2015-2019 by Contributors
|
||||
* \file data.cc
|
||||
*/
|
||||
#include <xgboost/data.h>
|
||||
#include <xgboost/logging.h>
|
||||
#include <dmlc/registry.h>
|
||||
#include <cstring>
|
||||
|
||||
#include "./sparse_page_writer.h"
|
||||
#include "./simple_dmatrix.h"
|
||||
#include "./simple_csr_source.h"
|
||||
@@ -110,7 +111,6 @@ inline bool MetaTryLoadFloatInfo(const std::string& fname,
|
||||
default: LOG(FATAL) << "Unknown data type" << dtype; \
|
||||
} \
|
||||
|
||||
|
||||
void MetaInfo::SetInfo(const char* key, const void* dptr, DataType dtype, size_t num) {
|
||||
if (!std::strcmp(key, "root_index")) {
|
||||
root_index_.resize(num);
|
||||
@@ -139,9 +139,17 @@ void MetaInfo::SetInfo(const char* key, const void* dptr, DataType dtype, size_t
|
||||
for (size_t i = 1; i < group_ptr_.size(); ++i) {
|
||||
group_ptr_[i] = group_ptr_[i - 1] + group_ptr_[i];
|
||||
}
|
||||
} else {
|
||||
LOG(FATAL) << "Unknown metainfo: " << key;
|
||||
}
|
||||
}
|
||||
|
||||
#if !defined(XGBOOST_USE_CUDA)
|
||||
// Stub compiled when XGBOOST_USE_CUDA is not defined: setting meta info from
// an array interface string always fails with a fatal log message.
void MetaInfo::SetInfo(const char * c_key, std::string const& interface_str) {
|
||||
LOG(FATAL) << "XGBoost version is not compiled with GPU support";
|
||||
}
|
||||
#endif // !defined(XGBOOST_USE_CUDA)
|
||||
|
||||
DMatrix* DMatrix::Load(const std::string& uri,
|
||||
bool silent,
|
||||
bool load_row_split,
|
||||
|
||||
86
src/data/data.cu
Normal file
86
src/data/data.cu
Normal file
@@ -0,0 +1,86 @@
|
||||
/*!
|
||||
* Copyright 2019 by XGBoost Contributors
|
||||
*
|
||||
* \file data.cu
|
||||
*/
|
||||
|
||||
#include "xgboost/data.h"
|
||||
#include "xgboost/logging.h"
|
||||
#include "xgboost/json.h"
|
||||
#include "columnar.h"
|
||||
#include "../common/device_helpers.cuh"
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
/*!
 * \brief Set a meta info field from a JSON encoded array interface.
 *
 * \param c_key         One of "label", "weight", "base_margin" or "group";
 *                      "root_index" and any other key are rejected.
 * \param interface_str JSON object holding `version`, `typestr`, `shape`,
 *                      `data` and optionally `strides`.  A `mask` field is
 *                      rejected.
 *
 * Float fields are copied device-to-device into the matching
 * HostDeviceVector.  Group sizes are copied to the host and turned into a
 * prefix sum in `group_ptr_`.  The current CUDA device is switched to the one
 * reported for the data pointer and is not restored afterwards.
 */
void MetaInfo::SetInfo(const char * c_key, std::string const& interface_str) {
  Json j_arr = Json::Load({interface_str.c_str(), interface_str.size()});
  auto const& j_arr_obj = get<Object>(j_arr);
  std::string key {c_key};
  auto version = get<Integer const>(j_arr_obj.at("version"));
  CHECK_EQ(version, 1) << ColumnarErrors::Version();
  if (j_arr_obj.find("mask") != j_arr_obj.cend()) {
    LOG(FATAL) << "Meta info " << key << " should be dense, found validity mask";
  }

  auto typestr = get<String const>(j_arr_obj.at("typestr"));
  CHECK_EQ(typestr.size(), 3) << ColumnarErrors::TypestrFormat();
  CHECK_NE(typestr.front(), '>') << ColumnarErrors::BigEndian();

  auto j_shape = get<Array const>(j_arr_obj.at("shape"));
  CHECK_EQ(j_shape.size(), 1) << ColumnarErrors::Dimension(1);
  auto length = get<Integer const>(j_shape.at(0));
  CHECK_GT(length, 0) << "Label set cannot be empty.";

  if (j_arr_obj.find("strides") != j_arr_obj.cend()) {
    auto strides = get<Array const>(j_arr_obj.at("strides"));
    // Report a wrong number of strides as a dimension error instead of
    // letting `at(0)` throw on an empty array.
    CHECK_EQ(strides.size(), 1) << ColumnarErrors::Dimension(1);
    CHECK_EQ(get<Integer>(strides.at(0)), 4) << ColumnarErrors::Contigious();
  }

  float* p_data = GetPtrFromArrayData<float*>(j_arr_obj);

  // Run the copies on the device reported for the data pointer.
  cudaPointerAttributes attr;
  dh::safe_cuda(cudaPointerGetAttributes(&attr, p_data));
  int32_t ptr_device = attr.device;
  dh::safe_cuda(cudaSetDevice(ptr_device));

  // The three float fields share the same element type requirements.
  auto check_float = [&typestr](char const* name) {
    CHECK_EQ(typestr.at(1), 'f') << name
                                 << ColumnarErrors::ofType("floating point");
    CHECK_EQ(typestr.at(2), '4') << ColumnarErrors::toFloat();
  };

  HostDeviceVector<float>* dst = nullptr;
  if (key == "root_index") {
    LOG(FATAL) << "root index for columnar data is not supported.";
  } else if (key == "label") {
    dst = &labels_;
    check_float("Label");
  } else if (key == "weight") {
    dst = &weights_;
    check_float("Weight");
  } else if (key == "base_margin") {
    dst = &base_margin_;
    check_float("Base Margin");
  } else if (key == "group") {
    CHECK_EQ(typestr.at(1), 'u') << "Group"
                                 << ColumnarErrors::ofType("unsigned 32 bit integers");
    CHECK_EQ(typestr.at(2), '4') << ColumnarErrors::toUInt();
    group_ptr_.resize(length + 1);
    group_ptr_[0] = 0;
    // The buffer was just validated to hold unsigned 32 bit integers, so it
    // must be read as such.  Reading it through a float pointer would
    // reinterpret the integer bit patterns as floats and convert those.
    thrust::device_ptr<bst_uint> p_group_src {reinterpret_cast<bst_uint*>(p_data)};
    // Ranking is not performed on device.
    thrust::copy(p_group_src, p_group_src + length, group_ptr_.begin() + 1);
    // Turn the group sizes into an inclusive prefix sum.
    for (size_t i = 1; i < group_ptr_.size(); ++i) {
      group_ptr_[i] = group_ptr_[i - 1] + group_ptr_[i];
    }
    return;
  } else {
    LOG(FATAL) << "Unknown metainfo: " << key;
  }

  thrust::device_ptr<float> p_src {p_data};
  dst->Reshard(GPUDistribution(GPUSet::Range(ptr_device, 1)));
  dst->Resize(length);
  auto p_dst = thrust::device_pointer_cast(dst->DevicePointer(0));
  thrust::copy(p_src, p_src + length, p_dst);
}
|
||||
} // namespace xgboost
|
||||
@@ -1,11 +1,14 @@
|
||||
/*!
|
||||
* Copyright 2015 by Contributors
|
||||
* Copyright 2015-2019 by Contributors
|
||||
* \file simple_csr_source.cc
|
||||
*/
|
||||
#include <dmlc/base.h>
|
||||
#include <xgboost/logging.h>
|
||||
#include <xgboost/json.h>
|
||||
|
||||
#include <limits>
|
||||
#include "./simple_csr_source.h"
|
||||
#include "simple_csr_source.h"
|
||||
#include "columnar.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
@@ -117,5 +120,143 @@ const SparsePage& SimpleCSRSource::Value() const {
|
||||
return page_;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Please be careful that, in official specification, the only three required fields are
|
||||
* `shape', `version' and `typestr'. Any other is optional, including `data'. But here
|
||||
* we have two additional requirements for input data:
|
||||
*
|
||||
* - `data' field is required, passing in an empty dataset is not accepted, as most (if
|
||||
* not all) of our algorithms don't have test for empty dataset. An error is better
|
||||
* than a crash.
|
||||
*
|
||||
* - `null_count' is required when `mask' is present. We can compute `null_count'
|
||||
* ourselves and copy the result back to host for memory allocation. But it's in the
|
||||
* specification of Apache Arrow hence it should be readily available.
|
||||
*
|
||||
* Sample input:
|
||||
* [
|
||||
* {
|
||||
* "shape": [
|
||||
* 10
|
||||
* ],
|
||||
* "strides": [
|
||||
* 4
|
||||
* ],
|
||||
* "data": [
|
||||
* 30074864128,
|
||||
* false
|
||||
* ],
|
||||
* "typestr": "<f4",
|
||||
* "version": 1,
|
||||
* "mask": {
|
||||
* "shape": [
|
||||
* 64
|
||||
* ],
|
||||
* "strides": [
|
||||
* 1
|
||||
* ],
|
||||
* "data": [
|
||||
* 30074864640,
|
||||
* false
|
||||
* ],
|
||||
* "typestr": "|i1",
|
||||
* "version": 1,
|
||||
* "null_count": 1
|
||||
* }
|
||||
* }
|
||||
* ]
|
||||
*/
|
||||
void SimpleCSRSource::CopyFrom(std::string const& cuda_interfaces_str) {
|
||||
Json interfaces = Json::Load({cuda_interfaces_str.c_str(),
|
||||
cuda_interfaces_str.size()});
|
||||
std::vector<Json> const& columns = get<Array>(interfaces);
|
||||
size_t n_columns = columns.size();
|
||||
CHECK_GT(n_columns, 0);
|
||||
|
||||
std::vector<Columnar> foreign_cols(n_columns);
|
||||
for (size_t i = 0; i < columns.size(); ++i) {
|
||||
CHECK(IsA<Object>(columns[i]));
|
||||
auto const& column = get<Object const>(columns[i]);
|
||||
|
||||
auto version = get<Integer const>(column.at("version"));
|
||||
CHECK_EQ(version, 1) << ColumnarErrors::Version();
|
||||
|
||||
// Find null mask (validity mask) field
|
||||
// Mask object is also an array interface, but with different requirements.
|
||||
|
||||
// TODO(trivialfis): Abstract this into a class that accept a json
|
||||
// object and turn it into an array (for cupy and numba).
|
||||
common::Span<RBitField8::value_type> s_mask;
|
||||
int32_t null_count {0};
|
||||
if (column.find("mask") != column.cend()) {
|
||||
auto const& j_mask = get<Object const>(column.at("mask"));
|
||||
auto p_mask = GetPtrFromArrayData<RBitField8::value_type*>(j_mask);
|
||||
|
||||
auto j_shape = get<Array const>(j_mask.at("shape"));
|
||||
CHECK_EQ(j_shape.size(), 1) << ColumnarErrors::Dimension(1);
|
||||
CHECK_EQ(get<Integer>(j_shape.front()) % 8, 0) <<
|
||||
"Length of validity map must be a multiple of 8 bytes.";
|
||||
int64_t size = get<Integer>(j_shape.at(0)) *
|
||||
sizeof(unsigned char) / sizeof(RBitField8::value_type);
|
||||
s_mask = {p_mask, size};
|
||||
auto typestr = get<String const>(j_mask.at("typestr"));
|
||||
CHECK_EQ(typestr.size(), 3) << ColumnarErrors::TypestrFormat();
|
||||
CHECK_NE(typestr.front(), '>') << ColumnarErrors::BigEndian();
|
||||
CHECK_EQ(typestr.at(1), 'i') << "mask" << ColumnarErrors::ofType("unsigned char");
|
||||
CHECK_EQ(typestr.at(2), '1') << "mask" << ColumnarErrors::toUInt();
|
||||
|
||||
CHECK(j_mask.find("null_count") != j_mask.cend()) <<
|
||||
"Column with null mask must include null_count as "
|
||||
"part of mask object for XGBoost.";
|
||||
null_count = get<Integer const>(j_mask.at("null_count"));
|
||||
}
|
||||
|
||||
// Find data field
|
||||
if (column.find("data") == column.cend()) {
|
||||
LOG(FATAL) << "Empty dataset passed in.";
|
||||
}
|
||||
|
||||
auto typestr = get<String const>(column.at("typestr"));
|
||||
CHECK_EQ(typestr.size(), 3) << ColumnarErrors::TypestrFormat();
|
||||
CHECK_NE(typestr.front(), '>') << ColumnarErrors::BigEndian();
|
||||
CHECK_EQ(typestr.at(1), 'f') << "data" << ColumnarErrors::ofType("floating point");
|
||||
CHECK_EQ(typestr.at(2), '4') << ColumnarErrors::toFloat();
|
||||
|
||||
auto j_shape = get<Array const>(column.at("shape"));
|
||||
CHECK_EQ(j_shape.size(), 1) << ColumnarErrors::Dimension(1);
|
||||
|
||||
if (column.find("strides") != column.cend()) {
|
||||
auto strides = get<Array const>(column.at("strides"));
|
||||
CHECK_EQ(strides.size(), 1) << ColumnarErrors::Dimension(1);
|
||||
CHECK_EQ(get<Integer>(strides.at(0)), 4) << ColumnarErrors::Contigious();
|
||||
}
|
||||
|
||||
auto length = get<Integer const>(j_shape.at(0));
|
||||
|
||||
float* p_data = GetPtrFromArrayData<float*>(column);
|
||||
common::Span<float> s_data {p_data, length};
|
||||
|
||||
foreign_cols[i].data = s_data;
|
||||
foreign_cols[i].valid = RBitField8(s_mask);
|
||||
foreign_cols[i].size = s_data.size();
|
||||
foreign_cols[i].null_count = null_count;
|
||||
}
|
||||
|
||||
info.num_col_ = n_columns;
|
||||
info.num_row_ = foreign_cols[0].size;
|
||||
for (size_t i = 0; i < n_columns; ++i) {
|
||||
CHECK_EQ(foreign_cols[0].size, foreign_cols[i].size);
|
||||
info.num_nonzero_ += foreign_cols[i].data.size() - foreign_cols[i].null_count;
|
||||
}
|
||||
|
||||
this->FromDeviceColumnar(foreign_cols);
|
||||
}
|
||||
|
||||
#if !defined(XGBOOST_USE_CUDA)
|
||||
/*!
 * \brief Stand-in used when XGBoost is built without CUDA.
 *
 * Device columns cannot be consumed in such a build, so the call always
 * terminates with a fatal log message.
 *
 * \param cols Foreign columns; ignored.
 */
void SimpleCSRSource::FromDeviceColumnar(std::vector<Columnar> cols) {
  (void)cols;  // intentionally unused in CPU-only builds
  LOG(FATAL) << "XGBoost version is not compiled with GPU support";
}
|
||||
#endif // !defined(XGBOOST_USE_CUDA)
|
||||
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
|
||||
117
src/data/simple_csr_source.cu
Normal file
117
src/data/simple_csr_source.cu
Normal file
@@ -0,0 +1,117 @@
|
||||
/*!
|
||||
* Copyright 2019 by XGBoost Contributors
|
||||
*
|
||||
* \file simple_csr_source.cu
|
||||
* \brief An extension for the simple CSR source in-memory data structure to accept
|
||||
* foreign columnar.
|
||||
*/
|
||||
#include <thrust/device_ptr.h>
|
||||
#include <thrust/device_vector.h>
|
||||
#include <thrust/execution_policy.h>
|
||||
#include <thrust/scan.h>
|
||||
|
||||
#include <xgboost/base.h>
|
||||
#include <xgboost/data.h>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
#include "simple_csr_source.h"
|
||||
#include "columnar.h"
|
||||
#include "../common/bitfield.h"
|
||||
#include "../common/device_helpers.cuh"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
|
||||
/*!
 * \brief Count, for every row, how many columns hold a valid (non-null) value.
 *
 * Launch layout: one block per column; the threads of a block walk the rows of
 * that column with a stride of kBlockThreads, so the block is expected to be
 * launched with kBlockThreads threads.
 *
 * A column without a validity mask (null data pointer in `valid`) is treated as
 * fully valid.
 *
 * \tparam kBlockThreads Row stride used by each thread of a block.
 * \param columns Columns to inspect, indexed by blockIdx.x.
 * \param n_rows  Number of rows in every column.
 * \param offsets Counters that are incremented atomically; the count of row r is
 *                accumulated in offsets[r + 1].
 */
template <size_t kBlockThreads>
__global__ void CountValidKernel(common::Span<Columnar const> columns,
                                 int32_t const n_rows,
                                 common::Span<size_t> offsets) {
  auto const column_idx = blockIdx.x;
  // Blocks beyond the last column have nothing to do.
  if (column_idx < columns.size()) {
    RBitField8 const validity = columns[column_idx].valid;
    // No mask attached means every row of this column is present.
    bool const all_valid = validity.Data() == nullptr;

    for (auto row = threadIdx.x; row < n_rows; row += kBlockThreads) {
      if (all_valid || validity.Check(row)) {
        // Several blocks (columns) may hit the same row concurrently.
        auto* counter = reinterpret_cast<BitFieldAtomicType*>(&offsets[row + 1]);
        atomicAdd(counter, static_cast<BitFieldAtomicType>(1));
      }
    }
  }
}
|
||||
|
||||
/*!
 * \brief Scatter the valid entries of a single column into the CSR entry array.
 *
 * Launch layout: 1-D grid, one thread per row of the column; threads whose
 * global index is not below column.size exit immediately.
 *
 * A column without a validity mask (null data pointer in `valid`) is treated as
 * fully valid.
 *
 * \param column   Column whose values are copied.
 * \param colid    Feature index stored in every entry written by this launch.
 * \param offsets  Per-row write position into out_data. The position of a row is
 *                 advanced by one after an entry is written for it.
 * \param out_data Destination entries.
 */
__global__ void CreateCSRKernel(Columnar const column,
                                int32_t colid,
                                common::Span<size_t> offsets,
                                common::Span<Entry> out_data) {
  // Widen before multiplying so the global row index cannot wrap in 32 bits.
  size_t const tid =
      static_cast<size_t>(blockDim.x) * blockIdx.x + threadIdx.x;
  if (column.size <= tid) {
    return;
  }

  if (column.valid.Data() == nullptr || column.valid.Check(tid)) {
    // Offsets are size_t; keep the full width instead of truncating to int32_t,
    // otherwise pages with more than 2^31 - 1 entries write to wrong positions.
    size_t const oid = offsets[tid];
    out_data[oid].fvalue = column.data[tid];
    out_data[oid].index = colid;
    offsets[tid] += 1;
  }
}
|
||||
|
||||
/*!
 * \brief Build the CSR page on the GPU from foreign device columns.
 *
 * Steps:
 *  1. Verify that all columns live on the same device and switch to it.
 *  2. Size page_.offset / page_.data from info.num_row_ / info.num_nonzero_.
 *  3. Count the valid entries of every row, then turn the counts into row
 *     pointers with an inclusive scan.
 *  4. Scatter the columns one by one into page_.data, using a scratch copy of
 *     the row pointers as per-row write cursors.
 *
 * \param cols Foreign columns; must not be empty.
 */
void SimpleCSRSource::FromDeviceColumnar(std::vector<Columnar> cols) {
  CHECK(!cols.empty()) << "At least one column is required to construct the matrix.";
  uint64_t const n_cols = cols.size();
  uint64_t const n_rows = cols[0].size;

  auto ptr = cols[0].data.data();
  int32_t device = dh::CudaGetPointerDevice(ptr);
  CHECK_NE(device, -1);

  for (size_t i = 1; i < n_cols; ++i) {
    auto ptr = cols[i].data.data();
    int32_t ptr_device = dh::CudaGetPointerDevice(ptr);
    CHECK_EQ(device, ptr_device)
        << "GPU ID at 0^th column: " << device << ", "
        << "GPU ID at column " << i << ": " << ptr_device;
  }

  dh::safe_cuda(cudaSetDevice(device));

  GPUSet devices = GPUSet::Range(device, 1);

  page_.offset.Reshard(GPUDistribution(devices));
  page_.offset.Resize(info.num_row_ + 1);

  page_.data.Reshard(GPUDistribution(devices));
  page_.data.Resize(info.num_nonzero_);

  auto s_data = page_.data.DeviceSpan(device);
  auto s_offsets = page_.offset.DeviceSpan(device);
  CHECK_EQ(s_offsets.size(), n_rows + 1);

  int32_t constexpr kThreads = 256;
  dh::device_vector<Columnar> d_cols(cols);
  auto s_d_cols = dh::ToSpan(d_cols);

  // The offsets are size_t elements; clear the whole buffer, not just the first
  // sizeof(int32_t) bytes per element, since the counting kernel accumulates
  // into it.
  dh::safe_cuda(cudaMemset(s_offsets.data(), 0, sizeof(size_t) * (n_rows + 1)));

  CountValidKernel<kThreads><<<n_cols, kThreads>>>(s_d_cols, n_rows, s_offsets);
  dh::safe_cuda(cudaGetLastError());  // surface launch failures immediately

  thrust::device_ptr<size_t> p_offsets(s_offsets.data());
  CHECK_GE(s_offsets.size(), n_rows + 1);

  // Per-row counts -> CSR row pointers.
  thrust::inclusive_scan(p_offsets, p_offsets + n_rows + 1, p_offsets);
  // Created for building csr matrix, where we need to change index
  // after processing each column.
  dh::device_vector<size_t> tmp_offset(page_.offset.Size());
  thrust::copy(p_offsets, p_offsets + n_rows + 1, tmp_offset.begin());

  if (n_rows == 0) {
    // Nothing to scatter; a grid with zero blocks is an invalid launch.
    return;
  }

  int32_t const kBlocks = common::DivRoundUp(n_rows, kThreads);
  auto s_tmp_offset = dh::ToSpan(tmp_offset);

  for (size_t col = 0; col < n_cols; ++col) {
    // Kernel arguments are passed by value from the host, so use the host copy
    // of the column instead of reading it back from the device vector.
    CreateCSRKernel<<<kBlocks, kThreads>>>(cols[col], col, s_tmp_offset, s_data);
    dh::safe_cuda(cudaGetLastError());
  }
}
|
||||
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
@@ -10,9 +10,12 @@
|
||||
|
||||
#include <xgboost/base.h>
|
||||
#include <xgboost/data.h>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "columnar.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
@@ -27,7 +30,6 @@ namespace data {
|
||||
*/
|
||||
class SimpleCSRSource : public DataSource<SparsePage> {
|
||||
public:
|
||||
// public data members
|
||||
// MetaInfo info; // inherited from DataSource
|
||||
SparsePage page_;
|
||||
/*! \brief default constructor */
|
||||
@@ -47,6 +49,11 @@ class SimpleCSRSource : public DataSource<SparsePage> {
|
||||
* \param info The additional information reflected in the parser.
|
||||
*/
|
||||
void CopyFrom(dmlc::Parser<uint32_t>* src);
|
||||
/*!
|
||||
* \brief copy content of data from foreign **GPU** columnar buffer.
|
||||
* \param interfaces_str JSON representation of cuda array interfaces.
|
||||
*/
|
||||
void CopyFrom(std::string const& cuda_interfaces_str);
|
||||
/*!
|
||||
* \brief Load data from binary stream.
|
||||
* \param fi the pointer to load data from.
|
||||
@@ -67,6 +74,11 @@ class SimpleCSRSource : public DataSource<SparsePage> {
|
||||
static const int kMagic = 0xffffab01;
|
||||
|
||||
private:
|
||||
/*!
|
||||
* \brief copy content of data from foreign GPU columnar buffer.
|
||||
* \param cols foreign columns data buffer.
|
||||
*/
|
||||
void FromDeviceColumnar(std::vector<Columnar> cols);
|
||||
/*! \brief internal variable, used to support iterator interface */
|
||||
bool at_first_{true};
|
||||
};
|
||||
|
||||
@@ -22,9 +22,6 @@
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
BitField::value_type constexpr BitField::kValueSize;
|
||||
BitField::value_type constexpr BitField::kOne;
|
||||
|
||||
size_t FeatureInteractionConstraint::Features() const {
|
||||
return d_sets_ptr_.size() - 1;
|
||||
}
|
||||
@@ -51,7 +48,7 @@ void FeatureInteractionConstraint::Configure(
|
||||
}
|
||||
n_sets_ = h_feature_constraints.size();
|
||||
|
||||
size_t const n_feat_storage = BitField::ComputeStorageSize(n_features);
|
||||
size_t const n_feat_storage = LBitField64::ComputeStorageSize(n_features);
|
||||
if (n_feat_storage == 0 && n_features != 0) {
|
||||
LOG(FATAL) << "Wrong storage size, n_features: " << n_features;
|
||||
}
|
||||
@@ -70,13 +67,13 @@ void FeatureInteractionConstraint::Configure(
|
||||
node_constraints_.resize(n_nodes);
|
||||
node_constraints_storage_.resize(n_nodes);
|
||||
for (auto& n : node_constraints_storage_) {
|
||||
n.resize(BitField::ComputeStorageSize(n_features));
|
||||
n.resize(LBitField64::ComputeStorageSize(n_features));
|
||||
}
|
||||
for (size_t i = 0; i < node_constraints_storage_.size(); ++i) {
|
||||
auto span = dh::ToSpan(node_constraints_storage_[i]);
|
||||
node_constraints_[i] = BitField(span);
|
||||
node_constraints_[i] = LBitField64(span);
|
||||
}
|
||||
s_node_constraints_ = common::Span<BitField>(node_constraints_.data(),
|
||||
s_node_constraints_ = common::Span<LBitField64>(node_constraints_.data(),
|
||||
node_constraints_.size());
|
||||
|
||||
// Represent constraints as CSR format, flatten is the value vector,
|
||||
@@ -131,14 +128,14 @@ void FeatureInteractionConstraint::Configure(
|
||||
s_sets_ = dh::ToSpan(d_sets_);
|
||||
s_sets_ptr_ = dh::ToSpan(d_sets_ptr_);
|
||||
|
||||
d_feature_buffer_storage_.resize(BitField::ComputeStorageSize(n_features));
|
||||
d_feature_buffer_storage_.resize(LBitField64::ComputeStorageSize(n_features));
|
||||
feature_buffer_ = dh::ToSpan(d_feature_buffer_storage_);
|
||||
|
||||
// --- Initialize result buffers.
|
||||
output_buffer_bits_storage_.resize(BitField::ComputeStorageSize(n_features));
|
||||
output_buffer_bits_ = BitField(dh::ToSpan(output_buffer_bits_storage_));
|
||||
input_buffer_bits_storage_.resize(BitField::ComputeStorageSize(n_features));
|
||||
input_buffer_bits_ = BitField(dh::ToSpan(input_buffer_bits_storage_));
|
||||
output_buffer_bits_storage_.resize(LBitField64::ComputeStorageSize(n_features));
|
||||
output_buffer_bits_ = LBitField64(dh::ToSpan(output_buffer_bits_storage_));
|
||||
input_buffer_bits_storage_.resize(LBitField64::ComputeStorageSize(n_features));
|
||||
input_buffer_bits_ = LBitField64(dh::ToSpan(input_buffer_bits_storage_));
|
||||
result_buffer_.resize(n_features);
|
||||
s_result_buffer_ = dh::ToSpan(result_buffer_);
|
||||
}
|
||||
@@ -156,7 +153,7 @@ void FeatureInteractionConstraint::Reset() {
|
||||
}
|
||||
|
||||
__global__ void ClearBuffersKernel(
|
||||
BitField result_buffer_output, BitField result_buffer_input) {
|
||||
LBitField64 result_buffer_output, LBitField64 result_buffer_input) {
|
||||
auto tid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (tid < result_buffer_output.Size()) {
|
||||
result_buffer_output.Clear(tid);
|
||||
@@ -185,7 +182,7 @@ common::Span<int32_t> FeatureInteractionConstraint::QueryNode(int32_t node_id) {
|
||||
thrust::counting_iterator<int32_t> begin(0);
|
||||
thrust::counting_iterator<int32_t> end(result_buffer_.size());
|
||||
auto p_result_buffer = result_buffer_.data();
|
||||
BitField node_constraints = s_node_constraints_[node_id];
|
||||
LBitField64 node_constraints = s_node_constraints_[node_id];
|
||||
|
||||
thrust::device_ptr<int32_t> const out_end = thrust::copy_if(
|
||||
thrust::device,
|
||||
@@ -201,16 +198,16 @@ common::Span<int32_t> FeatureInteractionConstraint::QueryNode(int32_t node_id) {
|
||||
}
|
||||
|
||||
__global__ void SetInputBufferKernel(common::Span<int32_t> feature_list_input,
|
||||
BitField result_buffer_input) {
|
||||
LBitField64 result_buffer_input) {
|
||||
uint32_t tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
if (tid < feature_list_input.size()) {
|
||||
result_buffer_input.Set(feature_list_input[tid]);
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void QueryFeatureListKernel(BitField node_constraints,
|
||||
BitField result_buffer_input,
|
||||
BitField result_buffer_output) {
|
||||
__global__ void QueryFeatureListKernel(LBitField64 node_constraints,
|
||||
LBitField64 result_buffer_input,
|
||||
LBitField64 result_buffer_output) {
|
||||
result_buffer_output |= node_constraints;
|
||||
result_buffer_output &= result_buffer_input;
|
||||
}
|
||||
@@ -223,7 +220,7 @@ common::Span<int32_t> FeatureInteractionConstraint::Query(
|
||||
|
||||
ClearBuffers();
|
||||
|
||||
BitField node_constraints = s_node_constraints_[nid];
|
||||
LBitField64 node_constraints = s_node_constraints_[nid];
|
||||
CHECK_EQ(input_buffer_bits_.Size(), output_buffer_bits_.Size());
|
||||
|
||||
int constexpr kBlockThreads = 256;
|
||||
@@ -237,7 +234,7 @@ common::Span<int32_t> FeatureInteractionConstraint::Query(
|
||||
thrust::counting_iterator<int32_t> begin(0);
|
||||
thrust::counting_iterator<int32_t> end(result_buffer_.size());
|
||||
|
||||
BitField local_result_buffer = output_buffer_bits_;
|
||||
LBitField64 local_result_buffer = output_buffer_bits_;
|
||||
|
||||
thrust::device_ptr<int32_t> const out_end = thrust::copy_if(
|
||||
thrust::device,
|
||||
@@ -257,7 +254,7 @@ common::Span<int32_t> FeatureInteractionConstraint::Query(
|
||||
// Find interaction sets for each feature, then store all features in
|
||||
// those sets in a buffer.
|
||||
__global__ void RestoreFeatureListFromSetsKernel(
|
||||
BitField feature_buffer,
|
||||
LBitField64 feature_buffer,
|
||||
|
||||
int32_t fid,
|
||||
common::Span<int32_t> feature_interactions,
|
||||
@@ -283,11 +280,11 @@ __global__ void RestoreFeatureListFromSetsKernel(
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void InteractionConstraintSplitKernel(BitField feature,
|
||||
__global__ void InteractionConstraintSplitKernel(LBitField64 feature,
|
||||
int32_t feature_id,
|
||||
BitField node,
|
||||
BitField left,
|
||||
BitField right) {
|
||||
LBitField64 node,
|
||||
LBitField64 left,
|
||||
LBitField64 right) {
|
||||
auto tid = threadIdx.x + blockDim.x * blockIdx.x;
|
||||
if (tid > node.Size()) {
|
||||
return;
|
||||
@@ -324,9 +321,9 @@ void FeatureInteractionConstraint::Split(
|
||||
CHECK_LT(right_id, s_node_constraints_.size());
|
||||
CHECK_NE(s_node_constraints_.size(), 0);
|
||||
|
||||
BitField node = s_node_constraints_[node_id];
|
||||
BitField left = s_node_constraints_[left_id];
|
||||
BitField right = s_node_constraints_[right_id];
|
||||
LBitField64 node = s_node_constraints_[node_id];
|
||||
LBitField64 left = s_node_constraints_[left_id];
|
||||
LBitField64 right = s_node_constraints_[right_id];
|
||||
|
||||
dim3 const block3(16, 64, 1);
|
||||
dim3 const grid3(common::DivRoundUp(n_sets_, 16),
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
|
||||
#include "param.h"
|
||||
#include "../common/span.h"
|
||||
#include "../common/bitfield.cuh"
|
||||
#include "../common/bitfield.h"
|
||||
#include "../common/device_helpers.cuh"
|
||||
|
||||
namespace xgboost {
|
||||
@@ -115,25 +115,25 @@ struct FeatureInteractionConstraint {
|
||||
|
||||
// Allowed features attached to each node, have n_nodes bitfields,
|
||||
// each of size n_features.
|
||||
std::vector<dh::device_vector<BitField::value_type>> node_constraints_storage_;
|
||||
std::vector<BitField> node_constraints_;
|
||||
common::Span<BitField> s_node_constraints_;
|
||||
std::vector<dh::device_vector<LBitField64::value_type>> node_constraints_storage_;
|
||||
std::vector<LBitField64> node_constraints_;
|
||||
common::Span<LBitField64> s_node_constraints_;
|
||||
|
||||
// buffer storing return feature list from Query, of size n_features.
|
||||
dh::device_vector<int32_t> result_buffer_;
|
||||
common::Span<int32_t> s_result_buffer_;
|
||||
|
||||
// Temp buffers, one bit for each possible feature.
|
||||
dh::device_vector<BitField::value_type> output_buffer_bits_storage_;
|
||||
BitField output_buffer_bits_;
|
||||
dh::device_vector<BitField::value_type> input_buffer_bits_storage_;
|
||||
BitField input_buffer_bits_;
|
||||
dh::device_vector<LBitField64::value_type> output_buffer_bits_storage_;
|
||||
LBitField64 output_buffer_bits_;
|
||||
dh::device_vector<LBitField64::value_type> input_buffer_bits_storage_;
|
||||
LBitField64 input_buffer_bits_;
|
||||
/*
|
||||
* Combined features from all interaction sets that one feature belongs to.
|
||||
* For an input with [[0, 1], [1, 2]], the feature 1 belongs to sets {0, 1}
|
||||
*/
|
||||
dh::device_vector<BitField::value_type> d_feature_buffer_storage_;
|
||||
BitField feature_buffer_; // of Size n features.
|
||||
dh::device_vector<LBitField64::value_type> d_feature_buffer_storage_;
|
||||
LBitField64 feature_buffer_; // of Size n features.
|
||||
|
||||
// Clear out all temp buffers except for `feature_buffer_', which is
|
||||
// handled in `Split'.
|
||||
|
||||
Reference in New Issue
Block a user