Support dmatrix construction from cupy array (#5206)
This commit is contained in:
@@ -206,12 +206,24 @@ int XGDMatrixCreateFromDataIter(
|
||||
}
|
||||
|
||||
#ifndef XGBOOST_USE_CUDA
|
||||
XGB_DLL int XGDMatrixCreateFromArrayInterfaces(
|
||||
char const* c_json_strs, bst_int has_missing, bst_float missing, DMatrixHandle* out) {
|
||||
XGB_DLL int XGDMatrixCreateFromArrayInterfaceColumns(char const* c_json_strs,
|
||||
bst_float missing,
|
||||
int nthread,
|
||||
DMatrixHandle* out) {
|
||||
API_BEGIN();
|
||||
LOG(FATAL) << "Xgboost not compiled with cuda";
|
||||
API_END();
|
||||
}
|
||||
|
||||
XGB_DLL int XGDMatrixCreateFromArrayInterface(char const* c_json_strs,
|
||||
bst_float missing,
|
||||
int nthread,
|
||||
DMatrixHandle* out) {
|
||||
API_BEGIN();
|
||||
LOG(FATAL) << "Xgboost not compiled with cuda";
|
||||
API_END();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
XGB_DLL int XGDMatrixCreateFromCSREx(const size_t* indptr,
|
||||
|
||||
@@ -7,14 +7,27 @@
|
||||
#include "../data/device_adapter.cuh"
|
||||
|
||||
namespace xgboost {
|
||||
XGB_DLL int XGDMatrixCreateFromArrayInterfaces(char const* c_json_strs,
|
||||
bst_int has_missing,
|
||||
bst_float missing,
|
||||
DMatrixHandle* out) {
|
||||
XGB_DLL int XGDMatrixCreateFromArrayInterfaceColumns(char const* c_json_strs,
|
||||
bst_float missing,
|
||||
int nthread,
|
||||
DMatrixHandle* out) {
|
||||
API_BEGIN();
|
||||
std::string json_str{c_json_strs};
|
||||
data::CudfAdapter adapter(json_str);
|
||||
*out = new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, 1));
|
||||
*out =
|
||||
new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, nthread));
|
||||
API_END();
|
||||
}
|
||||
|
||||
XGB_DLL int XGDMatrixCreateFromArrayInterface(char const* c_json_strs,
|
||||
bst_float missing, int nthread,
|
||||
DMatrixHandle* out) {
|
||||
API_BEGIN();
|
||||
std::string json_str{c_json_strs};
|
||||
data::CupyAdapter adapter(json_str);
|
||||
*out =
|
||||
new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, nthread));
|
||||
API_END();
|
||||
}
|
||||
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -1,14 +1,15 @@
|
||||
/*!
|
||||
* Copyright 2019 by Contributors
|
||||
* \file columnar.h
|
||||
* \file array_interface.h
|
||||
* \brief Basic structure holding a reference to arrow columnar data format.
|
||||
*/
|
||||
#ifndef XGBOOST_DATA_COLUMNAR_H_
|
||||
#define XGBOOST_DATA_COLUMNAR_H_
|
||||
#ifndef XGBOOST_DATA_ARRAY_INTERFACE_H_
|
||||
#define XGBOOST_DATA_ARRAY_INTERFACE_H_
|
||||
|
||||
#include <cinttypes>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "xgboost/data.h"
|
||||
#include "xgboost/json.h"
|
||||
@@ -18,7 +19,7 @@
|
||||
|
||||
namespace xgboost {
|
||||
// Common errors in parsing columnar format.
|
||||
struct ColumnarErrors {
|
||||
struct ArrayInterfaceErrors {
|
||||
static char const* Contigious() {
|
||||
return "Memory should be contigious.";
|
||||
}
|
||||
@@ -119,15 +120,12 @@ class ArrayInterfaceHandler {
|
||||
if (array.find("version") == array.cend()) {
|
||||
LOG(FATAL) << "Missing `version' field for array interface";
|
||||
}
|
||||
auto version = get<Integer const>(array.at("version"));
|
||||
CHECK_EQ(version, 1) << ColumnarErrors::Version();
|
||||
|
||||
if (array.find("typestr") == array.cend()) {
|
||||
LOG(FATAL) << "Missing `typestr' field for array interface";
|
||||
}
|
||||
auto typestr = get<String const>(array.at("typestr"));
|
||||
CHECK_EQ(typestr.size(), 3) << ColumnarErrors::TypestrFormat();
|
||||
CHECK_NE(typestr.front(), '>') << ColumnarErrors::BigEndian();
|
||||
CHECK_EQ(typestr.size(), 3) << ArrayInterfaceErrors::TypestrFormat();
|
||||
CHECK_NE(typestr.front(), '>') << ArrayInterfaceErrors::BigEndian();
|
||||
|
||||
if (array.find("shape") == array.cend()) {
|
||||
LOG(FATAL) << "Missing `shape' field for array interface";
|
||||
@@ -149,7 +147,7 @@ class ArrayInterfaceHandler {
|
||||
auto p_mask = GetPtrFromArrayData<RBitField8::value_type*>(j_mask);
|
||||
|
||||
auto j_shape = get<Array const>(j_mask.at("shape"));
|
||||
CHECK_EQ(j_shape.size(), 1) << ColumnarErrors::Dimension(1);
|
||||
CHECK_EQ(j_shape.size(), 1) << ArrayInterfaceErrors::Dimension(1);
|
||||
auto typestr = get<String const>(j_mask.at("typestr"));
|
||||
// For now this is just 1, we can support different size of interger in mask.
|
||||
int64_t const type_length = typestr.at(2) - 48;
|
||||
@@ -178,8 +176,8 @@ class ArrayInterfaceHandler {
|
||||
|
||||
if (j_mask.find("strides") != j_mask.cend()) {
|
||||
auto strides = get<Array const>(column.at("strides"));
|
||||
CHECK_EQ(strides.size(), 1) << ColumnarErrors::Dimension(1);
|
||||
CHECK_EQ(get<Integer>(strides.at(0)), type_length) << ColumnarErrors::Contigious();
|
||||
CHECK_EQ(strides.size(), 1) << ArrayInterfaceErrors::Dimension(1);
|
||||
CHECK_EQ(get<Integer>(strides.at(0)), type_length) << ArrayInterfaceErrors::Contigious();
|
||||
}
|
||||
|
||||
s_mask = {p_mask, span_size};
|
||||
@@ -188,18 +186,28 @@ class ArrayInterfaceHandler {
|
||||
return 0;
|
||||
}
|
||||
|
||||
static size_t ExtractLength(std::map<std::string, Json> const& column) {
|
||||
static std::pair<size_t, size_t> ExtractShape(
|
||||
std::map<std::string, Json> const& column) {
|
||||
auto j_shape = get<Array const>(column.at("shape"));
|
||||
CHECK_EQ(j_shape.size(), 1) << ColumnarErrors::Dimension(1);
|
||||
auto typestr = get<String const>(column.at("typestr"));
|
||||
if (column.find("strides") != column.cend()) {
|
||||
auto strides = get<Array const>(column.at("strides"));
|
||||
CHECK_EQ(strides.size(), 1) << ColumnarErrors::Dimension(1);
|
||||
CHECK_EQ(get<Integer>(strides.at(0)), typestr.at(2) - '0')
|
||||
<< ColumnarErrors::Contigious();
|
||||
if (!IsA<Null>(column.at("strides"))) {
|
||||
auto strides = get<Array const>(column.at("strides"));
|
||||
CHECK_EQ(strides.size(), j_shape.size())
|
||||
<< ArrayInterfaceErrors::Dimension(1);
|
||||
CHECK_EQ(get<Integer>(strides.at(0)), typestr.at(2) - '0')
|
||||
<< ArrayInterfaceErrors::Contigious();
|
||||
}
|
||||
}
|
||||
|
||||
return static_cast<size_t>(get<Integer const>(j_shape.at(0)));
|
||||
if (j_shape.size() == 1) {
|
||||
return {static_cast<size_t>(get<Integer const>(j_shape.at(0))), 1};
|
||||
} else {
|
||||
CHECK_EQ(j_shape.size(), 2)
|
||||
<< "Only 1D or 2-D arrays currently supported.";
|
||||
return {static_cast<size_t>(get<Integer const>(j_shape.at(0))),
|
||||
static_cast<size_t>(get<Integer const>(j_shape.at(1)))};
|
||||
}
|
||||
}
|
||||
template <typename T>
|
||||
static common::Span<T> ExtractData(std::map<std::string, Json> const& column) {
|
||||
@@ -212,25 +220,27 @@ class ArrayInterfaceHandler {
|
||||
<< "Input data type and typestr mismatch. typestr: " << typestr;
|
||||
|
||||
|
||||
auto length = ExtractLength(column);
|
||||
auto shape = ExtractShape(column);
|
||||
|
||||
T* p_data = ArrayInterfaceHandler::GetPtrFromArrayData<T*>(column);
|
||||
return common::Span<T>{p_data, length};
|
||||
return common::Span<T>{p_data, shape.first * shape.second};
|
||||
}
|
||||
};
|
||||
|
||||
// A view over __array_interface__
|
||||
class Columnar {
|
||||
class ArrayInterface {
|
||||
using mask_type = unsigned char;
|
||||
using index_type = int32_t;
|
||||
|
||||
public:
|
||||
Columnar() = default;
|
||||
explicit Columnar(std::map<std::string, Json> const& column) {
|
||||
ArrayInterface() = default;
|
||||
explicit ArrayInterface(std::map<std::string, Json> const& column) {
|
||||
ArrayInterfaceHandler::Validate(column);
|
||||
data = ArrayInterfaceHandler::GetPtrFromArrayData<void*>(column);
|
||||
CHECK(data) << "Column is null";
|
||||
size = ArrayInterfaceHandler::ExtractLength(column);
|
||||
auto shape = ArrayInterfaceHandler::ExtractShape(column);
|
||||
num_rows = shape.first;
|
||||
num_cols = shape.second;
|
||||
|
||||
common::Span<RBitField8::value_type> s_mask;
|
||||
size_t n_bits = ArrayInterfaceHandler::ExtractMask(column, &s_mask);
|
||||
@@ -238,7 +248,7 @@ class Columnar {
|
||||
valid = RBitField8(s_mask);
|
||||
|
||||
if (s_mask.data()) {
|
||||
CHECK_EQ(n_bits, size)
|
||||
CHECK_EQ(n_bits, num_rows)
|
||||
<< "Shape of bit mask doesn't match data shape. "
|
||||
<< "XGBoost doesn't support internal broadcasting.";
|
||||
}
|
||||
@@ -271,7 +281,7 @@ class Columnar {
|
||||
} else if (type[1] == 'u' && type[2] == '8') {
|
||||
return;
|
||||
} else {
|
||||
LOG(FATAL) << ColumnarErrors::UnSupportedType(type);
|
||||
LOG(FATAL) << ArrayInterfaceErrors::UnSupportedType(type);
|
||||
return;
|
||||
}
|
||||
}
|
||||
@@ -304,10 +314,11 @@ class Columnar {
|
||||
}
|
||||
|
||||
RBitField8 valid;
|
||||
int32_t size;
|
||||
int32_t num_rows;
|
||||
int32_t num_cols;
|
||||
void* data;
|
||||
char type[3];
|
||||
};
|
||||
|
||||
} // namespace xgboost
|
||||
#endif // XGBOOST_DATA_COLUMNAR_H_
|
||||
#endif // XGBOOST_DATA_ARRAY_INTERFACE_H_
|
||||
@@ -7,14 +7,14 @@
|
||||
#include "xgboost/data.h"
|
||||
#include "xgboost/logging.h"
|
||||
#include "xgboost/json.h"
|
||||
#include "columnar.h"
|
||||
#include "array_interface.h"
|
||||
#include "../common/device_helpers.cuh"
|
||||
#include "device_adapter.cuh"
|
||||
#include "simple_dmatrix.h"
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
void CopyInfoImpl(std::map<std::string, Json> const& column, HostDeviceVector<float>* out) {
|
||||
void CopyInfoImpl(ArrayInterface column, HostDeviceVector<float>* out) {
|
||||
auto SetDeviceToPtr = [](void* ptr) {
|
||||
cudaPointerAttributes attr;
|
||||
dh::safe_cuda(cudaPointerGetAttributes(&attr, ptr));
|
||||
@@ -22,43 +22,42 @@ void CopyInfoImpl(std::map<std::string, Json> const& column, HostDeviceVector<fl
|
||||
dh::safe_cuda(cudaSetDevice(ptr_device));
|
||||
return ptr_device;
|
||||
};
|
||||
Columnar foreign_column(column);
|
||||
auto ptr_device = SetDeviceToPtr(foreign_column.data);
|
||||
auto ptr_device = SetDeviceToPtr(column.data);
|
||||
|
||||
out->SetDevice(ptr_device);
|
||||
out->Resize(foreign_column.size);
|
||||
out->Resize(column.num_rows);
|
||||
|
||||
auto p_dst = thrust::device_pointer_cast(out->DevicePointer());
|
||||
|
||||
dh::LaunchN(ptr_device, foreign_column.size, [=] __device__(size_t idx) {
|
||||
p_dst[idx] = foreign_column.GetElement(idx);
|
||||
dh::LaunchN(ptr_device, column.num_rows, [=] __device__(size_t idx) {
|
||||
p_dst[idx] = column.GetElement(idx);
|
||||
});
|
||||
}
|
||||
|
||||
void MetaInfo::SetInfo(const char * c_key, std::string const& interface_str) {
|
||||
Json j_interface = Json::Load({interface_str.c_str(), interface_str.size()});
|
||||
auto const& j_arr = get<Array>(j_interface);
|
||||
CHECK_EQ(j_arr.size(), 1) << "MetaInfo: " << c_key << ". " << ColumnarErrors::Dimension(1);;
|
||||
auto const& j_arr_obj = get<Object const>(j_arr[0]);
|
||||
std::string key {c_key};
|
||||
ArrayInterfaceHandler::Validate(j_arr_obj);
|
||||
if (j_arr_obj.find("mask") != j_arr_obj.cend()) {
|
||||
LOG(FATAL) << "Meta info " << key << " should be dense, found validity mask";
|
||||
}
|
||||
auto const& typestr = get<String const>(j_arr_obj.at("typestr"));
|
||||
CHECK_EQ(j_arr.size(), 1)
|
||||
<< "MetaInfo: " << c_key << ". " << ArrayInterfaceErrors::Dimension(1);
|
||||
ArrayInterface array_interface(get<Object const>(j_arr[0]));
|
||||
std::string key{c_key};
|
||||
CHECK(!array_interface.valid.Data())
|
||||
<< "Meta info " << key << " should be dense, found validity mask";
|
||||
CHECK_EQ(array_interface.num_cols, 1)
|
||||
<< "Meta info should be a single column.";
|
||||
|
||||
if (key == "label") {
|
||||
CopyInfoImpl(j_arr_obj, &labels_);
|
||||
CopyInfoImpl(array_interface, &labels_);
|
||||
} else if (key == "weight") {
|
||||
CopyInfoImpl(j_arr_obj, &weights_);
|
||||
CopyInfoImpl(array_interface, &weights_);
|
||||
} else if (key == "base_margin") {
|
||||
CopyInfoImpl(j_arr_obj, &base_margin_);
|
||||
CopyInfoImpl(array_interface, &base_margin_);
|
||||
} else if (key == "group") {
|
||||
// Ranking is not performed on device.
|
||||
auto s_data = ArrayInterfaceHandler::ExtractData<uint32_t>(j_arr_obj);
|
||||
thrust::device_ptr<uint32_t> p_src {s_data.data()};
|
||||
thrust::device_ptr<uint32_t> p_src{
|
||||
reinterpret_cast<uint32_t*>(array_interface.data)};
|
||||
|
||||
auto length = s_data.size();
|
||||
auto length = array_interface.num_rows;
|
||||
group_ptr_.resize(length + 1);
|
||||
group_ptr_[0] = 0;
|
||||
thrust::copy(p_src, p_src + length, group_ptr_.begin() + 1);
|
||||
@@ -82,4 +81,7 @@ DMatrix* DMatrix::Create(AdapterT* adapter, float missing, int nthread,
|
||||
template DMatrix* DMatrix::Create<data::CudfAdapter>(
|
||||
data::CudfAdapter* adapter, float missing, int nthread,
|
||||
const std::string& cache_prefix, size_t page_size);
|
||||
template DMatrix* DMatrix::Create<data::CupyAdapter>(
|
||||
data::CupyAdapter* adapter, float missing, int nthread,
|
||||
const std::string& cache_prefix, size_t page_size);
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -7,9 +7,9 @@
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include "columnar.h"
|
||||
#include "adapter.h"
|
||||
#include "../common/device_helpers.cuh"
|
||||
#include "adapter.h"
|
||||
#include "array_interface.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
@@ -17,12 +17,13 @@ namespace data {
|
||||
class CudfAdapterBatch : public detail::NoMetaInfo {
|
||||
public:
|
||||
CudfAdapterBatch() = default;
|
||||
CudfAdapterBatch(common::Span<Columnar> columns,
|
||||
CudfAdapterBatch(common::Span<ArrayInterface> columns,
|
||||
common::Span<size_t> column_ptr, size_t num_elements)
|
||||
: columns_(columns),column_ptr_(column_ptr), num_elements(num_elements) {}
|
||||
size_t Size()const { return num_elements; }
|
||||
__device__ COOTuple GetElement(size_t idx)const
|
||||
{
|
||||
: columns_(columns),
|
||||
column_ptr_(column_ptr),
|
||||
num_elements(num_elements) {}
|
||||
size_t Size() const { return num_elements; }
|
||||
__device__ COOTuple GetElement(size_t idx) const {
|
||||
size_t column_idx =
|
||||
dh::UpperBound(column_ptr_.data(), column_ptr_.size(), idx) - 1;
|
||||
auto& column = columns_[column_idx];
|
||||
@@ -34,22 +35,23 @@ class CudfAdapterBatch : public detail::NoMetaInfo {
|
||||
}
|
||||
|
||||
private:
|
||||
common::Span<Columnar> columns_;
|
||||
common::Span<ArrayInterface> columns_;
|
||||
common::Span<size_t> column_ptr_;
|
||||
size_t num_elements;
|
||||
};
|
||||
|
||||
/*!
|
||||
* Please be careful that, in official specification, the only three required fields are
|
||||
* `shape', `version' and `typestr'. Any other is optional, including `data'. But here
|
||||
* we have one additional requirements for input data:
|
||||
* Please be careful that, in official specification, the only three required
|
||||
* fields are `shape', `version' and `typestr'. Any other is optional,
|
||||
* including `data'. But here we have one additional requirements for input
|
||||
* data:
|
||||
*
|
||||
* - `data' field is required, passing in an empty dataset is not accepted, as most (if
|
||||
* not all) of our algorithms don't have test for empty dataset. An error is better
|
||||
* than a crash.
|
||||
* - `data' field is required, passing in an empty dataset is not accepted, as
|
||||
* most (if not all) of our algorithms don't have test for empty dataset. An
|
||||
* error is better than a crash.
|
||||
*
|
||||
* What if invalid value from dataframe is 0 but I specify missing=NaN in XGBoost? Since
|
||||
* validity mask is ignored, all 0s are preserved in XGBoost.
|
||||
* What if invalid value from dataframe is 0 but I specify missing=NaN in
|
||||
* XGBoost? Since validity mask is ignored, all 0s are preserved in XGBoost.
|
||||
*
|
||||
* FIXME(trivialfis): Put above into document after we have a consistent way for
|
||||
* processing input data.
|
||||
@@ -96,23 +98,23 @@ class CudfAdapter : public detail::SingleBatchDataIter<CudfAdapterBatch> {
|
||||
CHECK_GT(n_columns, 0) << "Number of columns must not equal to 0.";
|
||||
|
||||
auto const& typestr = get<String const>(json_columns[0]["typestr"]);
|
||||
CHECK_EQ(typestr.size(), 3) << ColumnarErrors::TypestrFormat();
|
||||
CHECK_NE(typestr.front(), '>') << ColumnarErrors::BigEndian();
|
||||
std::vector<Columnar> columns;
|
||||
CHECK_EQ(typestr.size(), 3) << ArrayInterfaceErrors::TypestrFormat();
|
||||
CHECK_NE(typestr.front(), '>') << ArrayInterfaceErrors::BigEndian();
|
||||
std::vector<ArrayInterface> columns;
|
||||
std::vector<size_t> column_ptr({0});
|
||||
auto first_column = Columnar(get<Object const>(json_columns[0]));
|
||||
auto first_column = ArrayInterface(get<Object const>(json_columns[0]));
|
||||
device_idx_ = dh::CudaGetPointerDevice(first_column.data);
|
||||
CHECK_NE(device_idx_, -1);
|
||||
dh::safe_cuda(cudaSetDevice(device_idx_));
|
||||
num_rows_ = first_column.size;
|
||||
num_rows_ = first_column.num_rows;
|
||||
for (auto& json_col : json_columns) {
|
||||
auto column = Columnar(get<Object const>(json_col));
|
||||
auto column = ArrayInterface(get<Object const>(json_col));
|
||||
columns.push_back(column);
|
||||
column_ptr.emplace_back(column_ptr.back() + column.size);
|
||||
num_rows_ = std::max(num_rows_, size_t(column.size));
|
||||
column_ptr.emplace_back(column_ptr.back() + column.num_rows);
|
||||
num_rows_ = std::max(num_rows_, size_t(column.num_rows));
|
||||
CHECK_EQ(device_idx_, dh::CudaGetPointerDevice(column.data))
|
||||
<< "All columns should use the same device.";
|
||||
CHECK_EQ(num_rows_, column.size)
|
||||
CHECK_EQ(num_rows_, column.num_rows)
|
||||
<< "All columns should have same number of rows.";
|
||||
}
|
||||
columns_ = columns;
|
||||
@@ -124,19 +126,65 @@ class CudfAdapter : public detail::SingleBatchDataIter<CudfAdapterBatch> {
|
||||
|
||||
size_t NumRows() const { return num_rows_; }
|
||||
size_t NumColumns() const { return columns_.size(); }
|
||||
size_t DeviceIdx()const {
|
||||
return device_idx_;
|
||||
}
|
||||
size_t DeviceIdx() const { return device_idx_; }
|
||||
|
||||
// Cudf is column major
|
||||
bool IsRowMajor() { return false; }
|
||||
|
||||
private:
|
||||
CudfAdapterBatch batch;
|
||||
dh::device_vector<Columnar> columns_;
|
||||
dh::device_vector<ArrayInterface> columns_;
|
||||
dh::device_vector<size_t> column_ptr_; // Exclusive scan of column sizes
|
||||
size_t num_rows_{0};
|
||||
int device_idx_;
|
||||
};
|
||||
|
||||
class CupyAdapterBatch : public detail::NoMetaInfo {
|
||||
public:
|
||||
CupyAdapterBatch() = default;
|
||||
CupyAdapterBatch(ArrayInterface array_interface)
|
||||
: array_interface_(array_interface) {}
|
||||
size_t Size() const {
|
||||
return array_interface_.num_rows * array_interface_.num_cols;
|
||||
}
|
||||
__device__ COOTuple GetElement(size_t idx) const {
|
||||
size_t column_idx = idx % array_interface_.num_cols;
|
||||
size_t row_idx = idx / array_interface_.num_cols;
|
||||
float value = array_interface_.valid.Data() == nullptr ||
|
||||
array_interface_.valid.Check(row_idx)
|
||||
? array_interface_.GetElement(idx)
|
||||
: std::numeric_limits<float>::quiet_NaN();
|
||||
return COOTuple(row_idx, column_idx, value);
|
||||
}
|
||||
|
||||
private:
|
||||
ArrayInterface array_interface_;
|
||||
};
|
||||
|
||||
class CupyAdapter : public detail::SingleBatchDataIter<CupyAdapterBatch> {
|
||||
public:
|
||||
explicit CupyAdapter(std::string cuda_interface_str) {
|
||||
Json json_array_interface =
|
||||
Json::Load({cuda_interface_str.c_str(), cuda_interface_str.size()});
|
||||
array_interface = ArrayInterface(get<Object const>(json_array_interface));
|
||||
device_idx_ = dh::CudaGetPointerDevice(array_interface.data);
|
||||
CHECK_NE(device_idx_, -1);
|
||||
batch = CupyAdapterBatch(array_interface);
|
||||
}
|
||||
const CupyAdapterBatch& Value() const override { return batch; }
|
||||
|
||||
size_t NumRows() const { return array_interface.num_rows; }
|
||||
size_t NumColumns() const { return array_interface.num_cols; }
|
||||
size_t DeviceIdx() const { return device_idx_; }
|
||||
|
||||
bool IsRowMajor() { return true; }
|
||||
|
||||
private:
|
||||
ArrayInterface array_interface;
|
||||
CupyAdapterBatch batch;
|
||||
int device_idx_;
|
||||
};
|
||||
|
||||
}; // namespace data
|
||||
} // namespace xgboost
|
||||
#endif // XGBOOST_DATA_DEVICE_ADAPTER_H_
|
||||
|
||||
@@ -7,7 +7,6 @@
|
||||
#include <xgboost/json.h>
|
||||
|
||||
#include "simple_csr_source.h"
|
||||
#include "columnar.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
|
||||
@@ -78,6 +78,35 @@ void CopyDataColumnMajor(AdapterT* adapter, common::Span<Entry> data,
|
||||
}
|
||||
}
|
||||
|
||||
struct IsValidFunctor : public thrust::unary_function<Entry, bool> {
|
||||
explicit IsValidFunctor(float missing) : missing(missing) {}
|
||||
|
||||
float missing;
|
||||
__device__ bool operator()(const Entry& x) const {
|
||||
return IsValid(x.fvalue, missing);
|
||||
}
|
||||
};
|
||||
|
||||
// Here the data is already correctly ordered and simply needs to be compacted
|
||||
// to remove missing data
|
||||
template <typename AdapterT>
|
||||
void CopyDataRowMajor(AdapterT* adapter, common::Span<Entry> data,
|
||||
int device_idx, float missing,
|
||||
common::Span<size_t> row_ptr) {
|
||||
auto& batch = adapter->Value();
|
||||
auto transform_f = [=] __device__(size_t idx) {
|
||||
const auto& e = batch.GetElement(idx);
|
||||
return Entry(e.column_idx, e.value);
|
||||
}; // NOLINT
|
||||
auto counting = thrust::make_counting_iterator(0llu);
|
||||
thrust::transform_iterator<decltype(transform_f), decltype(counting), Entry>
|
||||
transform_iter(counting, transform_f);
|
||||
dh::XGBCachingDeviceAllocator<char> alloc;
|
||||
thrust::copy_if(
|
||||
thrust::cuda::par(alloc), transform_iter, transform_iter + batch.Size(),
|
||||
thrust::device_pointer_cast(data.data()), IsValidFunctor(missing));
|
||||
}
|
||||
|
||||
// Does not currently support metainfo as no on-device data source contains this
|
||||
// Current implementation assumes a single batch. More batches can
|
||||
// be supported in future. Does not currently support inferring row/column size
|
||||
@@ -102,11 +131,14 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread) {
|
||||
mat.info.num_nonzero_ = mat.page_.offset.HostVector().back();
|
||||
mat.page_.data.Resize(mat.info.num_nonzero_);
|
||||
if (adapter->IsRowMajor()) {
|
||||
LOG(FATAL) << "Not implemented.";
|
||||
CopyDataRowMajor(adapter, mat.page_.data.DeviceSpan(),
|
||||
adapter->DeviceIdx(), missing, s_offset);
|
||||
} else {
|
||||
CopyDataColumnMajor(adapter, mat.page_.data.DeviceSpan(),
|
||||
adapter->DeviceIdx(), missing, s_offset);
|
||||
}
|
||||
// Sync
|
||||
mat.page_.data.HostVector();
|
||||
|
||||
mat.info.num_col_ = adapter->NumColumns();
|
||||
mat.info.num_row_ = adapter->NumRows();
|
||||
@@ -116,5 +148,7 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread) {
|
||||
|
||||
template SimpleDMatrix::SimpleDMatrix(CudfAdapter* adapter, float missing,
|
||||
int nthread);
|
||||
template SimpleDMatrix::SimpleDMatrix(CupyAdapter* adapter, float missing,
|
||||
int nthread);
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
|
||||
Reference in New Issue
Block a user