Implement iterative DMatrix. (#5837)
This commit is contained in:
parent
4d277d750d
commit
1a0801238e
@ -26,39 +26,10 @@
|
||||
// manually define unsigned long
|
||||
typedef uint64_t bst_ulong; // NOLINT(*)
|
||||
|
||||
|
||||
/*! \brief handle to DMatrix */
|
||||
typedef void *DMatrixHandle; // NOLINT(*)
|
||||
/*! \brief handle to Booster */
|
||||
typedef void *BoosterHandle; // NOLINT(*)
|
||||
/*! \brief handle to a data iterator */
|
||||
typedef void *DataIterHandle; // NOLINT(*)
|
||||
/*! \brief handle to a internal data holder. */
|
||||
typedef void *DataHolderHandle; // NOLINT(*)
|
||||
|
||||
/*! \brief Mini batch used in XGBoost Data Iteration */
|
||||
typedef struct { // NOLINT(*)
|
||||
/*! \brief number of rows in the minibatch */
|
||||
size_t size;
|
||||
/* \brief number of columns in the minibatch. */
|
||||
size_t columns;
|
||||
/*! \brief row pointer to the rows in the data */
|
||||
#ifdef __APPLE__
|
||||
/* Necessary as Java on MacOS defines jlong as long int
|
||||
* and gcc defines int64_t as long long int. */
|
||||
long* offset; // NOLINT(*)
|
||||
#else
|
||||
int64_t* offset; // NOLINT(*)
|
||||
#endif // __APPLE__
|
||||
/*! \brief labels of each instance */
|
||||
float* label;
|
||||
/*! \brief weight of each instance, can be NULL */
|
||||
float* weight;
|
||||
/*! \brief feature index */
|
||||
int* index;
|
||||
/*! \brief feature values */
|
||||
float* value;
|
||||
} XGBoostBatchCSR;
|
||||
|
||||
/*!
|
||||
* \brief Return the version of the XGBoost library being currently used.
|
||||
@ -71,29 +42,6 @@ typedef struct { // NOLINT(*)
|
||||
*/
|
||||
XGB_DLL void XGBoostVersion(int* major, int* minor, int* patch);
|
||||
|
||||
/*!
|
||||
* \brief Callback to set the data to handle,
|
||||
* \param handle The handle to the callback.
|
||||
* \param batch The data content to be set.
|
||||
*/
|
||||
XGB_EXTERN_C typedef int XGBCallbackSetData( // NOLINT(*)
|
||||
DataHolderHandle handle, XGBoostBatchCSR batch);
|
||||
|
||||
/*!
|
||||
* \brief The data reading callback function.
|
||||
* The iterator will be able to give subset of batch in the data.
|
||||
*
|
||||
* If there is data, the function will call set_function to set the data.
|
||||
*
|
||||
* \param data_handle The handle to the callback.
|
||||
* \param set_function The batch returned by the iterator
|
||||
* \param set_function_handle The handle to be passed to set function.
|
||||
* \return 0 if we are reaching the end and batch is not returned.
|
||||
*/
|
||||
XGB_EXTERN_C typedef int XGBCallbackDataIterNext( // NOLINT(*)
|
||||
DataIterHandle data_handle, XGBCallbackSetData *set_function,
|
||||
DataHolderHandle set_function_handle);
|
||||
|
||||
/*!
|
||||
* \brief get string message of the last error
|
||||
*
|
||||
@ -126,20 +74,6 @@ XGB_DLL int XGDMatrixCreateFromFile(const char *fname,
|
||||
int silent,
|
||||
DMatrixHandle *out);
|
||||
|
||||
/*!
|
||||
* \brief Create a DMatrix from a data iterator.
|
||||
* \param data_handle The handle to the data.
|
||||
* \param callback The callback to get the data.
|
||||
* \param cache_info Additional information about cache file, can be null.
|
||||
* \param out The created DMatrix
|
||||
* \return 0 when success, -1 when failure happens.
|
||||
*/
|
||||
XGB_DLL int XGDMatrixCreateFromDataIter(
|
||||
DataIterHandle data_handle,
|
||||
XGBCallbackDataIterNext* callback,
|
||||
const char* cache_info,
|
||||
DMatrixHandle *out);
|
||||
|
||||
/*!
|
||||
* \brief create a matrix content from CSR format
|
||||
* \param indptr pointer to row headers
|
||||
@ -221,6 +155,189 @@ XGB_DLL int XGDMatrixCreateFromDT(void** data,
|
||||
bst_ulong ncol,
|
||||
DMatrixHandle* out,
|
||||
int nthread);
|
||||
|
||||
/*
|
||||
* ========================== Begin data callback APIs =========================
|
||||
*
|
||||
* Short notes for data callback
|
||||
*
|
||||
* There are 2 sets of data callbacks for DMatrix. The first one is currently exclusively
|
||||
 * used by JVM packages. It uses `XGBoostBatchCSR` to accept batches for CSR formatted
|
||||
* input, and concatenate them into 1 final big CSR. The related functions are:
|
||||
*
|
||||
* - XGBCallbackSetData
|
||||
* - XGBCallbackDataIterNext
|
||||
* - XGDMatrixCreateFromDataIter
|
||||
*
|
||||
* Another set is used by Quantile based DMatrix (used by hist algorithm) for reducing
|
||||
 * memory usage. Currently only GPU implementation is available. It accepts foreign data
|
||||
* iterators as callbacks and works similar to external memory. For GPU Hist, the data is
|
||||
 * first compressed by quantile sketching then merged. This is particularly useful for
|
||||
* distributed setting as it eliminates 2 copies of data. 1 by a `concat` from external
|
||||
* library to make the data into a blob for normal DMatrix initialization, another by the
|
||||
* internal CSR copy of DMatrix. Related functions are:
|
||||
*
|
||||
* - XGProxyDMatrixCreate
|
||||
* - XGDMatrixCallbackNext
|
||||
* - DataIterResetCallback
|
||||
* - XGDeviceQuantileDMatrixSetDataCudaArrayInterface
|
||||
* - XGDeviceQuantileDMatrixSetDataCudaColumnar
|
||||
* - ... (data setters)
|
||||
*/
|
||||
|
||||
/* ==== First set of callback functions, used exclusively by JVM packages. ==== */
|
||||
|
||||
/*! \brief handle to a external data iterator */
|
||||
typedef void *DataIterHandle; // NOLINT(*)
|
||||
/*! \brief handle to a internal data holder. */
|
||||
typedef void *DataHolderHandle; // NOLINT(*)
|
||||
|
||||
|
||||
/*! \brief Mini batch used in XGBoost Data Iteration */
|
||||
typedef struct { // NOLINT(*)
|
||||
/*! \brief number of rows in the minibatch */
|
||||
size_t size;
|
||||
/* \brief number of columns in the minibatch. */
|
||||
size_t columns;
|
||||
/*! \brief row pointer to the rows in the data */
|
||||
#ifdef __APPLE__
|
||||
/* Necessary as Java on MacOS defines jlong as long int
|
||||
* and gcc defines int64_t as long long int. */
|
||||
long* offset; // NOLINT(*)
|
||||
#else
|
||||
int64_t* offset; // NOLINT(*)
|
||||
#endif // __APPLE__
|
||||
/*! \brief labels of each instance */
|
||||
float* label;
|
||||
/*! \brief weight of each instance, can be NULL */
|
||||
float* weight;
|
||||
/*! \brief feature index */
|
||||
int* index;
|
||||
/*! \brief feature values */
|
||||
float* value;
|
||||
} XGBoostBatchCSR;
|
||||
|
||||
/*!
|
||||
* \brief Callback to set the data to handle,
|
||||
* \param handle The handle to the callback.
|
||||
* \param batch The data content to be set.
|
||||
*/
|
||||
XGB_EXTERN_C typedef int XGBCallbackSetData( // NOLINT(*)
|
||||
DataHolderHandle handle, XGBoostBatchCSR batch);
|
||||
|
||||
/*!
|
||||
* \brief The data reading callback function.
|
||||
* The iterator will be able to give subset of batch in the data.
|
||||
*
|
||||
* If there is data, the function will call set_function to set the data.
|
||||
*
|
||||
* \param data_handle The handle to the callback.
|
||||
* \param set_function The batch returned by the iterator
|
||||
* \param set_function_handle The handle to be passed to set function.
|
||||
* \return 0 if we are reaching the end and batch is not returned.
|
||||
*/
|
||||
XGB_EXTERN_C typedef int XGBCallbackDataIterNext( // NOLINT(*)
|
||||
DataIterHandle data_handle, XGBCallbackSetData *set_function,
|
||||
DataHolderHandle set_function_handle);
|
||||
|
||||
/*!
|
||||
* \brief Create a DMatrix from a data iterator.
|
||||
* \param data_handle The handle to the data.
|
||||
* \param callback The callback to get the data.
|
||||
* \param cache_info Additional information about cache file, can be null.
|
||||
* \param out The created DMatrix
|
||||
* \return 0 when success, -1 when failure happens.
|
||||
*/
|
||||
XGB_DLL int XGDMatrixCreateFromDataIter(
|
||||
DataIterHandle data_handle,
|
||||
XGBCallbackDataIterNext* callback,
|
||||
const char* cache_info,
|
||||
DMatrixHandle *out);
|
||||
|
||||
/* == Second set of callback functions, used by constructing Quantile based DMatrix. ===
|
||||
*
|
||||
* Short note for how to use the second set of callback for GPU Hist tree method.
|
||||
*
|
||||
* Step 0: Define a data iterator with 2 methods `reset`, and `next`.
|
||||
* Step 1: Create a DMatrix proxy by `XGProxyDMatrixCreate` and hold the handle.
|
||||
* Step 2: Pass the iterator handle, proxy handle and 2 methods into
|
||||
* `XGDeviceQuantileDMatrixCreateFromCallback`.
|
||||
* Step 3: Call appropriate data setters in `next` functions.
|
||||
*
|
||||
* See test_iterative_device_dmatrix.cu or Python interface for examples.
|
||||
*/
|
||||
|
||||
/*!
|
||||
 * \brief Create a DMatrix proxy for setting data, can be freed by XGDMatrixFree.
|
||||
*
|
||||
* \param out The created Device Quantile DMatrix
|
||||
*
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGProxyDMatrixCreate(DMatrixHandle* out);
|
||||
|
||||
/*!
|
||||
* \brief Callback function prototype for getting next batch of data.
|
||||
*
|
||||
* \param iter A handler to the user defined iterator.
|
||||
*
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_EXTERN_C typedef int XGDMatrixCallbackNext(DataIterHandle iter); // NOLINT(*)
|
||||
|
||||
/*!
|
||||
 * \brief Callback function prototype for resetting the external iterator
|
||||
*/
|
||||
XGB_EXTERN_C typedef void DataIterResetCallback(DataIterHandle handle); // NOLINT(*)
|
||||
|
||||
/*!
|
||||
* \brief Create a device DMatrix with data iterator.
|
||||
*
|
||||
* \param iter A handle to external data iterator.
|
||||
* \param proxy A DMatrix proxy handle created by `XGProxyDMatrixCreate`.
|
||||
 * \param reset Callback function resetting the iterator state.
|
||||
 * \param next Callback function yielding the next batch of data.
|
||||
* \param missing Which value to represent missing value
|
||||
* \param nthread Number of threads to use, 0 for default.
|
||||
* \param max_bin Maximum number of bins for building histogram.
|
||||
* \param out The created Device Quantile DMatrix
|
||||
*
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGDeviceQuantileDMatrixCreateFromCallback(
|
||||
DataIterHandle iter, DMatrixHandle proxy, DataIterResetCallback *reset,
|
||||
XGDMatrixCallbackNext *next, float missing, int nthread, int max_bin,
|
||||
DMatrixHandle *out);
|
||||
/*!
|
||||
* \brief Set data on a DMatrix proxy.
|
||||
*
|
||||
* \param handle A DMatrix proxy created by XGProxyDMatrixCreate
|
||||
* \param c_interface_str Null terminated JSON document string representation of CUDA
|
||||
* array interface.
|
||||
*
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGDeviceQuantileDMatrixSetDataCudaArrayInterface(
|
||||
DMatrixHandle handle,
|
||||
const char* c_interface_str);
|
||||
/*!
|
||||
* \brief Set data on a DMatrix proxy.
|
||||
*
|
||||
* \param handle A DMatrix proxy created by XGProxyDMatrixCreate
|
||||
* \param c_interface_str Null terminated JSON document string representation of CUDA
|
||||
* array interface, with an array of columns.
|
||||
*
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGDeviceQuantileDMatrixSetDataCudaColumnar(
|
||||
DMatrixHandle handle,
|
||||
const char* c_interface_str);
|
||||
/*
|
||||
* ==========================- End data callback APIs ==========================
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
* \brief create a new dmatrix from sliced content of existing matrix
|
||||
* \param handle instance of data matrix to be sliced
|
||||
@ -261,6 +378,18 @@ XGB_DLL int XGDMatrixFree(DMatrixHandle handle);
|
||||
*/
|
||||
XGB_DLL int XGDMatrixSaveBinary(DMatrixHandle handle,
|
||||
const char *fname, int silent);
|
||||
|
||||
/*!
|
||||
* \brief Set content in array interface to a content in info.
|
||||
* \param handle a instance of data matrix
|
||||
* \param field field name.
|
||||
* \param c_interface_str JSON string representation of array interface.
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGDMatrixSetInfoFromInterface(DMatrixHandle handle,
|
||||
char const* field,
|
||||
char const* c_interface_str);
|
||||
|
||||
/*!
|
||||
* \brief set float vector to a content in info
|
||||
* \param handle a instance of data matrix
|
||||
@ -437,6 +566,10 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle,
|
||||
int training,
|
||||
bst_ulong *out_len,
|
||||
const float **out_result);
|
||||
|
||||
/*
|
||||
* ========================== Begin Serialization APIs =========================
|
||||
*/
|
||||
/*
|
||||
* Short note for serialization APIs. There are 3 different sets of serialization API.
|
||||
*
|
||||
@ -559,6 +692,10 @@ XGB_DLL int XGBoosterSaveJsonConfig(BoosterHandle handle, bst_ulong *out_len,
|
||||
*/
|
||||
XGB_DLL int XGBoosterLoadJsonConfig(BoosterHandle handle,
|
||||
char const *json_parameters);
|
||||
/*
|
||||
* =========================== End Serialization APIs ==========================
|
||||
*/
|
||||
|
||||
|
||||
/*!
|
||||
* \brief dump model, return array of strings representing model dump
|
||||
|
||||
@ -502,7 +502,33 @@ class DMatrix {
|
||||
const std::string& cache_prefix = "",
|
||||
size_t page_size = kPageSize);
|
||||
|
||||
virtual DMatrix* Slice(common::Span<int32_t const> ridxs) = 0;
|
||||
/**
|
||||
* \brief Create a new Quantile based DMatrix used for histogram based algorithm.
|
||||
*
|
||||
* \tparam DataIterHandle External iterator type, defined in C API.
|
||||
* \tparam DMatrixHandle DMatrix handle, defined in C API.
|
||||
* \tparam DataIterResetCallback Callback for reset, prototype defined in C API.
|
||||
* \tparam XGDMatrixCallbackNext Callback for next, prototype defined in C API.
|
||||
*
|
||||
* \param iter External data iterator
|
||||
 * \param proxy A handle to ProxyDMatrix
|
||||
* \param reset Callback for reset
|
||||
* \param next Callback for next
|
||||
* \param missing Value that should be treated as missing.
|
||||
* \param nthread number of threads used for initialization.
|
||||
* \param max_bin Maximum number of bins.
|
||||
*
|
||||
* \return A created quantile based DMatrix.
|
||||
*/
|
||||
template <typename DataIterHandle, typename DMatrixHandle,
|
||||
typename DataIterResetCallback, typename XGDMatrixCallbackNext>
|
||||
static DMatrix *Create(DataIterHandle iter, DMatrixHandle proxy,
|
||||
DataIterResetCallback *reset,
|
||||
XGDMatrixCallbackNext *next, float missing,
|
||||
int nthread,
|
||||
int max_bin);
|
||||
|
||||
virtual DMatrix *Slice(common::Span<int32_t const> ridxs) = 0;
|
||||
/*! \brief page size 32 MB */
|
||||
static const size_t kPageSize = 32UL << 20UL;
|
||||
|
||||
|
||||
@ -23,6 +23,7 @@
|
||||
#include "../common/io.h"
|
||||
#include "../data/adapter.h"
|
||||
#include "../data/simple_dmatrix.h"
|
||||
#include "../data/proxy_dmatrix.h"
|
||||
|
||||
using namespace xgboost; // NOLINT(*);
|
||||
|
||||
@ -101,6 +102,50 @@ XGB_DLL int XGDMatrixCreateFromArrayInterface(char const* c_json_strs,
|
||||
|
||||
#endif
|
||||
|
||||
// Create from data iterator
|
||||
/*!
 * \brief Create a DMatrix proxy for the data-callback APIs.
 *
 * The returned handle owns a `std::shared_ptr<DMatrix>` wrapping a
 * `DMatrixProxy`, so callers free it with `XGDMatrixFree` like any other
 * DMatrix handle.
 *
 * \param out Output handle for the created proxy.
 * \return 0 when success, -1 when failure happens.
 */
XGB_DLL int XGProxyDMatrixCreate(DMatrixHandle* out) {
  API_BEGIN();
  // Fixed: removed the stray empty statement (double semicolon) at the end
  // of this line.
  *out = new std::shared_ptr<xgboost::DMatrix>(new xgboost::data::DMatrixProxy);
  API_END();
}
|
||||
|
||||
/*!
 * \brief Set CUDA array-interface data on a DMatrix proxy.
 *
 * The handle must have been created by `XGProxyDMatrixCreate`; setting data
 * on any other DMatrix type is an error.
 */
XGB_DLL int
XGDeviceQuantileDMatrixSetDataCudaArrayInterface(DMatrixHandle handle,
                                                 char const *c_interface_str) {
  API_BEGIN();
  CHECK_HANDLE();
  // The handle wraps a shared_ptr<DMatrix>; unwrap it first.
  auto p_shared = static_cast<std::shared_ptr<xgboost::DMatrix> *>(handle);
  CHECK(p_shared);
  // Only a DMatrixProxy accepts staged data.
  auto proxy = static_cast<xgboost::data::DMatrixProxy *>(p_shared->get());
  CHECK(proxy) << "Current DMatrix type does not support set data.";
  proxy->SetData(c_interface_str);
  API_END();
}
|
||||
|
||||
/*!
 * \brief Set CUDA columnar (cuDF) data on a DMatrix proxy.
 *
 * Mirrors `XGDeviceQuantileDMatrixSetDataCudaArrayInterface`; the proxy's
 * `SetData` dispatches on the JSON document it receives.
 */
XGB_DLL int
XGDeviceQuantileDMatrixSetDataCudaColumnar(DMatrixHandle handle,
                                           char const *c_interface_str) {
  API_BEGIN();
  CHECK_HANDLE();
  // Unwrap the shared_ptr owned by the handle.
  auto p_shared = static_cast<std::shared_ptr<xgboost::DMatrix> *>(handle);
  CHECK(p_shared);
  // Setting data is only valid on a proxy DMatrix.
  auto proxy = static_cast<xgboost::data::DMatrixProxy *>(p_shared->get());
  CHECK(proxy) << "Current DMatrix type does not support set data.";
  proxy->SetData(c_interface_str);
  API_END();
}
|
||||
|
||||
/*!
 * \brief Create a device Quantile DMatrix by draining an external data
 *        iterator through the proxy handle.
 *
 * The heavy lifting happens in `DMatrix::Create`; this wrapper only adapts
 * the result into a handle owned by the caller (freed via `XGDMatrixFree`).
 */
XGB_DLL int XGDeviceQuantileDMatrixCreateFromCallback(
    DataIterHandle iter, DMatrixHandle proxy, DataIterResetCallback *reset,
    XGDMatrixCallbackNext *next, float missing, int nthread,
    int max_bin, DMatrixHandle *out) {
  API_BEGIN();
  auto p_dmat = xgboost::DMatrix::Create(iter, proxy, reset, next, missing,
                                         nthread, max_bin);
  *out = new std::shared_ptr<xgboost::DMatrix>{p_dmat};
  API_END();
}
|
||||
// End Create from data iterator
|
||||
|
||||
XGB_DLL int XGDMatrixCreateFromCSREx(const size_t* indptr,
|
||||
const unsigned* indices,
|
||||
const bst_float* data,
|
||||
|
||||
@ -68,9 +68,11 @@ struct SketchContainer {
|
||||
// Prevent copying/assigning/moving this as its internals can't be
|
||||
// assigned/copied/moved
|
||||
SketchContainer(const SketchContainer&) = delete;
|
||||
SketchContainer(const SketchContainer&&) = delete;
|
||||
SketchContainer(SketchContainer&& that) {
|
||||
std::swap(sketches_, that.sketches_);
|
||||
}
|
||||
SketchContainer& operator=(const SketchContainer&) = delete;
|
||||
SketchContainer& operator=(const SketchContainer&&) = delete;
|
||||
SketchContainer& operator=(SketchContainer&&) = delete;
|
||||
};
|
||||
|
||||
struct EntryCompareOp {
|
||||
|
||||
@ -19,6 +19,7 @@
|
||||
#include "../common/version.h"
|
||||
#include "../common/group_data.h"
|
||||
#include "../data/adapter.h"
|
||||
#include "../data/iterative_device_dmatrix.h"
|
||||
|
||||
#if DMLC_ENABLE_STD_THREAD
|
||||
#include "./sparse_page_source.h"
|
||||
@ -569,6 +570,26 @@ DMatrix* DMatrix::Load(const std::string& uri,
|
||||
}
|
||||
return dmat;
|
||||
}
|
||||
/*!
 * \brief Factory for the callback-driven Quantile DMatrix.
 *
 * Template parameters mirror the C API prototypes so the core library does
 * not depend on c_api.h directly; the only instantiation uses the C API
 * types (see the explicit instantiation below this definition in SOURCE).
 *
 * \param iter    External data iterator handle.
 * \param proxy   Handle to a ProxyDMatrix used for staging batches.
 * \param reset   Callback resetting the iterator state.
 * \param next    Callback yielding the next batch.
 * \param missing Value treated as missing.
 * \param nthread Number of threads, 0 for default.
 * \param max_bin Maximum number of histogram bins.
 * \return A new IterativeDeviceDMatrix, or nullptr when XGBoost was built
 *         without CUDA (after asserting GPU support, which fails loudly).
 */
template <typename DataIterHandle, typename DMatrixHandle,
          typename DataIterResetCallback, typename XGDMatrixCallbackNext>
DMatrix *DMatrix::Create(DataIterHandle iter, DMatrixHandle proxy,
                         DataIterResetCallback *reset,
                         XGDMatrixCallbackNext *next, float missing,
                         int nthread,
                         int max_bin) {
#if defined(XGBOOST_USE_CUDA)
  // Only a GPU implementation exists; ownership passes to the caller.
  return new data::IterativeDeviceDMatrix(iter, proxy, reset, next, missing, nthread, max_bin);
#else
  // No CUDA build: report the missing capability instead of silently failing.
  common::AssertGPUSupport();
  return nullptr;
#endif
}
|
||||
|
||||
template DMatrix *DMatrix::Create<DataIterHandle, DMatrixHandle,
|
||||
DataIterResetCallback, XGDMatrixCallbackNext>(
|
||||
DataIterHandle iter, DMatrixHandle proxy, DataIterResetCallback *reset,
|
||||
XGDMatrixCallbackNext *next, float missing, int nthread,
|
||||
int max_bin);
|
||||
|
||||
template <typename AdapterT>
|
||||
DMatrix* DMatrix::Create(AdapterT* adapter, float missing, int nthread,
|
||||
|
||||
@ -252,6 +252,31 @@ EllpackPageImpl::EllpackPageImpl(AdapterT* adapter, float missing, bool is_dense
|
||||
ELLPACK_SPECIALIZATION(data::CudfAdapter)
|
||||
ELLPACK_SPECIALIZATION(data::CupyAdapter)
|
||||
|
||||
|
||||
/*!
 * \brief Build an ELLPACK page directly from a single adapter batch.
 *
 * Allocates an empty page with the supplied cuts/row geometry, copies the
 * batch into it, then writes the null (missing) markers based on per-row
 * element counts.
 *
 * NOTE(review): `nthread` and `n_cols` are accepted but not used in this
 * body — presumably kept for signature symmetry with the AdapterT
 * constructor; confirm before removing.
 */
template <typename AdapterBatch>
EllpackPageImpl::EllpackPageImpl(AdapterBatch batch, float missing, int device,
                                 bool is_dense, int nthread,
                                 common::Span<size_t> row_counts_span,
                                 size_t row_stride, size_t n_rows, size_t n_cols,
                                 common::HistogramCuts const& cuts) {
  // All work happens on the caller-selected device.
  dh::safe_cuda(cudaSetDevice(device));

  // Delegate allocation/layout to the plain constructor, then fill in data.
  *this = EllpackPageImpl(device, cuts, is_dense, row_stride, n_rows);
  CopyDataToEllpack(batch, this, device, missing);
  WriteNullValues(this, device, row_counts_span);
}
|
||||
|
||||
#define ELLPACK_BATCH_SPECIALIZE(__BATCH_T) \
|
||||
template EllpackPageImpl::EllpackPageImpl( \
|
||||
__BATCH_T batch, float missing, int device, \
|
||||
bool is_dense, int nthread, \
|
||||
common::Span<size_t> row_counts_span, \
|
||||
size_t row_stride, size_t n_rows, size_t n_cols, \
|
||||
common::HistogramCuts const& cuts);
|
||||
|
||||
ELLPACK_BATCH_SPECIALIZE(data::CudfAdapterBatch)
|
||||
ELLPACK_BATCH_SPECIALIZE(data::CupyAdapterBatch)
|
||||
|
||||
// A functor that copies the data from one EllpackPage to another.
|
||||
struct CopyPage {
|
||||
common::CompressedBufferWriter cbw;
|
||||
@ -279,6 +304,10 @@ size_t EllpackPageImpl::Copy(int device, EllpackPageImpl* page, size_t offset) {
|
||||
CHECK_EQ(row_stride, page->row_stride);
|
||||
CHECK_EQ(NumSymbols(), page->NumSymbols());
|
||||
CHECK_GE(n_rows * row_stride, offset + num_elements);
|
||||
if (page == this) {
|
||||
LOG(FATAL) << "Concatenating the same Ellpack.";
|
||||
return this->n_rows * this->row_stride;
|
||||
}
|
||||
gidx_buffer.SetDevice(device);
|
||||
page->gidx_buffer.SetDevice(device);
|
||||
dh::LaunchN(device, num_elements, CopyPage(this, page, offset));
|
||||
|
||||
@ -149,7 +149,7 @@ class EllpackPageImpl {
|
||||
|
||||
EllpackPageImpl(int device, common::HistogramCuts cuts,
|
||||
const SparsePage& page,
|
||||
bool is_dense,size_t row_stride);
|
||||
bool is_dense, size_t row_stride);
|
||||
|
||||
/*!
|
||||
* \brief Constructor from an existing DMatrix.
|
||||
@ -161,8 +161,16 @@ class EllpackPageImpl {
|
||||
|
||||
template <typename AdapterT>
|
||||
explicit EllpackPageImpl(AdapterT* adapter, float missing, bool is_dense, int nthread,
|
||||
int max_bin, common::Span<size_t> row_counts_span,
|
||||
int max_bin,
|
||||
common::Span<size_t> row_counts_span,
|
||||
size_t row_stride);
|
||||
|
||||
template <typename AdapterBatch>
|
||||
explicit EllpackPageImpl(AdapterBatch batch, float missing, int device, bool is_dense, int nthread,
|
||||
common::Span<size_t> row_counts_span,
|
||||
size_t row_stride, size_t n_rows, size_t n_cols,
|
||||
common::HistogramCuts const& cuts);
|
||||
|
||||
/*! \brief Copy the elements of the given ELLPACK page into this page.
|
||||
*
|
||||
* @param device The GPU device to use.
|
||||
|
||||
188
src/data/iterative_device_dmatrix.cu
Normal file
188
src/data/iterative_device_dmatrix.cu
Normal file
@ -0,0 +1,188 @@
|
||||
/*!
|
||||
* Copyright 2020 XGBoost contributors
|
||||
*/
|
||||
#include <memory>
|
||||
#include <type_traits>
|
||||
#include <algorithm>
|
||||
|
||||
#include "../common/hist_util.cuh"
|
||||
#include "simple_batch_iterator.h"
|
||||
#include "iterative_device_dmatrix.h"
|
||||
#include "sparse_page_source.h"
|
||||
#include "ellpack_page.cuh"
|
||||
#include "proxy_dmatrix.h"
|
||||
#include "device_adapter.cuh"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
|
||||
template <typename Fn>
|
||||
decltype(auto) Dispatch(DMatrixProxy const* proxy, Fn fn) {
|
||||
if (proxy->Adapter().type() == typeid(std::shared_ptr<CupyAdapter>)) {
|
||||
auto value = dmlc::get<std::shared_ptr<CupyAdapter>>(
|
||||
proxy->Adapter())->Value();
|
||||
return fn(value);
|
||||
} else if (proxy->Adapter().type() == typeid(std::shared_ptr<CudfAdapter>)) {
|
||||
auto value = dmlc::get<std::shared_ptr<CudfAdapter>>(
|
||||
proxy->Adapter())->Value();
|
||||
return fn(value);
|
||||
} else {
|
||||
LOG(FATAL) << "Unknown type: " << proxy->Adapter().type().name();
|
||||
auto value = dmlc::get<std::shared_ptr<CudfAdapter>>(
|
||||
proxy->Adapter())->Value();
|
||||
return fn(value);
|
||||
}
|
||||
}
|
||||
|
||||
/*!
 * \brief Drain the external iterator and build the in-memory ELLPACK page.
 *
 * Two passes over the data:
 *   1. Sketch every batch (per-column quantile sketches), accumulating row
 *      counts, nnz and the maximum row stride.
 *   2. Re-iterate, compress each batch into ELLPACK form and concatenate it
 *      into the single final page.
 *
 * \param iter_handle Opaque handle forwarded to the user's reset/next callbacks.
 * \param missing     Value treated as missing.
 * \param nthread     Threads for the OpenMP merge loops, 0 for default.
 */
void IterativeDeviceDMatrix::Initialize(DataIterHandle iter_handle, float missing, int nthread) {
  // A handle passed to external iterator.
  auto handle = static_cast<std::shared_ptr<DMatrix>*>(proxy_);
  CHECK(handle);
  DMatrixProxy* proxy = static_cast<DMatrixProxy*>(handle->get());
  CHECK(proxy);
  // The external iterator
  auto iter = DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext>{
    iter_handle, reset_, next_};

  dh::XGBCachingDeviceAllocator<char> alloc;

  // Helpers reading the dimensions of whatever batch is currently staged in
  // the proxy (re-evaluated on every call, hence lambdas).
  auto num_rows = [&]() {
    return Dispatch(proxy, [](auto const &value) { return value.NumRows(); });
  };
  auto num_cols = [&]() {
    return Dispatch(proxy, [](auto const &value) { return value.NumCols(); });
  };

  size_t row_stride = 0;
  size_t nnz = 0;
  // Sketch for all batches.
  iter.Reset();
  common::HistogramCuts cuts;
  common::DenseCuts dense_cuts(&cuts);

  // One sketch container per batch; merged column-wise after the loop.
  std::vector<common::SketchContainer> sketch_containers;
  size_t batches = 0;
  size_t accumulated_rows = 0;
  bst_feature_t cols = 0;
  while (iter.Next()) {
    // NOTE(review): assumes every batch reports the same device index —
    // confirm against the callback contract.
    auto device = proxy->DeviceIdx();
    dh::safe_cuda(cudaSetDevice(device));
    if (cols == 0) {
      // First batch fixes the column count; later batches must match.
      cols = num_cols();
    } else {
      CHECK_EQ(cols, num_cols()) << "Inconsistent number of columns.";
    }
    sketch_containers.emplace_back(batch_param_.max_bin, num_cols(), num_rows());
    auto* p_sketch = &sketch_containers.back();
    if (proxy->Info().weights_.Size() != 0) {
      // Weighted sketching when instance weights were supplied.
      proxy->Info().weights_.SetDevice(device);
      Dispatch(proxy, [&](auto const &value) {
          common::AdapterDeviceSketchWeighted(value, batch_param_.max_bin,
                                              proxy->Info(),
                                              missing, device, p_sketch);
        });
    } else {
      Dispatch(proxy, [&](auto const &value) {
          common::AdapterDeviceSketch(value, batch_param_.max_bin, missing,
                                      device, p_sketch);
        });
    }

    auto batch_rows = num_rows();
    accumulated_rows += batch_rows;
    // Per-row valid-element counts; GetRowCounts also returns the widest row
    // seen, which becomes the ELLPACK row stride.
    dh::caching_device_vector<size_t> row_counts(batch_rows + 1, 0);
    common::Span<size_t> row_counts_span(row_counts.data().get(),
                                         row_counts.size());
    row_stride =
        std::max(row_stride, Dispatch(proxy, [=](auto const& value) {
          return GetRowCounts(value, row_counts_span, device, missing);
        }));
    nnz += thrust::reduce(thrust::cuda::par(alloc),
                          row_counts.begin(), row_counts.end());
    batches++;
  }

  // Merging multiple batches for each column
  std::vector<common::WQSketch::SummaryContainer> summary_array(cols);
  size_t intermediate_num_cuts = std::min(
      accumulated_rows, static_cast<size_t>(batch_param_.max_bin *
                                            common::SketchContainer::kFactor));
  size_t nbytes =
      common::WQSketch::SummaryContainer::CalcMemCost(intermediate_num_cuts);
  // Columns are independent, so merge them in parallel.
#pragma omp parallel for num_threads(nthread) if (nthread > 0)
  for (omp_ulong c = 0; c < cols; ++c) {
    for (auto& sketch_batch : sketch_containers) {
      common::WQSketch::SummaryContainer summary;
      sketch_batch.sketches_.at(c).GetSummary(&summary);
      // Re-init to release the per-batch sketch memory as soon as possible.
      sketch_batch.sketches_.at(c).Init(0, 1);
      summary_array.at(c).Reduce(summary, nbytes);
    }
  }
  sketch_containers.clear();

  // Build the final summary.
  std::vector<common::WQSketch> sketches(cols);
#pragma omp parallel for num_threads(nthread) if (nthread > 0)
  for (omp_ulong c = 0; c < cols; ++c) {
    sketches.at(c).Init(
        accumulated_rows,
        1.0 / (common::SketchContainer::kFactor * batch_param_.max_bin));
    sketches.at(c).PushSummary(summary_array.at(c));
  }
  dense_cuts.Init(&sketches, batch_param_.max_bin, accumulated_rows);
  summary_array.clear();

  this->info_.num_col_ = cols;
  this->info_.num_row_ = accumulated_rows;
  this->info_.num_nonzero_ = nnz;

  // Construct the final ellpack page.
  page_.reset(new EllpackPage);
  *(page_->Impl()) = EllpackPageImpl(proxy->DeviceIdx(), cuts, this->IsDense(),
                                     row_stride, accumulated_rows);

  // Second pass: compress each batch and append it into the final page.
  size_t offset = 0;
  iter.Reset();
  while (iter.Next()) {
    auto device = proxy->DeviceIdx();
    dh::safe_cuda(cudaSetDevice(device));
    auto rows = num_rows();
    dh::caching_device_vector<size_t> row_counts(rows + 1, 0);
    common::Span<size_t> row_counts_span(row_counts.data().get(),
                                         row_counts.size());
    Dispatch(proxy, [=](auto const& value) {
      return GetRowCounts(value, row_counts_span, device, missing);
    });
    auto is_dense = this->IsDense();
    auto new_impl = Dispatch(proxy, [&](auto const &value) {
      return EllpackPageImpl(value, missing, device, is_dense, nthread,
                             row_counts_span, row_stride, rows, cols, cuts);
    });
    size_t num_elements = page_->Impl()->Copy(device, &new_impl, offset);
    offset += num_elements;

    // Meta info (labels, weights, ...) from each batch is appended when
    // there is more than one batch; single-batch info is moved wholesale
    // after the loop.
    proxy->Info().num_row_ = num_rows();
    proxy->Info().num_col_ = cols;
    if (batches != 1) {
      this->info_.Extend(std::move(proxy->Info()), false);
    }
  }

  if (batches == 1) {
    this->info_ = std::move(proxy->Info());
    // The move above must leave the proxy's labels empty.
    CHECK_EQ(proxy->Info().labels_.Size(), 0);
  }

  iter.Reset();
  // Synchronise worker columns
  rabit::Allreduce<rabit::op::Max>(&info_.num_col_, 1);
}
|
||||
|
||||
// The entire dataset was pre-compressed into one ELLPACK page during
// Initialize(), so the batch set always contains exactly that single page.
BatchSet<EllpackPage> IterativeDeviceDMatrix::GetEllpackBatches(const BatchParam& param) {
  CHECK(page_);
  auto* impl = new SimpleBatchIteratorImpl<EllpackPage>(page_.get());
  return BatchSet<EllpackPage>(BatchIterator<EllpackPage>(impl));
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
76
src/data/iterative_device_dmatrix.h
Normal file
76
src/data/iterative_device_dmatrix.h
Normal file
@ -0,0 +1,76 @@
|
||||
/*!
|
||||
* Copyright 2020 by Contributors
|
||||
* \file iterative_device_dmatrix.h
|
||||
*/
|
||||
#ifndef XGBOOST_DATA_ITERATIVE_DEVICE_DMATRIX_H_
|
||||
#define XGBOOST_DATA_ITERATIVE_DEVICE_DMATRIX_H_
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <memory>
|
||||
|
||||
#include "xgboost/base.h"
|
||||
#include "xgboost/data.h"
|
||||
#include "xgboost/c_api.h"
|
||||
#include "proxy_dmatrix.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
|
||||
class IterativeDeviceDMatrix : public DMatrix {
|
||||
MetaInfo info_;
|
||||
BatchParam batch_param_;
|
||||
std::shared_ptr<EllpackPage> page_;
|
||||
|
||||
DMatrixHandle proxy_;
|
||||
DataIterResetCallback *reset_;
|
||||
XGDMatrixCallbackNext *next_;
|
||||
|
||||
public:
|
||||
void Initialize(DataIterHandle iter, float missing, int nthread);
|
||||
|
||||
public:
|
||||
explicit IterativeDeviceDMatrix(DataIterHandle iter, DMatrixHandle proxy,
|
||||
DataIterResetCallback *reset,
|
||||
XGDMatrixCallbackNext *next, float missing,
|
||||
int nthread, int max_bin)
|
||||
: proxy_{proxy}, reset_{reset}, next_{next} {
|
||||
batch_param_ = BatchParam{0, max_bin, 0};
|
||||
this->Initialize(iter, missing, nthread);
|
||||
}
|
||||
|
||||
bool EllpackExists() const override { return true; }
|
||||
bool SparsePageExists() const override { return false; }
|
||||
DMatrix *Slice(common::Span<int32_t const> ridxs) override {
|
||||
LOG(FATAL) << "Slicing DMatrix is not supported for Device DMatrix.";
|
||||
return nullptr;
|
||||
}
|
||||
BatchSet<SparsePage> GetRowBatches() override {
|
||||
LOG(FATAL) << "Not implemented.";
|
||||
return BatchSet<SparsePage>(BatchIterator<SparsePage>(nullptr));
|
||||
}
|
||||
BatchSet<CSCPage> GetColumnBatches() override {
|
||||
LOG(FATAL) << "Not implemented.";
|
||||
return BatchSet<CSCPage>(BatchIterator<CSCPage>(nullptr));
|
||||
}
|
||||
BatchSet<SortedCSCPage> GetSortedColumnBatches() override {
|
||||
LOG(FATAL) << "Not implemented.";
|
||||
return BatchSet<SortedCSCPage>(BatchIterator<SortedCSCPage>(nullptr));
|
||||
}
|
||||
|
||||
BatchSet<EllpackPage> GetEllpackBatches(const BatchParam& param) override;
|
||||
|
||||
bool SingleColBlock() const override { return false; }
|
||||
|
||||
MetaInfo& Info() override {
|
||||
return info_;
|
||||
}
|
||||
MetaInfo const& Info() const override {
|
||||
return info_;
|
||||
}
|
||||
};
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
|
||||
#endif // XGBOOST_DATA_ITERATIVE_DEVICE_DMATRIX_H_
|
||||
166
tests/cpp/data/test_iterative_device_dmatrix.cu
Normal file
166
tests/cpp/data/test_iterative_device_dmatrix.cu
Normal file
@ -0,0 +1,166 @@
|
||||
/*!
|
||||
* Copyright 2020 XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "../helpers.h"
|
||||
#include "../../../src/data/iterative_device_dmatrix.h"
|
||||
#include "../../../src/data/ellpack_page.cuh"
|
||||
#include "../../../src/data/device_adapter.cuh"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
|
||||
void TestEquivalent(float sparsity) {
|
||||
CudaArrayIterForTest iter{sparsity};
|
||||
IterativeDeviceDMatrix m(
|
||||
&iter, iter.Proxy(), Reset, Next, std::numeric_limits<float>::quiet_NaN(),
|
||||
0, 256);
|
||||
size_t offset = 0;
|
||||
auto first = (*m.GetEllpackBatches({}).begin()).Impl();
|
||||
std::unique_ptr<EllpackPageImpl> page_concatenated {
|
||||
new EllpackPageImpl(0, first->Cuts(), first->is_dense,
|
||||
first->row_stride, 1000 * 100)};
|
||||
for (auto& batch : m.GetBatches<EllpackPage>()) {
|
||||
auto page = batch.Impl();
|
||||
size_t num_elements = page_concatenated->Copy(0, page, offset);
|
||||
offset += num_elements;
|
||||
}
|
||||
auto from_iter = page_concatenated->GetDeviceAccessor(0);
|
||||
ASSERT_EQ(m.Info().num_col_, CudaArrayIterForTest::kCols);
|
||||
ASSERT_EQ(m.Info().num_row_, CudaArrayIterForTest::kRows);
|
||||
|
||||
std::string interface_str = iter.AsArray();
|
||||
auto adapter = CupyAdapter(interface_str);
|
||||
std::unique_ptr<DMatrix> dm{
|
||||
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 0)};
|
||||
BatchParam bp {0, 256};
|
||||
for (auto& ellpack : dm->GetBatches<EllpackPage>(bp)) {
|
||||
auto from_data = ellpack.Impl()->GetDeviceAccessor(0);
|
||||
|
||||
std::vector<float> cuts_from_iter(from_iter.gidx_fvalue_map.size());
|
||||
std::vector<float> min_fvalues_iter(from_iter.min_fvalue.size());
|
||||
std::vector<uint32_t> cut_ptrs_iter(from_iter.feature_segments.size());
|
||||
dh::CopyDeviceSpanToVector(&cuts_from_iter, from_iter.gidx_fvalue_map);
|
||||
dh::CopyDeviceSpanToVector(&min_fvalues_iter, from_iter.min_fvalue);
|
||||
dh::CopyDeviceSpanToVector(&cut_ptrs_iter, from_iter.feature_segments);
|
||||
|
||||
std::vector<float> cuts_from_data(from_data.gidx_fvalue_map.size());
|
||||
std::vector<float> min_fvalues_data(from_data.min_fvalue.size());
|
||||
std::vector<uint32_t> cut_ptrs_data(from_data.feature_segments.size());
|
||||
dh::CopyDeviceSpanToVector(&cuts_from_data, from_data.gidx_fvalue_map);
|
||||
dh::CopyDeviceSpanToVector(&min_fvalues_data, from_data.min_fvalue);
|
||||
dh::CopyDeviceSpanToVector(&cut_ptrs_data, from_data.feature_segments);
|
||||
|
||||
ASSERT_EQ(cuts_from_iter.size(), cuts_from_data.size());
|
||||
for (size_t i = 0; i < cuts_from_iter.size(); ++i) {
|
||||
EXPECT_NEAR(cuts_from_iter[i], cuts_from_data[i], kRtEps);
|
||||
}
|
||||
ASSERT_EQ(min_fvalues_iter.size(), min_fvalues_data.size());
|
||||
for (size_t i = 0; i < min_fvalues_iter.size(); ++i) {
|
||||
ASSERT_NEAR(min_fvalues_iter[i], min_fvalues_data[i], kRtEps);
|
||||
}
|
||||
ASSERT_EQ(cut_ptrs_iter.size(), cut_ptrs_data.size());
|
||||
for (size_t i = 0; i < cut_ptrs_iter.size(); ++i) {
|
||||
ASSERT_EQ(cut_ptrs_iter[i], cut_ptrs_data[i]);
|
||||
}
|
||||
|
||||
auto const& buffer_from_iter = page_concatenated->gidx_buffer;
|
||||
auto const& buffer_from_data = ellpack.Impl()->gidx_buffer;
|
||||
ASSERT_NE(buffer_from_data.Size(), 0);
|
||||
ASSERT_EQ(buffer_from_data.ConstHostVector(), buffer_from_data.ConstHostVector());
|
||||
}
|
||||
}
|
||||
|
||||
TEST(IterativeDeviceDMatrix, Basic) {
|
||||
TestEquivalent(0.0);
|
||||
TestEquivalent(0.5);
|
||||
}
|
||||
|
||||
TEST(IterativeDeviceDMatrix, RowMajor) {
|
||||
CudaArrayIterForTest iter(0.0f);
|
||||
IterativeDeviceDMatrix m(
|
||||
&iter, iter.Proxy(), Reset, Next, std::numeric_limits<float>::quiet_NaN(),
|
||||
0, 256);
|
||||
size_t n_batches = 0;
|
||||
std::string interface_str = iter.AsArray();
|
||||
for (auto& ellpack : m.GetBatches<EllpackPage>()) {
|
||||
n_batches ++;
|
||||
auto impl = ellpack.Impl();
|
||||
common::CompressedIterator<uint32_t> iterator(
|
||||
impl->gidx_buffer.HostVector().data(), impl->NumSymbols());
|
||||
auto cols = CudaArrayIterForTest::kCols;
|
||||
auto rows = CudaArrayIterForTest::kRows;
|
||||
|
||||
auto j_interface =
|
||||
Json::Load({interface_str.c_str(), interface_str.size()});
|
||||
ArrayInterface loaded {get<Object const>(j_interface)};
|
||||
std::vector<float> h_data(cols * rows);
|
||||
common::Span<float> s_data{static_cast<float*>(loaded.data), cols * rows};
|
||||
dh::CopyDeviceSpanToVector(&h_data, s_data);
|
||||
|
||||
for(auto i = 0ull; i < rows * cols; i++) {
|
||||
int column_idx = i % cols;
|
||||
EXPECT_EQ(impl->Cuts().SearchBin(h_data[i], column_idx), iterator[i]);
|
||||
}
|
||||
EXPECT_EQ(m.Info().num_col_, cols);
|
||||
EXPECT_EQ(m.Info().num_row_, rows);
|
||||
EXPECT_EQ(m.Info().num_nonzero_, rows * cols);
|
||||
}
|
||||
// All batches are concatenated.
|
||||
ASSERT_EQ(n_batches, 1);
|
||||
}
|
||||
|
||||
TEST(IterativeDeviceDMatrix, RowMajorMissing) {
|
||||
const float kMissing = std::numeric_limits<float>::quiet_NaN();
|
||||
size_t rows = 10;
|
||||
size_t cols = 2;
|
||||
CudaArrayIterForTest iter(0.0f, rows, cols, 2);
|
||||
std::string interface_str = iter.AsArray();
|
||||
auto j_interface =
|
||||
Json::Load({interface_str.c_str(), interface_str.size()});
|
||||
ArrayInterface loaded {get<Object const>(j_interface)};
|
||||
std::vector<float> h_data(cols * rows);
|
||||
common::Span<float> s_data{static_cast<float*>(loaded.data), cols * rows};
|
||||
dh::CopyDeviceSpanToVector(&h_data, s_data);
|
||||
h_data[1] = kMissing;
|
||||
h_data[5] = kMissing;
|
||||
h_data[6] = kMissing;
|
||||
auto ptr = thrust::device_ptr<float>(
|
||||
reinterpret_cast<float *>(get<Integer>(j_interface["data"][0])));
|
||||
thrust::copy(h_data.cbegin(), h_data.cend(), ptr);
|
||||
|
||||
IterativeDeviceDMatrix m(
|
||||
&iter, iter.Proxy(), Reset, Next, std::numeric_limits<float>::quiet_NaN(),
|
||||
0, 256);
|
||||
auto &ellpack = *m.GetBatches<EllpackPage>({0, 256, 0}).begin();
|
||||
auto impl = ellpack.Impl();
|
||||
common::CompressedIterator<uint32_t> iterator(
|
||||
impl->gidx_buffer.HostVector().data(), impl->NumSymbols());
|
||||
EXPECT_EQ(iterator[1], impl->GetDeviceAccessor(0).NullValue());
|
||||
EXPECT_EQ(iterator[5], impl->GetDeviceAccessor(0).NullValue());
|
||||
// null values get placed after valid values in a row
|
||||
EXPECT_EQ(iterator[7], impl->GetDeviceAccessor(0).NullValue());
|
||||
EXPECT_EQ(m.Info().num_col_, cols);
|
||||
EXPECT_EQ(m.Info().num_row_, rows);
|
||||
EXPECT_EQ(m.Info().num_nonzero_, rows* cols - 3);
|
||||
}
|
||||
|
||||
TEST(IterativeDeviceDMatrix, IsDense) {
|
||||
int num_bins = 16;
|
||||
auto test = [num_bins] (float sparsity) {
|
||||
CudaArrayIterForTest iter(sparsity);
|
||||
IterativeDeviceDMatrix m(
|
||||
&iter, iter.Proxy(), Reset, Next, std::numeric_limits<float>::quiet_NaN(),
|
||||
0, 256);
|
||||
if (sparsity == 0.0) {
|
||||
ASSERT_TRUE(m.IsDense());
|
||||
} else {
|
||||
ASSERT_FALSE(m.IsDense());
|
||||
}
|
||||
};
|
||||
test(0.0);
|
||||
test(0.1);
|
||||
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
@ -1,17 +1,43 @@
|
||||
#include <xgboost/c_api.h>
|
||||
|
||||
#include "helpers.h"
|
||||
#include "../../src/data/device_adapter.cuh"
|
||||
#include "../../src/data/device_dmatrix.h"
|
||||
#include "../../src/data/iterative_device_dmatrix.h"
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
CudaArrayIterForTest::CudaArrayIterForTest(float sparsity, size_t rows,
|
||||
size_t cols, size_t batches)
|
||||
: rows_{rows}, cols_{cols}, n_batches_{batches} {
|
||||
XGProxyDMatrixCreate(&proxy_);
|
||||
rng_.reset(new RandomDataGenerator{rows_, cols_, sparsity});
|
||||
rng_->Device(0);
|
||||
std::tie(batches_, interface_) =
|
||||
rng_->GenerateArrayInterfaceBatch(&data_, n_batches_);
|
||||
this->Reset();
|
||||
}
|
||||
|
||||
CudaArrayIterForTest::~CudaArrayIterForTest() { XGDMatrixFree(proxy_); }
|
||||
|
||||
int CudaArrayIterForTest::Next() {
|
||||
if (iter_ == n_batches_) {
|
||||
return 0;
|
||||
}
|
||||
XGDeviceQuantileDMatrixSetDataCudaArrayInterface(proxy_, batches_[iter_].c_str());
|
||||
iter_++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
size_t constexpr CudaArrayIterForTest::kRows;
|
||||
size_t constexpr CudaArrayIterForTest::kCols;
|
||||
|
||||
std::shared_ptr<DMatrix> RandomDataGenerator::GenerateDeviceDMatrix(bool with_label,
|
||||
bool float_label,
|
||||
size_t classes) {
|
||||
std::vector<HostDeviceVector<float>> storage(cols_);
|
||||
std::string arr = this->GenerateColumnarArrayInterface(&storage);
|
||||
auto adapter = data::CudfAdapter(arr);
|
||||
std::shared_ptr<DMatrix> m {
|
||||
new data::DeviceDMatrix{&adapter,
|
||||
std::numeric_limits<float>::quiet_NaN(), 1, 256}};
|
||||
CudaArrayIterForTest iter{this->sparsity_, this->rows_, this->cols_, 1};
|
||||
auto m = std::make_shared<data::IterativeDeviceDMatrix>(
|
||||
&iter, iter.Proxy(), Reset, Next, std::numeric_limits<float>::quiet_NaN(),
|
||||
0, bins_);
|
||||
return m;
|
||||
}
|
||||
} // namespace xgboost
|
||||
|
||||
@ -304,5 +304,51 @@ inline HostDeviceVector<GradientPair> GenerateRandomGradients(const size_t n_row
|
||||
HostDeviceVector<GradientPair> gpair(h_gpair);
|
||||
return gpair;
|
||||
}
|
||||
|
||||
typedef void *DMatrixHandle; // NOLINT(*);
|
||||
|
||||
class CudaArrayIterForTest {
|
||||
HostDeviceVector<float> data_;
|
||||
size_t iter_ {0};
|
||||
DMatrixHandle proxy_;
|
||||
std::unique_ptr<RandomDataGenerator> rng_;
|
||||
|
||||
std::vector<std::string> batches_;
|
||||
std::string interface_;
|
||||
size_t rows_;
|
||||
size_t cols_;
|
||||
size_t n_batches_;
|
||||
|
||||
public:
|
||||
size_t static constexpr kRows { 1000 };
|
||||
size_t static constexpr kBatches { 100 };
|
||||
size_t static constexpr kCols { 13 };
|
||||
|
||||
explicit CudaArrayIterForTest(float sparsity, size_t rows = kRows,
|
||||
size_t cols = kCols, size_t batches = kBatches);
|
||||
~CudaArrayIterForTest();
|
||||
|
||||
std::string AsArray() const {
|
||||
return interface_;
|
||||
}
|
||||
|
||||
int Next();
|
||||
void Reset() {
|
||||
iter_ = 0;
|
||||
}
|
||||
size_t Iter() const { return iter_; }
|
||||
auto Proxy() -> decltype(proxy_) { return proxy_; }
|
||||
};
|
||||
|
||||
typedef void *DataIterHandle; // NOLINT(*)
|
||||
|
||||
inline void Reset(DataIterHandle self) {
|
||||
static_cast<CudaArrayIterForTest*>(self)->Reset();
|
||||
}
|
||||
|
||||
inline int Next(DataIterHandle self) {
|
||||
return static_cast<CudaArrayIterForTest*>(self)->Next();
|
||||
}
|
||||
|
||||
} // namespace xgboost
|
||||
#endif
|
||||
|
||||
@ -76,15 +76,15 @@ TEST(GPUPredictor, EllpackTraining) {
|
||||
.Bins(kBins)
|
||||
.Device(0)
|
||||
.GenerateDeviceDMatrix(true);
|
||||
std::vector<HostDeviceVector<float>> storage(kCols);
|
||||
HostDeviceVector<float> storage(kRows * kCols);
|
||||
auto columnar = RandomDataGenerator{kRows, kCols, 0.0}
|
||||
.Device(0)
|
||||
.GenerateColumnarArrayInterface(&storage);
|
||||
auto adapter = data::CudfAdapter(columnar);
|
||||
.GenerateArrayInterface(&storage);
|
||||
auto adapter = data::CupyAdapter(columnar);
|
||||
std::shared_ptr<DMatrix> p_full {
|
||||
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)
|
||||
};
|
||||
TestTrainingPrediction(kRows, "gpu_hist", p_full, p_ellpack);
|
||||
TestTrainingPrediction(kRows, kBins, "gpu_hist", p_full, p_ellpack);
|
||||
}
|
||||
|
||||
TEST(GPUPredictor, ExternalMemoryTest) {
|
||||
|
||||
@ -32,7 +32,8 @@ TEST(Predictor, PredictionCache) {
|
||||
EXPECT_ANY_THROW(container.Entry(m));
|
||||
}
|
||||
|
||||
void TestTrainingPrediction(size_t rows, std::string tree_method,
|
||||
void TestTrainingPrediction(size_t rows, size_t bins,
|
||||
std::string tree_method,
|
||||
std::shared_ptr<DMatrix> p_full,
|
||||
std::shared_ptr<DMatrix> p_hist) {
|
||||
size_t constexpr kCols = 16;
|
||||
|
||||
@ -52,7 +52,7 @@ void TestPredictionFromGradientIndex(std::string name, size_t rows, size_t cols,
|
||||
}
|
||||
|
||||
// p_full and p_hist should come from the same data set.
|
||||
void TestTrainingPrediction(size_t rows, std::string tree_method,
|
||||
void TestTrainingPrediction(size_t rows, size_t bins, std::string tree_method,
|
||||
std::shared_ptr<DMatrix> p_full,
|
||||
std::shared_ptr<DMatrix> p_hist);
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user