Support dataframe data format in native XGBoost. (#9828)
- Implement a columnar adapter.
- Refactor Python pandas handling code to avoid converting into a single numpy array.
- Add support in R for transforming columns.
- Support R data.frame and factor type.
This commit is contained in:
@@ -361,49 +361,57 @@ XGB_DLL int XGProxyDMatrixCreate(DMatrixHandle *out) {
|
||||
API_END();
|
||||
}
|
||||
|
||||
XGB_DLL int
|
||||
XGProxyDMatrixSetDataCudaArrayInterface(DMatrixHandle handle,
|
||||
char const *c_interface_str) {
|
||||
XGB_DLL int XGProxyDMatrixSetDataCudaArrayInterface(DMatrixHandle handle,
|
||||
char const *c_interface_str) {
|
||||
API_BEGIN();
|
||||
CHECK_HANDLE();
|
||||
xgboost_CHECK_C_ARG_PTR(c_interface_str);
|
||||
auto p_m = static_cast<std::shared_ptr<xgboost::DMatrix> *>(handle);
|
||||
CHECK(p_m);
|
||||
auto m = static_cast<xgboost::data::DMatrixProxy*>(p_m->get());
|
||||
auto m = static_cast<xgboost::data::DMatrixProxy *>(p_m->get());
|
||||
CHECK(m) << "Current DMatrix type does not support set data.";
|
||||
m->SetCUDAArray(c_interface_str);
|
||||
API_END();
|
||||
}
|
||||
|
||||
XGB_DLL int XGProxyDMatrixSetDataCudaColumnar(DMatrixHandle handle,
|
||||
char const *c_interface_str) {
|
||||
XGB_DLL int XGProxyDMatrixSetDataCudaColumnar(DMatrixHandle handle, char const *c_interface_str) {
|
||||
API_BEGIN();
|
||||
CHECK_HANDLE();
|
||||
xgboost_CHECK_C_ARG_PTR(c_interface_str);
|
||||
auto p_m = static_cast<std::shared_ptr<xgboost::DMatrix> *>(handle);
|
||||
CHECK(p_m);
|
||||
auto m = static_cast<xgboost::data::DMatrixProxy*>(p_m->get());
|
||||
auto m = static_cast<xgboost::data::DMatrixProxy *>(p_m->get());
|
||||
CHECK(m) << "Current DMatrix type does not support set data.";
|
||||
m->SetCUDAArray(c_interface_str);
|
||||
API_END();
|
||||
}
|
||||
|
||||
XGB_DLL int XGProxyDMatrixSetDataDense(DMatrixHandle handle,
|
||||
char const *c_interface_str) {
|
||||
XGB_DLL int XGProxyDMatrixSetDataColumnar(DMatrixHandle handle, char const *c_interface_str) {
|
||||
API_BEGIN();
|
||||
CHECK_HANDLE();
|
||||
xgboost_CHECK_C_ARG_PTR(c_interface_str);
|
||||
auto p_m = static_cast<std::shared_ptr<xgboost::DMatrix> *>(handle);
|
||||
CHECK(p_m);
|
||||
auto m = static_cast<xgboost::data::DMatrixProxy*>(p_m->get());
|
||||
auto m = static_cast<xgboost::data::DMatrixProxy *>(p_m->get());
|
||||
CHECK(m) << "Current DMatrix type does not support set data.";
|
||||
m->SetColumnarData(c_interface_str);
|
||||
API_END();
|
||||
}
|
||||
|
||||
XGB_DLL int XGProxyDMatrixSetDataDense(DMatrixHandle handle, char const *c_interface_str) {
|
||||
API_BEGIN();
|
||||
CHECK_HANDLE();
|
||||
xgboost_CHECK_C_ARG_PTR(c_interface_str);
|
||||
auto p_m = static_cast<std::shared_ptr<xgboost::DMatrix> *>(handle);
|
||||
CHECK(p_m);
|
||||
auto m = static_cast<xgboost::data::DMatrixProxy *>(p_m->get());
|
||||
CHECK(m) << "Current DMatrix type does not support set data.";
|
||||
m->SetArrayData(c_interface_str);
|
||||
API_END();
|
||||
}
|
||||
|
||||
XGB_DLL int XGProxyDMatrixSetDataCSR(DMatrixHandle handle, char const *indptr,
|
||||
char const *indices, char const *data,
|
||||
xgboost::bst_ulong ncol) {
|
||||
XGB_DLL int XGProxyDMatrixSetDataCSR(DMatrixHandle handle, char const *indptr, char const *indices,
|
||||
char const *data, xgboost::bst_ulong ncol) {
|
||||
API_BEGIN();
|
||||
CHECK_HANDLE();
|
||||
xgboost_CHECK_C_ARG_PTR(indptr);
|
||||
@@ -411,7 +419,7 @@ XGB_DLL int XGProxyDMatrixSetDataCSR(DMatrixHandle handle, char const *indptr,
|
||||
xgboost_CHECK_C_ARG_PTR(data);
|
||||
auto p_m = static_cast<std::shared_ptr<xgboost::DMatrix> *>(handle);
|
||||
CHECK(p_m);
|
||||
auto m = static_cast<xgboost::data::DMatrixProxy*>(p_m->get());
|
||||
auto m = static_cast<xgboost::data::DMatrixProxy *>(p_m->get());
|
||||
CHECK(m) << "Current DMatrix type does not support set data.";
|
||||
m->SetCSRData(indptr, indices, data, ncol, true);
|
||||
API_END();
|
||||
@@ -429,6 +437,25 @@ XGB_DLL int XGDMatrixCreateFromCSREx(const size_t *indptr, const unsigned *indic
|
||||
API_END();
|
||||
}
|
||||
|
||||
/**
 * \brief Create a DMatrix from columnar (dataframe-like) input.
 *
 * \param data          JSON document handed to `data::ColumnarAdapter`, which expects a
 *                      JSON array holding one array-interface object per column.
 * \param c_json_config JSON configuration. The missing-value marker is read through
 *                      `GetMissing`; the optional integer keys "nthread" and
 *                      "data_split_mode" both default to 0 when absent.
 * \param out           Receives a newly allocated `std::shared_ptr<DMatrix>` handle.
 *
 * \return The status code produced by the API_BEGIN()/API_END() wrapper.
 */
XGB_DLL int XGDMatrixCreateFromColumnar(char const *data, char const *c_json_config,
                                        DMatrixHandle *out) {
  API_BEGIN();
  xgboost_CHECK_C_ARG_PTR(c_json_config);
  xgboost_CHECK_C_ARG_PTR(data);
  // `out` is written through below, so validate it like the other pointer arguments
  // instead of dereferencing a null pointer.
  xgboost_CHECK_C_ARG_PTR(out);

  auto config = Json::Load(c_json_config);
  float missing = GetMissing(config);
  auto n_threads = OptionalArg<Integer, std::int64_t>(config, "nthread", 0);
  auto data_split_mode =
      static_cast<DataSplitMode>(OptionalArg<Integer, int64_t>(config, "data_split_mode", 0));

  // The adapter only needs to live for the duration of DMatrix::Create.
  data::ColumnarAdapter adapter{data};
  // No cache prefix is supplied (empty string).
  *out = new std::shared_ptr<DMatrix>(
      DMatrix::Create(&adapter, missing, n_threads, "", data_split_mode));

  API_END();
}
|
||||
|
||||
XGB_DLL int XGDMatrixCreateFromCSR(char const *indptr, char const *indices, char const *data,
|
||||
xgboost::bst_ulong ncol, char const *c_json_config,
|
||||
DMatrixHandle *out) {
|
||||
@@ -1196,6 +1223,27 @@ XGB_DLL int XGBoosterPredictFromDense(BoosterHandle handle, char const *array_in
|
||||
API_END();
|
||||
}
|
||||
|
||||
/**
 * \brief In-place prediction on columnar input.
 *
 * \param handle          Booster handle; used as an `xgboost::Learner`.
 * \param array_interface Columnar data description passed to
 *                        `DMatrixProxy::SetColumnarData`.
 * \param c_json_config   Prediction configuration, forwarded to `InplacePredictImpl`.
 * \param m               Optional proxy DMatrix handle. When null, a fresh
 *                        `data::DMatrixProxy` is created for this call.
 * \param out_shape       Forwarded to `InplacePredictImpl`.
 * \param out_dim         Forwarded to `InplacePredictImpl`.
 * \param out_result      Forwarded to `InplacePredictImpl`.
 *
 * \return The status code produced by the API_BEGIN()/API_END() wrapper.
 */
XGB_DLL int XGBoosterPredictFromColumnar(BoosterHandle handle, char const *array_interface,
                                         char const *c_json_config, DMatrixHandle m,
                                         xgboost::bst_ulong const **out_shape,
                                         xgboost::bst_ulong *out_dim, const float **out_result) {
  API_BEGIN();
  CHECK_HANDLE();

  // Reuse the caller's DMatrix when one is given, otherwise allocate a proxy.
  std::shared_ptr<DMatrix> p_m{nullptr};
  if (m != nullptr) {
    p_m = *static_cast<std::shared_ptr<DMatrix> *>(m);
  } else {
    p_m = std::shared_ptr<DMatrix>{new data::DMatrixProxy};
  }

  // Only a proxy DMatrix can have its data swapped for in-place prediction.
  auto p_proxy = dynamic_cast<data::DMatrixProxy *>(p_m.get());
  CHECK(p_proxy) << "Invalid input type for inplace predict.";

  xgboost_CHECK_C_ARG_PTR(array_interface);
  p_proxy->SetColumnarData(array_interface);

  auto *p_learner = static_cast<xgboost::Learner *>(handle);
  InplacePredictImpl(p_m, c_json_config, p_learner, out_shape, out_dim, out_result);
  API_END();
}
|
||||
|
||||
XGB_DLL int XGBoosterPredictFromCSR(BoosterHandle handle, char const *indptr, char const *indices,
|
||||
char const *data, xgboost::bst_ulong cols,
|
||||
char const *c_json_config, DMatrixHandle m,
|
||||
|
||||
@@ -97,6 +97,7 @@ void HostSketchContainer::PushAdapterBatch(Batch const &batch, size_t base_rowid
|
||||
// the nnz from info is not reliable as sketching might be the first place to go through
|
||||
// the data.
|
||||
auto is_dense = info.num_nonzero_ == info.num_col_ * info.num_row_;
|
||||
CHECK(!this->columns_size_.empty());
|
||||
this->PushRowPageImpl(batch, base_rowid, weights, info.num_nonzero_, info.num_col_, is_dense,
|
||||
is_valid);
|
||||
}
|
||||
@@ -110,6 +111,7 @@ INSTANTIATE(CSRArrayAdapterBatch)
|
||||
INSTANTIATE(CSCAdapterBatch)
|
||||
INSTANTIATE(DataTableAdapterBatch)
|
||||
INSTANTIATE(SparsePageAdapterBatch)
|
||||
INSTANTIATE(ColumnarAdapterBatch)
|
||||
|
||||
namespace {
|
||||
/**
|
||||
|
||||
@@ -25,9 +25,7 @@
|
||||
#include "xgboost/span.h"
|
||||
#include "xgboost/string_view.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
|
||||
namespace xgboost::data {
|
||||
/** External data formats should implement an adapter as below. The
|
||||
* adapter provides a uniform access to data outside xgboost, allowing
|
||||
* construction of DMatrix objects from a range of sources without duplicating
|
||||
@@ -279,9 +277,9 @@ class ArrayAdapterBatch : public detail::NoMetaInfo {
|
||||
return Line{array_interface_, idx};
|
||||
}
|
||||
|
||||
size_t NumRows() const { return array_interface_.Shape(0); }
|
||||
size_t NumCols() const { return array_interface_.Shape(1); }
|
||||
size_t Size() const { return this->NumRows(); }
|
||||
[[nodiscard]] std::size_t NumRows() const { return array_interface_.Shape(0); }
|
||||
[[nodiscard]] std::size_t NumCols() const { return array_interface_.Shape(1); }
|
||||
[[nodiscard]] std::size_t Size() const { return this->NumRows(); }
|
||||
|
||||
explicit ArrayAdapterBatch(ArrayInterface<2> array_interface)
|
||||
: array_interface_{std::move(array_interface)} {}
|
||||
@@ -326,11 +324,11 @@ class CSRArrayAdapterBatch : public detail::NoMetaInfo {
|
||||
: indices_{std::move(indices)}, values_{std::move(values)}, ridx_{ridx},
|
||||
offset_{offset} {}
|
||||
|
||||
COOTuple GetElement(std::size_t idx) const {
|
||||
[[nodiscard]] COOTuple GetElement(std::size_t idx) const {
|
||||
return {ridx_, TypedIndex<std::size_t, 1>{indices_}(offset_ + idx), values_(offset_ + idx)};
|
||||
}
|
||||
|
||||
size_t Size() const {
|
||||
[[nodiscard]] std::size_t Size() const {
|
||||
return values_.Shape(0);
|
||||
}
|
||||
};
|
||||
@@ -539,9 +537,11 @@ class CSCArrayAdapter : public detail::SingleBatchDataIter<CSCArrayAdapterBatch>
|
||||
batch_{CSCArrayAdapterBatch{indptr_, indices_, values_}} {}
|
||||
|
||||
// JVM package sends 0 as unknown
|
||||
size_t NumRows() const { return num_rows_ == 0 ? kAdapterUnknownSize : num_rows_; }
|
||||
size_t NumColumns() const { return indptr_.n - 1; }
|
||||
const CSCArrayAdapterBatch& Value() const override { return batch_; }
|
||||
[[nodiscard]] std::size_t NumRows() const {
|
||||
return num_rows_ == 0 ? kAdapterUnknownSize : num_rows_;
|
||||
}
|
||||
[[nodiscard]] std::size_t NumColumns() const { return indptr_.n - 1; }
|
||||
[[nodiscard]] const CSCArrayAdapterBatch& Value() const override { return batch_; }
|
||||
};
|
||||
|
||||
class DataTableAdapterBatch : public detail::NoMetaInfo {
|
||||
@@ -634,15 +634,15 @@ class DataTableAdapterBatch : public detail::NoMetaInfo {
|
||||
public:
|
||||
Line(std::size_t ridx, void const* const* const data, std::vector<DTType> const& ft)
|
||||
: row_idx_{ridx}, data_{data}, feature_types_{ft} {}
|
||||
std::size_t Size() const { return feature_types_.size(); }
|
||||
COOTuple GetElement(std::size_t idx) const {
|
||||
[[nodiscard]] std::size_t Size() const { return feature_types_.size(); }
|
||||
[[nodiscard]] COOTuple GetElement(std::size_t idx) const {
|
||||
return COOTuple{row_idx_, idx, DTGetValue(data_[idx], feature_types_[idx], row_idx_)};
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
size_t Size() const { return num_rows_; }
|
||||
const Line GetLine(std::size_t ridx) const { return {ridx, data_, feature_types_}; }
|
||||
[[nodiscard]] size_t Size() const { return num_rows_; }
|
||||
[[nodiscard]] const Line GetLine(std::size_t ridx) const { return {ridx, data_, feature_types_}; }
|
||||
static constexpr bool kIsRowMajor = true;
|
||||
|
||||
private:
|
||||
@@ -659,9 +659,9 @@ class DataTableAdapter : public detail::SingleBatchDataIter<DataTableAdapterBatc
|
||||
: batch_(data, feature_stypes, num_rows, num_features),
|
||||
num_rows_(num_rows),
|
||||
num_columns_(num_features) {}
|
||||
const DataTableAdapterBatch& Value() const override { return batch_; }
|
||||
std::size_t NumRows() const { return num_rows_; }
|
||||
std::size_t NumColumns() const { return num_columns_; }
|
||||
[[nodiscard]] const DataTableAdapterBatch& Value() const override { return batch_; }
|
||||
[[nodiscard]] std::size_t NumRows() const { return num_rows_; }
|
||||
[[nodiscard]] std::size_t NumColumns() const { return num_columns_; }
|
||||
|
||||
private:
|
||||
DataTableAdapterBatch batch_;
|
||||
@@ -669,6 +669,74 @@ class DataTableAdapter : public detail::SingleBatchDataIter<DataTableAdapterBatc
|
||||
std::size_t num_columns_;
|
||||
};
|
||||
|
||||
class ColumnarAdapterBatch : public detail::NoMetaInfo {
|
||||
common::Span<ArrayInterface<1, false>> columns_;
|
||||
|
||||
class Line {
|
||||
common::Span<ArrayInterface<1, false>> const& columns_;
|
||||
std::size_t ridx_;
|
||||
|
||||
public:
|
||||
explicit Line(common::Span<ArrayInterface<1, false>> const& columns, std::size_t ridx)
|
||||
: columns_{columns}, ridx_{ridx} {}
|
||||
[[nodiscard]] std::size_t Size() const { return columns_.empty() ? 0 : columns_.size(); }
|
||||
|
||||
[[nodiscard]] COOTuple GetElement(std::size_t idx) const {
|
||||
return {ridx_, idx, columns_[idx](ridx_)};
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
ColumnarAdapterBatch() = default;
|
||||
explicit ColumnarAdapterBatch(common::Span<ArrayInterface<1, false>> columns)
|
||||
: columns_{columns} {}
|
||||
[[nodiscard]] Line GetLine(std::size_t ridx) const { return Line{columns_, ridx}; }
|
||||
[[nodiscard]] std::size_t Size() const {
|
||||
return columns_.empty() ? 0 : columns_.front().Shape(0);
|
||||
}
|
||||
[[nodiscard]] std::size_t NumCols() const { return columns_.empty() ? 0 : columns_.size(); }
|
||||
[[nodiscard]] std::size_t NumRows() const { return this->Size(); }
|
||||
|
||||
static constexpr bool kIsRowMajor = true;
|
||||
};
|
||||
|
||||
/**
 * \brief Adapter for columnar (dataframe-like) input.
 *
 * The input is a JSON array in which every element is an object describing one column
 * as a 1-dim array interface. All columns must have the same length.
 *
 * NOTE(review): `batch_` is a span over `columns_`, so a copied or moved adapter keeps a
 * batch that refers to the storage of the object it was copied from. Visible callers
 * construct the adapter in place or hold it through a `std::shared_ptr`.
 */
class ColumnarAdapter : public detail::SingleBatchDataIter<ColumnarAdapterBatch> {
  std::vector<ArrayInterface<1, false>> columns_;
  ColumnarAdapterBatch batch_;

 public:
  /**
   * \brief Parse the column descriptions and build the single batch.
   *
   * \param columns JSON text holding an array of array-interface objects.
   *
   * Fails a CHECK when the document is not a JSON array or when the columns differ in
   * length.
   */
  explicit ColumnarAdapter(StringView columns) {
    auto jarray = Json::Load(columns);
    CHECK(IsA<Array>(jarray));
    auto const& array = get<Array const>(jarray);
    columns_.reserve(array.size());
    // Bind by reference: copying each Json element just to read it is unnecessary.
    for (auto const& col : array) {
      columns_.emplace_back(get<Object const>(col));
    }
    bool consistent =
        columns_.empty() ||
        std::all_of(columns_.cbegin(), columns_.cend(), [&](ArrayInterface<1, false> const& column) {
          return column.Shape(0) == columns_[0].Shape(0);
        });
    CHECK(consistent) << "Size of columns should be the same.";
    // Built last so that the span covers the fully populated vector.
    batch_ = ColumnarAdapterBatch{columns_};
  }

  [[nodiscard]] ColumnarAdapterBatch const& Value() const override { return batch_; }

  /** \brief Length of the first column, or 0 when no column was supplied. */
  [[nodiscard]] std::size_t NumRows() const {
    return columns_.empty() ? 0 : columns_.front().Shape(0);
  }
  /** \brief Number of columns supplied. */
  [[nodiscard]] std::size_t NumColumns() const { return columns_.size(); }
};
|
||||
|
||||
class FileAdapterBatch {
|
||||
public:
|
||||
class Line {
|
||||
@@ -851,6 +919,5 @@ class SparsePageAdapterBatch {
|
||||
Line GetLine(size_t ridx) const { return Line{page_[ridx].data(), page_[ridx].size(), ridx}; }
|
||||
size_t Size() const { return page_.Size(); }
|
||||
};
|
||||
}; // namespace data
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::data
|
||||
#endif // XGBOOST_DATA_ADAPTER_H_
|
||||
|
||||
@@ -947,38 +947,24 @@ DMatrix* DMatrix::Create(AdapterT* adapter, float missing, int nthread, const st
|
||||
return new data::SimpleDMatrix(adapter, missing, nthread, data_split_mode);
|
||||
}
|
||||
|
||||
template DMatrix* DMatrix::Create<data::DenseAdapter>(data::DenseAdapter* adapter, float missing,
|
||||
std::int32_t nthread,
|
||||
const std::string& cache_prefix,
|
||||
DataSplitMode data_split_mode);
|
||||
template DMatrix* DMatrix::Create<data::ArrayAdapter>(data::ArrayAdapter* adapter, float missing,
|
||||
std::int32_t nthread,
|
||||
const std::string& cache_prefix,
|
||||
DataSplitMode data_split_mode);
|
||||
template DMatrix* DMatrix::Create<data::CSRAdapter>(data::CSRAdapter* adapter, float missing,
|
||||
std::int32_t nthread,
|
||||
const std::string& cache_prefix,
|
||||
DataSplitMode data_split_mode);
|
||||
template DMatrix* DMatrix::Create<data::CSCAdapter>(data::CSCAdapter* adapter, float missing,
|
||||
std::int32_t nthread,
|
||||
const std::string& cache_prefix,
|
||||
DataSplitMode data_split_mode);
|
||||
template DMatrix* DMatrix::Create<data::DataTableAdapter>(data::DataTableAdapter* adapter,
|
||||
float missing, std::int32_t nthread,
|
||||
const std::string& cache_prefix,
|
||||
DataSplitMode data_split_mode);
|
||||
template DMatrix* DMatrix::Create<data::FileAdapter>(data::FileAdapter* adapter, float missing,
|
||||
std::int32_t nthread,
|
||||
const std::string& cache_prefix,
|
||||
DataSplitMode data_split_mode);
|
||||
template DMatrix* DMatrix::Create<data::CSRArrayAdapter>(data::CSRArrayAdapter* adapter,
|
||||
float missing, std::int32_t nthread,
|
||||
const std::string& cache_prefix,
|
||||
DataSplitMode data_split_mode);
|
||||
template DMatrix* DMatrix::Create<data::CSCArrayAdapter>(data::CSCArrayAdapter* adapter,
|
||||
float missing, std::int32_t nthread,
|
||||
const std::string& cache_prefix,
|
||||
DataSplitMode data_split_mode);
|
||||
// Instantiate the factory function for various adapters
|
||||
#define INSTANTIATION_CREATE(_AdapterT) \
|
||||
template DMatrix* DMatrix::Create<data::_AdapterT>( \
|
||||
data::_AdapterT * adapter, float missing, std::int32_t nthread, \
|
||||
const std::string& cache_prefix, DataSplitMode data_split_mode);
|
||||
|
||||
INSTANTIATION_CREATE(DenseAdapter)
|
||||
INSTANTIATION_CREATE(ArrayAdapter)
|
||||
INSTANTIATION_CREATE(CSRAdapter)
|
||||
INSTANTIATION_CREATE(CSCAdapter)
|
||||
INSTANTIATION_CREATE(DataTableAdapter)
|
||||
INSTANTIATION_CREATE(FileAdapter)
|
||||
INSTANTIATION_CREATE(CSRArrayAdapter)
|
||||
INSTANTIATION_CREATE(CSCArrayAdapter)
|
||||
INSTANTIATION_CREATE(ColumnarAdapter)
|
||||
|
||||
#undef INSTANTIATION_CREATE
|
||||
|
||||
template DMatrix* DMatrix::Create(
|
||||
data::IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>* adapter,
|
||||
float missing, int nthread, const std::string& cache_prefix, DataSplitMode data_split_mode);
|
||||
@@ -1156,7 +1142,6 @@ uint64_t SparsePage::Push(const AdapterBatchT& batch, float missing, int nthread
|
||||
builder.InitStorage();
|
||||
|
||||
// Second pass over batch, placing elements in correct position
|
||||
|
||||
auto is_valid = data::IsValidFunctor{missing};
|
||||
#pragma omp parallel num_threads(nthread)
|
||||
{
|
||||
@@ -1253,9 +1238,10 @@ template uint64_t SparsePage::Push(const data::CSCAdapterBatch& batch, float mis
|
||||
template uint64_t SparsePage::Push(const data::DataTableAdapterBatch& batch, float missing,
|
||||
int nthread);
|
||||
template uint64_t SparsePage::Push(const data::FileAdapterBatch& batch, float missing, int nthread);
|
||||
template uint64_t SparsePage::Push(const data::ColumnarAdapterBatch& batch, float missing,
|
||||
std::int32_t nthread);
|
||||
|
||||
namespace data {
|
||||
|
||||
// List of files that will be force linked in static links.
|
||||
DMLC_REGISTRY_LINK_TAG(sparse_page_raw_format);
|
||||
DMLC_REGISTRY_LINK_TAG(gradient_index_format);
|
||||
|
||||
@@ -120,7 +120,7 @@ void GHistIndexMatrix::PushAdapterBatchColumns(Context const *ctx, Batch const &
|
||||
INSTANTIATION_PUSH(data::CSRArrayAdapterBatch)
|
||||
INSTANTIATION_PUSH(data::ArrayAdapterBatch)
|
||||
INSTANTIATION_PUSH(data::SparsePageAdapterBatch)
|
||||
|
||||
INSTANTIATION_PUSH(data::ColumnarAdapterBatch)
|
||||
#undef INSTANTIATION_PUSH
|
||||
|
||||
void GHistIndexMatrix::ResizeIndex(const size_t n_index, const bool isDense) {
|
||||
|
||||
@@ -5,7 +5,22 @@
|
||||
|
||||
#include "proxy_dmatrix.h"
|
||||
|
||||
#include <memory> // for shared_ptr
|
||||
|
||||
#include "xgboost/context.h" // for Context
|
||||
#include "xgboost/data.h" // for DMatrix
|
||||
#include "xgboost/logging.h"
|
||||
#include "xgboost/string_view.h" // for StringView
|
||||
|
||||
namespace xgboost::data {
|
||||
/**
 * \brief Point this proxy at columnar host data.
 *
 * Builds a ColumnarAdapter from `interface_str`, stores it as the current batch, copies
 * its row and column counts into the meta info, and initializes the context with
 * device "cpu".
 */
void DMatrixProxy::SetColumnarData(StringView interface_str) {
  std::shared_ptr<ColumnarAdapter> p_adapter{new ColumnarAdapter{interface_str}};
  this->batch_ = p_adapter;

  auto& info = this->Info();
  info.num_col_ = p_adapter->NumColumns();
  info.num_row_ = p_adapter->NumRows();

  this->ctx_.Init(Args{{"device", "cpu"}});
}
|
||||
|
||||
void DMatrixProxy::SetArrayData(StringView interface_str) {
|
||||
std::shared_ptr<ArrayAdapter> adapter{new ArrayAdapter{interface_str}};
|
||||
this->batch_ = adapter;
|
||||
|
||||
@@ -62,6 +62,8 @@ class DMatrixProxy : public DMatrix {
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
}
|
||||
|
||||
void SetColumnarData(StringView interface_str);
|
||||
|
||||
void SetArrayData(StringView interface_str);
|
||||
void SetCSRData(char const* c_indptr, char const* c_indices, char const* c_values,
|
||||
bst_feature_t n_features, bool on_host);
|
||||
@@ -151,6 +153,17 @@ decltype(auto) HostAdapterDispatch(DMatrixProxy const* proxy, Fn fn, bool* type_
|
||||
if (type_error) {
|
||||
*type_error = false;
|
||||
}
|
||||
} else if (proxy->Adapter().type() == typeid(std::shared_ptr<ColumnarAdapter>)) {
|
||||
if constexpr (get_value) {
|
||||
auto value = std::any_cast<std::shared_ptr<ColumnarAdapter>>(proxy->Adapter())->Value();
|
||||
return fn(value);
|
||||
} else {
|
||||
auto value = std::any_cast<std::shared_ptr<ColumnarAdapter>>(proxy->Adapter());
|
||||
return fn(value);
|
||||
}
|
||||
if (type_error) {
|
||||
*type_error = false;
|
||||
}
|
||||
} else {
|
||||
if (type_error) {
|
||||
*type_error = true;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Copyright 2014~2023 by XGBoost Contributors
|
||||
* Copyright 2014~2023, XGBoost Contributors
|
||||
* \file simple_dmatrix.cc
|
||||
* \brief the input data structure for gradient boosting
|
||||
* \author Tianqi Chen
|
||||
@@ -356,6 +356,8 @@ template SimpleDMatrix::SimpleDMatrix(DataTableAdapter* adapter, float missing,
|
||||
DataSplitMode data_split_mode);
|
||||
template SimpleDMatrix::SimpleDMatrix(FileAdapter* adapter, float missing, int nthread,
|
||||
DataSplitMode data_split_mode);
|
||||
template SimpleDMatrix::SimpleDMatrix(ColumnarAdapter* adapter, float missing, int nthread,
|
||||
DataSplitMode data_split_mode);
|
||||
template SimpleDMatrix::SimpleDMatrix(
|
||||
IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>* adapter,
|
||||
float missing, int nthread, DataSplitMode data_split_mode);
|
||||
|
||||
@@ -761,6 +761,9 @@ class CPUPredictor : public Predictor {
|
||||
} else if (x.type() == typeid(std::shared_ptr<data::CSRArrayAdapter>)) {
|
||||
this->DispatchedInplacePredict<data::CSRArrayAdapter, 1>(x, p_m, model, missing, out_preds,
|
||||
tree_begin, tree_end);
|
||||
} else if (x.type() == typeid(std::shared_ptr<data::ColumnarAdapter>)) {
|
||||
this->DispatchedInplacePredict<data::ColumnarAdapter, kBlockOfRowsSize>(
|
||||
x, p_m, model, missing, out_preds, tree_begin, tree_end);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user