Implement iterative DMatrix for CPU. (#8116)

This commit is contained in:
Jiaming Yuan 2022-07-26 22:34:21 +08:00 committed by GitHub
parent 546de5efd2
commit 2c70751d1e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 636 additions and 190 deletions

View File

@ -43,6 +43,7 @@
#include "../src/data/gradient_index_format.cc" #include "../src/data/gradient_index_format.cc"
#include "../src/data/sparse_page_dmatrix.cc" #include "../src/data/sparse_page_dmatrix.cc"
#include "../src/data/proxy_dmatrix.cc" #include "../src/data/proxy_dmatrix.cc"
#include "../src/data/iterative_dmatrix.cc"
// prediction // prediction
#include "../src/predictor/predictor.cc" #include "../src/predictor/predictor.cc"

View File

@ -559,6 +559,7 @@ class DMatrix {
* *
* \param iter External data iterator * \param iter External data iterator
* \param proxy A handle to ProxyDMatrix * \param proxy A handle to ProxyDMatrix
* \param ref Reference Quantile DMatrix.
* \param reset Callback for reset * \param reset Callback for reset
* \param next Callback for next * \param next Callback for next
* \param missing Value that should be treated as missing. * \param missing Value that should be treated as missing.
@ -567,13 +568,11 @@ class DMatrix {
* *
* \return A created quantile based DMatrix. * \return A created quantile based DMatrix.
*/ */
template <typename DataIterHandle, typename DMatrixHandle, template <typename DataIterHandle, typename DMatrixHandle, typename DataIterResetCallback,
typename DataIterResetCallback, typename XGDMatrixCallbackNext> typename XGDMatrixCallbackNext>
static DMatrix *Create(DataIterHandle iter, DMatrixHandle proxy, static DMatrix* Create(DataIterHandle iter, DMatrixHandle proxy, std::shared_ptr<DMatrix> ref,
DataIterResetCallback *reset, DataIterResetCallback* reset, XGDMatrixCallbackNext* next, float missing,
XGDMatrixCallbackNext *next, float missing, int nthread, bst_bin_t max_bin);
int nthread,
int max_bin);
/** /**
* \brief Create an external memory DMatrix with callbacks. * \brief Create an external memory DMatrix with callbacks.
@ -613,6 +612,7 @@ class DMatrix {
virtual BatchSet<GHistIndexMatrix> GetGradientIndex(const BatchParam& param) = 0; virtual BatchSet<GHistIndexMatrix> GetGradientIndex(const BatchParam& param) = 0;
virtual bool EllpackExists() const = 0; virtual bool EllpackExists() const = 0;
virtual bool GHistIndexExists() const = 0;
virtual bool SparsePageExists() const = 0; virtual bool SparsePageExists() const = 0;
}; };
@ -621,11 +621,16 @@ inline BatchSet<SparsePage> DMatrix::GetBatches() {
return GetRowBatches(); return GetRowBatches();
} }
template<> template <>
inline bool DMatrix::PageExists<EllpackPage>() const { inline bool DMatrix::PageExists<EllpackPage>() const {
return this->EllpackExists(); return this->EllpackExists();
} }
template <>
inline bool DMatrix::PageExists<GHistIndexMatrix>() const {
return this->GHistIndexExists();
}
template<> template<>
inline bool DMatrix::PageExists<SparsePage>() const { inline bool DMatrix::PageExists<SparsePage>() const {
return this->SparsePageExists(); return this->SparsePageExists();

View File

@ -275,13 +275,14 @@ XGB_DLL int XGDMatrixCreateFromCallback(DataIterHandle iter, DMatrixHandle proxy
API_END(); API_END();
} }
XGB_DLL int XGDeviceQuantileDMatrixCreateFromCallback( XGB_DLL int XGDeviceQuantileDMatrixCreateFromCallback(DataIterHandle iter, DMatrixHandle proxy,
DataIterHandle iter, DMatrixHandle proxy, DataIterResetCallback *reset, DataIterResetCallback *reset,
XGDMatrixCallbackNext *next, float missing, int nthread, XGDMatrixCallbackNext *next, float missing,
int max_bin, DMatrixHandle *out) { int nthread, int max_bin,
DMatrixHandle *out) {
API_BEGIN(); API_BEGIN();
*out = new std::shared_ptr<xgboost::DMatrix>{ *out = new std::shared_ptr<xgboost::DMatrix>{
xgboost::DMatrix::Create(iter, proxy, reset, next, missing, nthread, max_bin)}; xgboost::DMatrix::Create(iter, proxy, nullptr, reset, next, missing, nthread, max_bin)};
API_END(); API_END();
} }

View File

@ -931,15 +931,13 @@ DMatrix* DMatrix::Load(const std::string& uri,
} }
return dmat; return dmat;
} }
template <typename DataIterHandle, typename DMatrixHandle,
typename DataIterResetCallback, typename XGDMatrixCallbackNext> template <typename DataIterHandle, typename DMatrixHandle, typename DataIterResetCallback,
DMatrix *DMatrix::Create(DataIterHandle iter, DMatrixHandle proxy, typename XGDMatrixCallbackNext>
DataIterResetCallback *reset, DMatrix* DMatrix::Create(DataIterHandle iter, DMatrixHandle proxy, std::shared_ptr<DMatrix> ref,
XGDMatrixCallbackNext *next, float missing, DataIterResetCallback* reset, XGDMatrixCallbackNext* next, float missing,
int nthread, int nthread, bst_bin_t max_bin) {
int max_bin) { return new data::IterativeDMatrix(iter, proxy, ref, reset, next, missing, nthread, max_bin);
return new data::IterativeDMatrix(iter, proxy, reset, next, missing,
nthread, max_bin);
} }
template <typename DataIterHandle, typename DMatrixHandle, template <typename DataIterHandle, typename DMatrixHandle,
@ -953,11 +951,12 @@ DMatrix *DMatrix::Create(DataIterHandle iter, DMatrixHandle proxy,
cache); cache);
} }
template DMatrix *DMatrix::Create<DataIterHandle, DMatrixHandle, template DMatrix* DMatrix::Create<DataIterHandle, DMatrixHandle, DataIterResetCallback,
DataIterResetCallback, XGDMatrixCallbackNext>( XGDMatrixCallbackNext>(DataIterHandle iter, DMatrixHandle proxy,
DataIterHandle iter, DMatrixHandle proxy, DataIterResetCallback *reset, std::shared_ptr<DMatrix> ref,
XGDMatrixCallbackNext *next, float missing, int nthread, DataIterResetCallback* reset,
int max_bin); XGDMatrixCallbackNext* next, float missing,
int nthread, int max_bin);
template DMatrix *DMatrix::Create<DataIterHandle, DMatrixHandle, template DMatrix *DMatrix::Create<DataIterHandle, DMatrixHandle,
DataIterResetCallback, XGDMatrixCallbackNext>( DataIterResetCallback, XGDMatrixCallbackNext>(

View File

@ -0,0 +1,214 @@
/*!
* Copyright 2022 XGBoost contributors
*/
#include "iterative_dmatrix.h"
#include <rabit/rabit.h>
#include "../common/column_matrix.h"
#include "../common/hist_util.h"
#include "gradient_index.h"
#include "proxy_dmatrix.h"
#include "simple_batch_iterator.h"
namespace xgboost {
namespace data {
/**
 * \brief Copy the histogram cuts of a reference DMatrix into `p_cuts`.
 *
 * The cuts are read from the first gradient-index page or the first ellpack page of
 * `ref_`. A page type that already exists on `ref_` is preferred (gradient index first);
 * when neither exists the page type is picked from `p.gpu_id`.
 *
 * \param ref_       Reference DMatrix, must not be null.
 * \param n_features Number of features the caller expects `ref_` to have.
 * \param p          Batch parameter forwarded to `GetBatches`.
 * \param p_cuts     Output cuts, must not be null.
 */
void GetCutsFromRef(std::shared_ptr<DMatrix> ref_, bst_feature_t n_features, BatchParam p,
                    common::HistogramCuts* p_cuts) {
  CHECK(ref_);
  CHECK(p_cuts);

  // Short-circuit order matters: the ellpack query is only made when no gradient index
  // exists, and the device id is only consulted when neither page exists.
  bool const from_ghist = ref_->PageExists<GHistIndexMatrix>() ||
                          (!ref_->PageExists<EllpackPage>() && p.gpu_id == Context::kCpuId);

  if (from_ghist) {
    for (auto const& ghist_page : ref_->GetBatches<GHistIndexMatrix>(p)) {
      *p_cuts = ghist_page.cut;
      break;  // only the first page is read
    }
  } else {
    for (auto const& ellpack_page : ref_->GetBatches<EllpackPage>(p)) {
      GetCutsFromEllpack(ellpack_page, p_cuts);
      break;  // only the first page is read
    }
  }

  CHECK_EQ(ref_->Info().num_col_, n_features)
      << "Invalid ref DMatrix, different number of features.";
}
/**
 * \brief Build the CPU gradient index (`ghist_`) by iterating over the external data iterator.
 *
 * The external iterator is walked several times, with a Reset() after each pass:
 *   1. count rows, columns and valid entries of every batch;
 *   2. build quantile cuts from a sketch (skipped when `ref` is provided, in which case the
 *      cuts are taken from `ref`);
 *   3. push every batch into the gradient index;
 *   4. push every batch into the column matrix of the gradient index.
 *
 * \param iter_handle Opaque handle passed to the reset/next callbacks.
 * \param missing     Value treated as missing when counting and quantizing entries.
 * \param ref         Optional reference DMatrix providing the cuts; may be null.
 */
void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
std::shared_ptr<DMatrix> ref) {
DMatrixProxy* proxy = MakeProxy(proxy_);
CHECK(proxy);
// The external iterator
auto iter =
DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext>{iter_handle, reset_, next_};
common::HistogramCuts cuts;
// Number of rows of the batch currently held by the proxy.
auto num_rows = [&]() {
return HostAdapterDispatch(proxy, [](auto const& value) { return value.Size(); });
};
// Number of columns of the batch currently held by the proxy.
auto num_cols = [&]() {
return HostAdapterDispatch(proxy, [](auto const& value) { return value.NumCols(); });
};
// Per-feature count of valid entries, accumulated over all batches by `nnz_cnt`.
std::vector<size_t> column_sizes;
auto const is_valid = data::IsValidFunctor{missing};
// Count the valid entries of the current batch. Returns the batch total and, as a side
// effect, adds the per-feature counts to `column_sizes`.
auto nnz_cnt = [&]() {
return HostAdapterDispatch(proxy, [&](auto const& value) {
size_t n_threads = ctx_.Threads();
size_t n_features = column_sizes.size();
// One row of counters per thread so the parallel loop needs no synchronization.
linalg::Tensor<size_t, 2> column_sizes_tloc({n_threads, n_features}, Context::kCpuId);
auto view = column_sizes_tloc.HostView();
common::ParallelFor(value.Size(), n_threads, common::Sched::Static(256), [&](auto i) {
auto const& line = value.GetLine(i);
for (size_t j = 0; j < line.Size(); ++j) {
data::COOTuple const& elem = line.GetElement(j);
if (is_valid(elem)) {
view(omp_get_thread_num(), elem.column_idx)++;
}
}
});
// Sum of all thread-local counters is the number of valid entries in this batch.
auto ptr = column_sizes_tloc.Data()->HostPointer();
auto result = std::accumulate(ptr, ptr + column_sizes_tloc.Size(), static_cast<size_t>(0));
// Fold the thread-local counters into the running per-feature totals.
for (size_t tidx = 0; tidx < n_threads; ++tidx) {
for (size_t fidx = 0; fidx < n_features; ++fidx) {
column_sizes[fidx] += view(tidx, fidx);
}
}
return result;
});
};
size_t n_features = 0;
size_t n_batches = 0;
size_t accumulated_rows{0};
size_t nnz{0};
/**
* CPU impl needs an additional loop for accumulating the column size.
*/
std::unique_ptr<common::HostSketchContainer> p_sketch;
// Valid-entry count of each batch, indexed by batch order; reused in the later passes.
std::vector<size_t> batch_nnz;
do {
// We use do while here as the first batch is fetched in ctor
if (n_features == 0) {
// First batch: fix the number of features (max-reduced through rabit) and size the
// per-feature counters accordingly.
n_features = num_cols();
rabit::Allreduce<rabit::op::Max>(&n_features, 1);
column_sizes.resize(n_features);
info_.num_col_ = n_features;
} else {
CHECK_EQ(n_features, num_cols()) << "Inconsistent number of columns.";
}
size_t batch_size = num_rows();
batch_nnz.push_back(nnz_cnt());
nnz += batch_nnz.back();
accumulated_rows += batch_size;
n_batches++;
} while (iter.Next());
iter.Reset();
// From here on Info() has the correct data shape
Info().num_row_ = accumulated_rows;
Info().num_nonzero_ = nnz;
rabit::Allreduce<rabit::op::Max>(&info_.num_col_, 1);
// Sanity check: a feature cannot have more valid entries than there are rows.
CHECK(std::none_of(column_sizes.cbegin(), column_sizes.cend(), [&](auto f) {
return f > accumulated_rows;
})) << "Something went wrong during iteration.";
/**
* Generate quantiles
*/
accumulated_rows = 0;
if (ref) {
// Reuse the cuts of the reference DMatrix instead of sketching the data.
GetCutsFromRef(ref, Info().num_col_, batch_param_, &cuts);
} else {
// Index of the current batch into `batch_nnz`.
size_t i = 0;
while (iter.Next()) {
if (!p_sketch) {
// Created lazily inside the loop, after the iterator has loaded a batch into the proxy.
p_sketch.reset(new common::HostSketchContainer{batch_param_.max_bin,
proxy->Info().feature_types.ConstHostSpan(),
column_sizes, false, ctx_.Threads()});
}
HostAdapterDispatch(proxy, [&](auto const& batch) {
proxy->Info().num_nonzero_ = batch_nnz[i];
// We don't need base row idx here as Info is from proxy and the number of rows in
// it is consistent with data batch.
p_sketch->PushAdapterBatch(batch, 0, proxy->Info(), missing);
});
accumulated_rows += num_rows();
++i;
}
iter.Reset();
CHECK_EQ(accumulated_rows, Info().num_row_);
CHECK(p_sketch);
p_sketch->MakeCuts(&cuts);
}
/**
* Generate gradient index.
*/
this->ghist_ = std::make_unique<GHistIndexMatrix>(Info(), std::move(cuts), batch_param_.max_bin);
// Row offset of the current batch within the full matrix.
size_t rbegin = 0;
// Value of `row_ptr` at the end of the previous batch, passed on to the next push.
size_t prev_sum = 0;
// Index of the current batch into `batch_nnz`.
size_t i = 0;
while (iter.Next()) {
HostAdapterDispatch(proxy, [&](auto const& batch) {
proxy->Info().num_nonzero_ = batch_nnz[i];
this->ghist_->PushAdapterBatch(&ctx_, rbegin, prev_sum, batch, missing,
proxy->Info().feature_types.ConstHostSpan(),
batch_param_.sparse_thresh, Info().num_row_);
});
if (n_batches != 1) {
// With multiple batches the proxy's meta info is appended batch by batch.
// NOTE(review): meaning of the (false, true) flags is not visible here; confirm against
// MetaInfo::Extend.
this->info_.Extend(std::move(proxy->Info()), false, true);
}
size_t batch_size = num_rows();
prev_sum = this->ghist_->row_ptr[rbegin + batch_size];
rbegin += batch_size;
++i;
}
iter.Reset();
CHECK_EQ(rbegin, Info().num_row_);
/**
* Generate column matrix
*/
accumulated_rows = 0;
while (iter.Next()) {
HostAdapterDispatch(proxy, [&](auto const& batch) {
// `accumulated_rows` is the row offset of this batch.
this->ghist_->PushAdapterBatchColumns(&ctx_, batch, missing, accumulated_rows);
});
accumulated_rows += num_rows();
}
iter.Reset();
CHECK_EQ(accumulated_rows, Info().num_row_);
if (n_batches == 1) {
// Single batch: take over the proxy's meta info wholesale, then restore the valid-entry
// count computed in the first pass.
this->info_ = std::move(proxy->Info());
this->info_.num_nonzero_ = nnz;
// The proxy's labels are expected to be empty after the move.
CHECK_EQ(proxy->Info().labels.Size(), 0);
}
}
/**
 * \brief Return a batch set over the single in-memory gradient index page.
 *
 * `param` is validated through CheckParam; the call aborts when the gradient index has
 * not been built.
 */
BatchSet<GHistIndexMatrix> IterativeDMatrix::GetGradientIndex(BatchParam const& param) {
  CheckParam(param);
  CHECK(ghist_) << "Not initialized with CPU data";
  // The iterator implementation shares ownership of the stored page.
  return BatchSet<GHistIndexMatrix>(
      BatchIterator<GHistIndexMatrix>(new SimpleBatchIteratorImpl<GHistIndexMatrix>(ghist_)));
}
} // namespace data
} // namespace xgboost

View File

@ -1,44 +1,43 @@
/*! /*!
* Copyright 2020-2022 XGBoost contributors * Copyright 2020-2022 XGBoost contributors
*/ */
#include <algorithm>
#include <memory> #include <memory>
#include <type_traits> #include <type_traits>
#include <algorithm>
#include "../common/hist_util.cuh" #include "../common/hist_util.cuh"
#include "simple_batch_iterator.h"
#include "iterative_dmatrix.h"
#include "sparse_page_source.h"
#include "ellpack_page.cuh"
#include "proxy_dmatrix.h"
#include "proxy_dmatrix.cuh"
#include "device_adapter.cuh" #include "device_adapter.cuh"
#include "ellpack_page.cuh"
#include "iterative_dmatrix.h"
#include "proxy_dmatrix.cuh"
#include "proxy_dmatrix.h"
#include "simple_batch_iterator.h"
#include "sparse_page_source.h"
namespace xgboost { namespace xgboost {
namespace data { namespace data {
void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing) { void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing,
std::shared_ptr<DMatrix> ref) {
// A handle passed to external iterator. // A handle passed to external iterator.
DMatrixProxy* proxy = MakeProxy(proxy_); DMatrixProxy* proxy = MakeProxy(proxy_);
CHECK(proxy); CHECK(proxy);
// The external iterator // The external iterator
auto iter = DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext>{ auto iter =
iter_handle, reset_, next_}; DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext>{iter_handle, reset_, next_};
dh::XGBCachingDeviceAllocator<char> alloc; dh::XGBCachingDeviceAllocator<char> alloc;
auto num_rows = [&]() { auto num_rows = [&]() {
return Dispatch(proxy, [](auto const &value) { return value.NumRows(); }); return Dispatch(proxy, [](auto const& value) { return value.NumRows(); });
}; };
auto num_cols = [&]() { auto num_cols = [&]() {
return Dispatch(proxy, [](auto const &value) { return value.NumCols(); }); return Dispatch(proxy, [](auto const& value) { return value.NumCols(); });
}; };
size_t row_stride = 0; size_t row_stride = 0;
size_t nnz = 0; size_t nnz = 0;
// Sketch for all batches. // Sketch for all batches.
iter.Reset();
std::vector<common::SketchContainer> sketch_containers; std::vector<common::SketchContainer> sketch_containers;
size_t batches = 0; size_t batches = 0;
size_t accumulated_rows = 0; size_t accumulated_rows = 0;
@ -52,69 +51,77 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing) {
return d; return d;
}; };
while (iter.Next()) { /**
* Generate quantiles
*/
common::HistogramCuts cuts;
do {
// We use do while here as the first batch is fetched in ctor
ctx_.gpu_id = proxy->DeviceIdx(); ctx_.gpu_id = proxy->DeviceIdx();
CHECK_LT(ctx_.gpu_id, common::AllVisibleGPUs()); CHECK_LT(ctx_.gpu_id, common::AllVisibleGPUs());
dh::safe_cuda(cudaSetDevice(get_device())); dh::safe_cuda(cudaSetDevice(get_device()));
if (cols == 0) { if (cols == 0) {
cols = num_cols(); cols = num_cols();
rabit::Allreduce<rabit::op::Max>(&cols, 1); rabit::Allreduce<rabit::op::Max>(&cols, 1);
this->info_.num_col_ = cols;
} else { } else {
CHECK_EQ(cols, num_cols()) << "Inconsistent number of columns."; CHECK_EQ(cols, num_cols()) << "Inconsistent number of columns.";
} }
sketch_containers.emplace_back(proxy->Info().feature_types, if (!ref) {
batch_param_.max_bin, cols, num_rows(), get_device()); sketch_containers.emplace_back(proxy->Info().feature_types, batch_param_.max_bin, cols,
auto* p_sketch = &sketch_containers.back(); num_rows(), get_device());
proxy->Info().weights_.SetDevice(get_device()); auto* p_sketch = &sketch_containers.back();
Dispatch(proxy, [&](auto const &value) { proxy->Info().weights_.SetDevice(get_device());
common::AdapterDeviceSketch(value, batch_param_.max_bin, Dispatch(proxy, [&](auto const& value) {
proxy->Info(), missing, p_sketch); common::AdapterDeviceSketch(value, batch_param_.max_bin, proxy->Info(), missing, p_sketch);
}); });
}
auto batch_rows = num_rows(); auto batch_rows = num_rows();
accumulated_rows += batch_rows; accumulated_rows += batch_rows;
dh::caching_device_vector<size_t> row_counts(batch_rows + 1, 0); dh::caching_device_vector<size_t> row_counts(batch_rows + 1, 0);
common::Span<size_t> row_counts_span(row_counts.data().get(), common::Span<size_t> row_counts_span(row_counts.data().get(), row_counts.size());
row_counts.size()); row_stride = std::max(row_stride, Dispatch(proxy, [=](auto const& value) {
row_stride = std::max(row_stride, Dispatch(proxy, [=](auto const &value) { return GetRowCounts(value, row_counts_span, get_device(), missing);
return GetRowCounts(value, row_counts_span, }));
get_device(), missing); nnz += thrust::reduce(thrust::cuda::par(alloc), row_counts.begin(), row_counts.end());
}));
nnz += thrust::reduce(thrust::cuda::par(alloc), row_counts.begin(),
row_counts.end());
batches++; batches++;
} } while (iter.Next());
iter.Reset(); iter.Reset();
dh::safe_cuda(cudaSetDevice(get_device())); dh::safe_cuda(cudaSetDevice(get_device()));
HostDeviceVector<FeatureType> ft; if (!ref) {
common::SketchContainer final_sketch( HostDeviceVector<FeatureType> ft;
sketch_containers.empty() ? ft : sketch_containers.front().FeatureTypes(), common::SketchContainer final_sketch(
batch_param_.max_bin, cols, accumulated_rows, get_device()); sketch_containers.empty() ? ft : sketch_containers.front().FeatureTypes(),
for (auto const& sketch : sketch_containers) { batch_param_.max_bin, cols, accumulated_rows, get_device());
final_sketch.Merge(sketch.ColumnsPtr(), sketch.Data()); for (auto const& sketch : sketch_containers) {
final_sketch.FixError(); final_sketch.Merge(sketch.ColumnsPtr(), sketch.Data());
final_sketch.FixError();
}
sketch_containers.clear();
sketch_containers.shrink_to_fit();
final_sketch.MakeCuts(&cuts);
} else {
GetCutsFromRef(ref, Info().num_col_, batch_param_, &cuts);
} }
sketch_containers.clear();
sketch_containers.shrink_to_fit();
common::HistogramCuts cuts;
final_sketch.MakeCuts(&cuts);
this->info_.num_col_ = cols;
this->info_.num_row_ = accumulated_rows; this->info_.num_row_ = accumulated_rows;
this->info_.num_nonzero_ = nnz; this->info_.num_nonzero_ = nnz;
auto init_page = [this, &proxy, &cuts, row_stride, accumulated_rows, auto init_page = [this, &proxy, &cuts, row_stride, accumulated_rows, get_device]() {
get_device]() { if (!ellpack_) {
if (!page_) {
// Should be put inside the while loop to protect against empty batch. In // Should be put inside the while loop to protect against empty batch. In
// that case device id is invalid. // that case device id is invalid.
page_.reset(new EllpackPage); ellpack_.reset(new EllpackPage);
*(page_->Impl()) = EllpackPageImpl(get_device(), cuts, this->IsDense(), *(ellpack_->Impl()) =
row_stride, accumulated_rows); EllpackPageImpl(get_device(), cuts, this->IsDense(), row_stride, accumulated_rows);
} }
}; };
// Construct the final ellpack page. /**
* Generate gradient index.
*/
size_t offset = 0; size_t offset = 0;
iter.Reset(); iter.Reset();
size_t n_batches_for_verification = 0; size_t n_batches_for_verification = 0;
@ -123,11 +130,10 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing) {
dh::safe_cuda(cudaSetDevice(get_device())); dh::safe_cuda(cudaSetDevice(get_device()));
auto rows = num_rows(); auto rows = num_rows();
dh::caching_device_vector<size_t> row_counts(rows + 1, 0); dh::caching_device_vector<size_t> row_counts(rows + 1, 0);
common::Span<size_t> row_counts_span(row_counts.data().get(), common::Span<size_t> row_counts_span(row_counts.data().get(), row_counts.size());
row_counts.size());
Dispatch(proxy, [=](auto const& value) { Dispatch(proxy, [=](auto const& value) {
return GetRowCounts(value, row_counts_span, get_device(), missing); return GetRowCounts(value, row_counts_span, get_device(), missing);
}); });
auto is_dense = this->IsDense(); auto is_dense = this->IsDense();
proxy->Info().feature_types.SetDevice(get_device()); proxy->Info().feature_types.SetDevice(get_device());
@ -136,7 +142,7 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing) {
return EllpackPageImpl(value, missing, get_device(), is_dense, row_counts_span, return EllpackPageImpl(value, missing, get_device(), is_dense, row_counts_span,
d_feature_types, row_stride, rows, cuts); d_feature_types, row_stride, rows, cuts);
}); });
size_t num_elements = page_->Impl()->Copy(get_device(), &new_impl, offset); size_t num_elements = ellpack_->Impl()->Copy(get_device(), &new_impl, offset);
offset += num_elements; offset += num_elements;
proxy->Info().num_row_ = num_rows(); proxy->Info().num_row_ = num_rows();
@ -160,15 +166,15 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing) {
rabit::Allreduce<rabit::op::Max>(&info_.num_col_, 1); rabit::Allreduce<rabit::op::Max>(&info_.num_col_, 1);
} }
BatchSet<EllpackPage> IterativeDMatrix::GetEllpackBatches(const BatchParam& param) { BatchSet<EllpackPage> IterativeDMatrix::GetEllpackBatches(BatchParam const& param) {
CHECK(page_); CheckParam(param);
// FIXME(Jiamingy): https://github.com/dmlc/xgboost/issues/7976 CHECK(ellpack_) << "Not initialized with GPU data";
if (param.max_bin != batch_param_.max_bin) { auto begin_iter = BatchIterator<EllpackPage>(new SimpleBatchIteratorImpl<EllpackPage>(ellpack_));
LOG(WARNING) << "Inconsistent max_bin between Quantile DMatrix and Booster:" << param.max_bin
<< " vs. " << batch_param_.max_bin;
}
auto begin_iter = BatchIterator<EllpackPage>(new SimpleBatchIteratorImpl<EllpackPage>(page_));
return BatchSet<EllpackPage>(begin_iter); return BatchSet<EllpackPage>(begin_iter);
} }
void GetCutsFromEllpack(EllpackPage const& page, common::HistogramCuts* cuts) {
*cuts = page.Impl()->Cuts();
}
} // namespace data } // namespace data
} // namespace xgboost } // namespace xgboost

View File

@ -5,45 +5,87 @@
#ifndef XGBOOST_DATA_ITERATIVE_DMATRIX_H_ #ifndef XGBOOST_DATA_ITERATIVE_DMATRIX_H_
#define XGBOOST_DATA_ITERATIVE_DMATRIX_H_ #define XGBOOST_DATA_ITERATIVE_DMATRIX_H_
#include <vector> #include <memory>
#include <string> #include <string>
#include <utility> #include <utility>
#include <memory> #include <vector>
#include "xgboost/base.h"
#include "xgboost/data.h"
#include "xgboost/c_api.h"
#include "proxy_dmatrix.h" #include "proxy_dmatrix.h"
#include "simple_batch_iterator.h" #include "simple_batch_iterator.h"
#include "xgboost/base.h"
#include "xgboost/c_api.h"
#include "xgboost/data.h"
namespace xgboost { namespace xgboost {
namespace common {
class HistogramCuts;
}
namespace data { namespace data {
class IterativeDMatrix : public DMatrix { class IterativeDMatrix : public DMatrix {
MetaInfo info_; MetaInfo info_;
Context ctx_; Context ctx_;
BatchParam batch_param_; BatchParam batch_param_;
std::shared_ptr<EllpackPage> page_; std::shared_ptr<EllpackPage> ellpack_;
std::shared_ptr<GHistIndexMatrix> ghist_;
DMatrixHandle proxy_; DMatrixHandle proxy_;
DataIterResetCallback *reset_; DataIterResetCallback *reset_;
XGDMatrixCallbackNext *next_; XGDMatrixCallbackNext *next_;
public: void CheckParam(BatchParam const &param) {
void InitFromCUDA(DataIterHandle iter, float missing); // FIXME(Jiamingy): https://github.com/dmlc/xgboost/issues/7976
if (param.max_bin != batch_param_.max_bin && param.max_bin != 0) {
LOG(WARNING) << "Inconsistent max_bin between Quantile DMatrix and Booster:" << param.max_bin
<< " vs. " << batch_param_.max_bin;
}
CHECK(!param.regen) << "Only `hist` and `gpu_hist` tree method can use `QuantileDMatrix`.";
}
template <typename Page>
static auto InvalidTreeMethod() {
LOG(FATAL) << "Only `hist` and `gpu_hist` tree method can use `QuantileDMatrix`.";
return BatchSet<Page>(BatchIterator<Page>(nullptr));
}
public: public:
explicit IterativeDMatrix(DataIterHandle iter, DMatrixHandle proxy, DataIterResetCallback *reset, void InitFromCUDA(DataIterHandle iter, float missing, std::shared_ptr<DMatrix> ref);
XGDMatrixCallbackNext *next, float missing, int nthread, int max_bin) void InitFromCPU(DataIterHandle iter_handle, float missing, std::shared_ptr<DMatrix> ref);
public:
explicit IterativeDMatrix(DataIterHandle iter_handle, DMatrixHandle proxy,
std::shared_ptr<DMatrix> ref, DataIterResetCallback *reset,
XGDMatrixCallbackNext *next, float missing, int nthread,
bst_bin_t max_bin)
: proxy_{proxy}, reset_{reset}, next_{next} { : proxy_{proxy}, reset_{reset}, next_{next} {
batch_param_ = BatchParam{MakeProxy(proxy_)->DeviceIdx(), max_bin}; // fetch the first batch
auto iter =
DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext>{iter_handle, reset_, next_};
iter.Reset();
bool valid = iter.Next();
CHECK(valid) << "Iterative DMatrix must have at least 1 batch.";
auto d = MakeProxy(proxy_)->DeviceIdx();
if (batch_param_.gpu_id != Context::kCpuId) {
CHECK_EQ(d, batch_param_.gpu_id) << "All batch should be on the same device.";
}
batch_param_ = BatchParam{d, max_bin};
batch_param_.sparse_thresh = 0.2; // default from TrainParam
ctx_.UpdateAllowUnknown(Args{{"nthread", std::to_string(nthread)}}); ctx_.UpdateAllowUnknown(Args{{"nthread", std::to_string(nthread)}});
this->InitFromCUDA(iter, missing); if (d == Context::kCpuId) {
this->InitFromCPU(iter_handle, missing, ref);
} else {
this->InitFromCUDA(iter_handle, missing, ref);
}
} }
~IterativeDMatrix() override = default; ~IterativeDMatrix() override = default;
bool EllpackExists() const override { return true; } bool EllpackExists() const override { return static_cast<bool>(ellpack_); }
bool GHistIndexExists() const override { return static_cast<bool>(ghist_); }
bool SparsePageExists() const override { return false; } bool SparsePageExists() const override { return false; }
DMatrix *Slice(common::Span<int32_t const>) override { DMatrix *Slice(common::Span<int32_t const>) override {
LOG(FATAL) << "Slicing DMatrix is not supported for Quantile DMatrix."; LOG(FATAL) << "Slicing DMatrix is not supported for Quantile DMatrix.";
return nullptr; return nullptr;
@ -52,20 +94,13 @@ class IterativeDMatrix : public DMatrix {
LOG(FATAL) << "Not implemented."; LOG(FATAL) << "Not implemented.";
return BatchSet<SparsePage>(BatchIterator<SparsePage>(nullptr)); return BatchSet<SparsePage>(BatchIterator<SparsePage>(nullptr));
} }
BatchSet<CSCPage> GetColumnBatches() override { BatchSet<CSCPage> GetColumnBatches() override { return InvalidTreeMethod<CSCPage>(); }
LOG(FATAL) << "Not implemented.";
return BatchSet<CSCPage>(BatchIterator<CSCPage>(nullptr));
}
BatchSet<SortedCSCPage> GetSortedColumnBatches() override { BatchSet<SortedCSCPage> GetSortedColumnBatches() override {
LOG(FATAL) << "Not implemented."; return InvalidTreeMethod<SortedCSCPage>();
return BatchSet<SortedCSCPage>(BatchIterator<SortedCSCPage>(nullptr));
}
BatchSet<GHistIndexMatrix> GetGradientIndex(const BatchParam&) override {
LOG(FATAL) << "Not implemented.";
return BatchSet<GHistIndexMatrix>(BatchIterator<GHistIndexMatrix>(nullptr));
} }
BatchSet<GHistIndexMatrix> GetGradientIndex(BatchParam const &param) override;
BatchSet<EllpackPage> GetEllpackBatches(const BatchParam& param) override; BatchSet<EllpackPage> GetEllpackBatches(const BatchParam &param) override;
bool SingleColBlock() const override { return true; } bool SingleColBlock() const override { return true; }
@ -75,20 +110,34 @@ class IterativeDMatrix : public DMatrix {
Context const *Ctx() const override { return &ctx_; } Context const *Ctx() const override { return &ctx_; }
}; };
/**
* \brief Get quantile cuts from reference Quantile DMatrix.
*/
void GetCutsFromRef(std::shared_ptr<DMatrix> ref_, bst_feature_t n_features, BatchParam p,
common::HistogramCuts *p_cuts);
/**
* \brief Get quantile cuts from ellpack page.
*/
void GetCutsFromEllpack(EllpackPage const &page, common::HistogramCuts *cuts);
#if !defined(XGBOOST_USE_CUDA) #if !defined(XGBOOST_USE_CUDA)
inline void IterativeDMatrix::InitFromCUDA(DataIterHandle iter, float missing) { inline void IterativeDMatrix::InitFromCUDA(DataIterHandle iter, float missing,
std::shared_ptr<DMatrix> ref) {
// silent the warning about unused variables. // silent the warning about unused variables.
(void)(proxy_); (void)(proxy_);
(void)(reset_); (void)(reset_);
(void)(next_); (void)(next_);
common::AssertGPUSupport(); common::AssertGPUSupport();
} }
inline BatchSet<EllpackPage> IterativeDMatrix::GetEllpackBatches(const BatchParam& param) { inline BatchSet<EllpackPage> IterativeDMatrix::GetEllpackBatches(const BatchParam &param) {
common::AssertGPUSupport(); common::AssertGPUSupport();
auto begin_iter = auto begin_iter = BatchIterator<EllpackPage>(new SimpleBatchIteratorImpl<EllpackPage>(ellpack_));
BatchIterator<EllpackPage>(new SimpleBatchIteratorImpl<EllpackPage>(page_));
return BatchSet<EllpackPage>(BatchIterator<EllpackPage>(begin_iter)); return BatchSet<EllpackPage>(BatchIterator<EllpackPage>(begin_iter));
} }
inline void GetCutsFromEllpack(EllpackPage const &, common::HistogramCuts *) {
common::AssertGPUSupport();
}
#endif // !defined(XGBOOST_USE_CUDA) #endif // !defined(XGBOOST_USE_CUDA)
} // namespace data } // namespace data
} // namespace xgboost } // namespace xgboost

View File

@ -8,22 +8,22 @@
namespace xgboost { namespace xgboost {
namespace data { namespace data {
void DMatrixProxy::SetArrayData(char const *c_interface) { void DMatrixProxy::SetArrayData(char const *c_interface) {
std::shared_ptr<ArrayAdapter> adapter{ std::shared_ptr<ArrayAdapter> adapter{new ArrayAdapter(StringView{c_interface})};
new ArrayAdapter(StringView{c_interface})};
this->batch_ = adapter; this->batch_ = adapter;
this->Info().num_col_ = adapter->NumColumns(); this->Info().num_col_ = adapter->NumColumns();
this->Info().num_row_ = adapter->NumRows(); this->Info().num_row_ = adapter->NumRows();
this->ctx_.gpu_id = Context::kCpuId;
} }
void DMatrixProxy::SetCSRData(char const *c_indptr, char const *c_indices, void DMatrixProxy::SetCSRData(char const *c_indptr, char const *c_indices,
char const *c_values, bst_feature_t n_features, bool on_host) { char const *c_values, bst_feature_t n_features, bool on_host) {
CHECK(on_host) << "Not implemented on device."; CHECK(on_host) << "Not implemented on device.";
std::shared_ptr<CSRArrayAdapter> adapter{ std::shared_ptr<CSRArrayAdapter> adapter{new CSRArrayAdapter(
new CSRArrayAdapter(StringView{c_indptr}, StringView{c_indices}, StringView{c_indptr}, StringView{c_indices}, StringView{c_values}, n_features)};
StringView{c_values}, n_features)};
this->batch_ = adapter; this->batch_ = adapter;
this->Info().num_col_ = adapter->NumColumns(); this->Info().num_col_ = adapter->NumColumns();
this->Info().num_row_ = adapter->NumRows(); this->Info().num_row_ = adapter->NumRows();
this->ctx_.gpu_id = Context::kCpuId;
} }
} // namespace data } // namespace data
} // namespace xgboost } // namespace xgboost

View File

@ -16,6 +16,7 @@ void DMatrixProxy::FromCudaColumnar(StringView interface_str) {
this->Info().num_row_ = adapter->NumRows(); this->Info().num_row_ = adapter->NumRows();
if (ctx_.gpu_id < 0) { if (ctx_.gpu_id < 0) {
CHECK_EQ(this->Info().num_row_, 0); CHECK_EQ(this->Info().num_row_, 0);
ctx_.gpu_id = dh::CurrentDevice();
} }
} }
@ -27,6 +28,7 @@ void DMatrixProxy::FromCudaArray(StringView interface_str) {
this->Info().num_row_ = adapter->NumRows(); this->Info().num_row_ = adapter->NumRows();
if (ctx_.gpu_id < 0) { if (ctx_.gpu_id < 0) {
CHECK_EQ(this->Info().num_row_, 0); CHECK_EQ(this->Info().num_row_, 0);
ctx_.gpu_id = dh::CurrentDevice();
} }
} }
} // namespace data } // namespace data

View File

@ -65,9 +65,6 @@ class DMatrixProxy : public DMatrix {
} else { } else {
this->FromCudaArray(interface_str); this->FromCudaArray(interface_str);
} }
if (this->info_.num_row_ == 0) {
this->ctx_.gpu_id = Context::kCpuId;
}
#endif // defined(XGBOOST_USE_CUDA) #endif // defined(XGBOOST_USE_CUDA)
} }
@ -80,9 +77,11 @@ class DMatrixProxy : public DMatrix {
MetaInfo const& Info() const override { return info_; } MetaInfo const& Info() const override { return info_; }
Context const* Ctx() const override { return &ctx_; } Context const* Ctx() const override { return &ctx_; }
bool SingleColBlock() const override { return true; } bool SingleColBlock() const override { return false; }
bool EllpackExists() const override { return true; } bool EllpackExists() const override { return false; }
bool GHistIndexExists() const override { return false; }
bool SparsePageExists() const override { return false; } bool SparsePageExists() const override { return false; }
DMatrix* Slice(common::Span<int32_t const> /*ridxs*/) override { DMatrix* Slice(common::Span<int32_t const> /*ridxs*/) override {
LOG(FATAL) << "Slicing DMatrix is not supported for Proxy DMatrix."; LOG(FATAL) << "Slicing DMatrix is not supported for Proxy DMatrix.";
return nullptr; return nullptr;

View File

@ -55,12 +55,9 @@ class SimpleDMatrix : public DMatrix {
std::shared_ptr<GHistIndexMatrix> gradient_index_{nullptr}; std::shared_ptr<GHistIndexMatrix> gradient_index_{nullptr};
BatchParam batch_param_; BatchParam batch_param_;
bool EllpackExists() const override { bool EllpackExists() const override { return static_cast<bool>(ellpack_page_); }
return static_cast<bool>(ellpack_page_); bool GHistIndexExists() const override { return static_cast<bool>(gradient_index_); }
} bool SparsePageExists() const override { return true; }
bool SparsePageExists() const override {
return true;
}
private: private:
Context ctx_; Context ctx_;

View File

@ -120,15 +120,11 @@ class SparsePageDMatrix : public DMatrix {
std::shared_ptr<EllpackPageSource> ellpack_page_source_; std::shared_ptr<EllpackPageSource> ellpack_page_source_;
std::shared_ptr<CSCPageSource> column_source_; std::shared_ptr<CSCPageSource> column_source_;
std::shared_ptr<SortedCSCPageSource> sorted_column_source_; std::shared_ptr<SortedCSCPageSource> sorted_column_source_;
std::shared_ptr<GHistIndexMatrix> ghist_index_page_; // hist
std::shared_ptr<GradientIndexPageSource> ghist_index_source_; std::shared_ptr<GradientIndexPageSource> ghist_index_source_;
bool EllpackExists() const override { bool EllpackExists() const override { return static_cast<bool>(ellpack_page_source_); }
return static_cast<bool>(ellpack_page_source_); bool GHistIndexExists() const override { return static_cast<bool>(ghist_index_source_); }
} bool SparsePageExists() const override { return static_cast<bool>(sparse_page_source_); }
bool SparsePageExists() const override {
return static_cast<bool>(sparse_page_source_);
}
}; };
inline std::string MakeId(std::string prefix, SparsePageDMatrix *ptr) { inline std::string MakeId(std::string prefix, SparsePageDMatrix *ptr) {

View File

@ -12,6 +12,7 @@
#include "../common/math.h" #include "../common/math.h"
#include "../common/threading_utils.h" #include "../common/threading_utils.h"
#include "../data/adapter.h" #include "../data/adapter.h"
#include "../data/gradient_index.h"
#include "../data/proxy_dmatrix.h" #include "../data/proxy_dmatrix.h"
#include "../gbm/gbtree_model.h" #include "../gbm/gbtree_model.h"
#include "predict_fn.h" #include "predict_fn.h"
@ -125,30 +126,71 @@ void FVecDrop(const size_t block_size, const size_t batch_offset, DataView* batc
} }
} }
template <size_t kUnrollLen = 8> namespace {
static size_t constexpr kUnroll = 8;
} // anonymous namespace
struct SparsePageView { struct SparsePageView {
bst_row_t base_rowid; bst_row_t base_rowid;
HostSparsePageView view; HostSparsePageView view;
static size_t constexpr kUnroll = kUnrollLen;
explicit SparsePageView(SparsePage const *p) explicit SparsePageView(SparsePage const *p) : base_rowid{p->base_rowid} { view = p->GetView(); }
: base_rowid{p->base_rowid} {
view = p->GetView();
}
SparsePage::Inst operator[](size_t i) { return view[i]; } SparsePage::Inst operator[](size_t i) { return view[i]; }
size_t Size() const { return view.Size(); } size_t Size() const { return view.Size(); }
}; };
// Row-wise view over a GHistIndexMatrix page. It exposes the same members the prediction
// code uses on SparsePageView (base_rowid, operator[], Size()), but each row is rebuilt on
// demand from page_.GetFvalue() into a caller-provided scratch buffer.
// NOTE(review): the `template <...>` prefix on the next line looks like left-hand-side
// residue of the side-by-side diff rendering (old AdapterView header) -- confirm.
template <typename Adapter, size_t kUnrollLen = 8> struct GHistIndexMatrixView {
private:
// Page being viewed; held by reference, so the page must outlive the view.
GHistIndexMatrix const &page_;
// Number of feature columns scanned for every row.
uint64_t n_features_;
// Feature types, consulted through common::IsCat() for each column.
common::Span<FeatureType const> ft_;
// Scratch storage for rebuilt rows. Thread t writes starting at n_features_ * kUnroll * t,
// using kUnroll consecutive slots of n_features_ entries each.
common::Span<Entry> workspace_;
// Per-thread index of the slot that the next operator[] call fills; wraps at kUnroll.
std::vector<size_t> current_unroll_;
public:
// Copied from the page's base_rowid at construction.
size_t base_rowid;
public:
// `workplace` is indexed up to n_feat * kUnroll * (thread id + 1), so it has to provide
// that many entries for every thread that calls operator[]. current_unroll_ gets one
// zero-initialised counter per thread, with a minimum of one counter.
GHistIndexMatrixView(GHistIndexMatrix const &_page, uint64_t n_feat,
common::Span<FeatureType const> ft, common::Span<Entry> workplace,
int32_t n_threads)
: page_{_page},
n_features_{n_feat},
ft_{ft},
workspace_{workplace},
current_unroll_(n_threads > 0 ? n_threads : 1, 0),
base_rowid{_page.base_rowid} {}
// Rebuild row `r` as a span of (feature index, value) entries.
// The returned span aliases workspace_: the slot is reused after kUnroll further calls made
// from the same OpenMP thread, so at most kUnroll rows per thread are valid at a time.
SparsePage::Inst operator[](size_t r) {
// The calling OpenMP thread selects which region of the workspace is written.
auto t = omp_get_thread_num();
auto const beg = (n_features_ * kUnroll * t) + (current_unroll_[t] * n_features_);
size_t non_missing{beg};
for (bst_feature_t c = 0; c < n_features_; ++c) {
float f = page_.GetFvalue(r, c, common::IsCat(ft_, c));
// Values flagged by CheckNAN are left out of the row (presumably missing entries).
if (!common::CheckNAN(f)) {
workspace_[non_missing] = Entry{c, f};
++non_missing;
}
}
auto ret = workspace_.subspan(beg, non_missing - beg);
// Advance to the next slot for this thread, wrapping around after kUnroll rows.
current_unroll_[t]++;
if (current_unroll_[t] == kUnroll) {
current_unroll_[t] = 0;
}
return ret;
}
// Size of the underlying page, as reported by GHistIndexMatrix::Size().
size_t Size() const { return page_.Size(); }
};
template <typename Adapter>
class AdapterView { class AdapterView {
Adapter* adapter_; Adapter* adapter_;
float missing_; float missing_;
common::Span<Entry> workspace_; common::Span<Entry> workspace_;
std::vector<size_t> current_unroll_; std::vector<size_t> current_unroll_;
public:
static size_t constexpr kUnroll = kUnrollLen;
public: public:
explicit AdapterView(Adapter *adapter, float missing, common::Span<Entry> workplace, explicit AdapterView(Adapter *adapter, float missing, common::Span<Entry> workplace,
int32_t n_threads) int32_t n_threads)
@ -251,33 +293,59 @@ class CPUPredictor : public Predictor {
} }
} }
/*!
 * \brief Run prediction over the GHistIndexMatrix batches of a DMatrix.
 *
 * Each batch is wrapped in a GHistIndexMatrixView, which rebuilds rows from the page into a
 * shared scratch buffer, and is then fed to PredictBatchByBlockOfRowsKernel.
 *
 * \param p_fmat     Input matrix; its GHistIndexMatrix batches are iterated.
 * \param model      Tree model forwarded to the prediction kernel.
 * \param tree_begin Forwarded to the prediction kernel.
 * \param tree_end   Forwarded to the prediction kernel.
 * \param out_preds  Output buffer forwarded to the prediction kernel.
 */
void PredictGHistIndex(DMatrix *p_fmat, gbm::GBTreeModel const &model, int32_t tree_begin,
                       int32_t tree_end, std::vector<bst_float> *out_preds) const {
  auto const n_threads = this->ctx_->Threads();
  auto const &info = p_fmat->Info();

  // Use the blocked kernel only when more than half of the cells are present.
  constexpr double kDensityThresh = .5;
  size_t total = std::max(info.num_row_ * info.num_col_, static_cast<uint64_t>(1));
  double density = static_cast<double>(info.num_nonzero_) / static_cast<double>(total);
  bool blocked = density > kDensityThresh;

  std::vector<RegTree::FVec> feat_vecs;
  InitThreadTemp(n_threads * (blocked ? kBlockOfRowsSize : 1), &feat_vecs);

  // Scratch space for the view: kUnroll rows of num_col_ entries per thread.
  std::vector<Entry> workspace(info.num_col_ * kUnroll * n_threads);
  // Bind by reference: plain `auto` would deep-copy the feature type vector on every call.
  auto const &ft = info.feature_types.ConstHostVector();

  for (auto const &batch : p_fmat->GetBatches<GHistIndexMatrix>({})) {
    if (blocked) {
      PredictBatchByBlockOfRowsKernel<GHistIndexMatrixView, kBlockOfRowsSize>(
          GHistIndexMatrixView{batch, info.num_col_, ft, workspace, n_threads}, out_preds, model,
          tree_begin, tree_end, &feat_vecs, n_threads);
    } else {
      PredictBatchByBlockOfRowsKernel<GHistIndexMatrixView, 1>(
          GHistIndexMatrixView{batch, info.num_col_, ft, workspace, n_threads}, out_preds, model,
          tree_begin, tree_end, &feat_vecs, n_threads);
    }
  }
}
void PredictDMatrix(DMatrix *p_fmat, std::vector<bst_float> *out_preds, void PredictDMatrix(DMatrix *p_fmat, std::vector<bst_float> *out_preds,
gbm::GBTreeModel const &model, int32_t tree_begin, gbm::GBTreeModel const &model, int32_t tree_begin, int32_t tree_end) const {
int32_t tree_end) const { if (!p_fmat->PageExists<SparsePage>()) {
this->PredictGHistIndex(p_fmat, model, tree_begin, tree_end, out_preds);
return;
}
auto const n_threads = this->ctx_->Threads(); auto const n_threads = this->ctx_->Threads();
constexpr double kDensityThresh = .5; constexpr double kDensityThresh = .5;
size_t total = std::max(p_fmat->Info().num_row_ * p_fmat->Info().num_col_, size_t total =
static_cast<uint64_t>(1)); std::max(p_fmat->Info().num_row_ * p_fmat->Info().num_col_, static_cast<uint64_t>(1));
double density = static_cast<double>(p_fmat->Info().num_nonzero_) / double density = static_cast<double>(p_fmat->Info().num_nonzero_) / static_cast<double>(total);
static_cast<double>(total);
bool blocked = density > kDensityThresh; bool blocked = density > kDensityThresh;
std::vector<RegTree::FVec> feat_vecs; std::vector<RegTree::FVec> feat_vecs;
InitThreadTemp(n_threads * (blocked ? kBlockOfRowsSize : 1), &feat_vecs); InitThreadTemp(n_threads * (blocked ? kBlockOfRowsSize : 1), &feat_vecs);
for (auto const &batch : p_fmat->GetBatches<SparsePage>()) { for (auto const &batch : p_fmat->GetBatches<SparsePage>()) {
CHECK_EQ(out_preds->size(), CHECK_EQ(out_preds->size(),
p_fmat->Info().num_row_ * p_fmat->Info().num_row_ * model.learner_model_param->num_output_group);
model.learner_model_param->num_output_group);
size_t constexpr kUnroll = 8;
if (blocked) { if (blocked) {
PredictBatchByBlockOfRowsKernel<SparsePageView<kUnroll>, kBlockOfRowsSize>( PredictBatchByBlockOfRowsKernel<SparsePageView, kBlockOfRowsSize>(
SparsePageView<kUnroll>{&batch}, out_preds, model, tree_begin, tree_end, &feat_vecs, SparsePageView{&batch}, out_preds, model, tree_begin, tree_end, &feat_vecs, n_threads);
n_threads);
} else { } else {
PredictBatchByBlockOfRowsKernel<SparsePageView<kUnroll>, 1>( PredictBatchByBlockOfRowsKernel<SparsePageView, 1>(
SparsePageView<kUnroll>{&batch}, out_preds, model, tree_begin, tree_end, &feat_vecs, SparsePageView{&batch}, out_preds, model, tree_begin, tree_end, &feat_vecs, n_threads);
n_threads);
} }
} }
} }
@ -316,7 +384,7 @@ class CPUPredictor : public Predictor {
info.num_row_ = m->NumRows(); info.num_row_ = m->NumRows();
this->InitOutPredictions(info, &(out_preds->predictions), model); this->InitOutPredictions(info, &(out_preds->predictions), model);
} }
std::vector<Entry> workspace(m->NumColumns() * 8 * n_threads); std::vector<Entry> workspace(m->NumColumns() * kUnroll * n_threads);
auto &predictions = out_preds->predictions.HostVector(); auto &predictions = out_preds->predictions.HostVector();
std::vector<RegTree::FVec> thread_temp; std::vector<RegTree::FVec> thread_temp;
InitThreadTemp(n_threads * kBlockSize, &thread_temp); InitThreadTemp(n_threads * kBlockSize, &thread_temp);

View File

@ -149,10 +149,10 @@ struct SparsePageLoader {
struct EllpackLoader { struct EllpackLoader {
EllpackDeviceAccessor const& matrix; EllpackDeviceAccessor const& matrix;
XGBOOST_DEVICE EllpackLoader(EllpackDeviceAccessor const& m, bool, XGBOOST_DEVICE EllpackLoader(EllpackDeviceAccessor const& m, bool, bst_feature_t, bst_row_t,
bst_feature_t, bst_row_t, size_t, float) size_t, float)
: matrix{m} {} : matrix{m} {}
__device__ __forceinline__ float GetElement(size_t ridx, size_t fidx) const { __device__ __forceinline__ float GetElement(size_t ridx, size_t fidx) const {
auto gidx = matrix.GetBinIndex(ridx, fidx); auto gidx = matrix.GetBinIndex(ridx, fidx);
if (gidx == -1) { if (gidx == -1) {
return nan(""); return nan("");

View File

@ -0,0 +1,36 @@
/*!
* Copyright 2022 XGBoost contributors
*/
#include "test_iterative_dmatrix.h"
#include <gtest/gtest.h>
#include "../../../src/data/gradient_index.h"
#include "../../../src/data/iterative_dmatrix.h"
#include "../helpers.h"
namespace xgboost {
namespace data {
// Runs the shared TestRefDMatrix checks on GHistIndexMatrix pages fed by the numpy iterator.
TEST(IterativeDMatrix, Ref) {
  // Accessor handed to the shared test: yields the cuts stored on a gradient index page.
  auto get_cuts = [](GHistIndexMatrix const& page) { return page.cut; };
  TestRefDMatrix<GHistIndexMatrix, NumpyArrayIterForTest>(get_cuts);
}
// IsDense() must be true only when the iterator is created with zero sparsity.
TEST(IterativeDMatrix, IsDense) {
  int const n_bins = 16;
  auto check_density = [n_bins](float sparsity) {
    NumpyArrayIterForTest iter(sparsity);
    IterativeDMatrix m(&iter, iter.Proxy(), nullptr, Reset, Next,
                       std::numeric_limits<float>::quiet_NaN(), 0, n_bins);
    if (sparsity != 0.0) {
      ASSERT_FALSE(m.IsDense());
    } else {
      ASSERT_TRUE(m.IsDense());
    }
  };
  // Sparsity 0.0 is the only case expected to report dense.
  check_density(0.0);
  check_density(0.1);
  check_density(1.0);
}
} // namespace data
} // namespace xgboost

View File

@ -3,19 +3,19 @@
*/ */
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include "../helpers.h"
#include "../../../src/data/iterative_dmatrix.h"
#include "../../../src/data/ellpack_page.cuh"
#include "../../../src/data/device_adapter.cuh" #include "../../../src/data/device_adapter.cuh"
#include "../../../src/data/ellpack_page.cuh"
#include "../../../src/data/iterative_dmatrix.h"
#include "../helpers.h"
#include "test_iterative_dmatrix.h"
namespace xgboost { namespace xgboost {
namespace data { namespace data {
void TestEquivalent(float sparsity) { void TestEquivalent(float sparsity) {
CudaArrayIterForTest iter{sparsity}; CudaArrayIterForTest iter{sparsity};
IterativeDMatrix m( IterativeDMatrix m(&iter, iter.Proxy(), nullptr, Reset, Next,
&iter, iter.Proxy(), Reset, Next, std::numeric_limits<float>::quiet_NaN(), std::numeric_limits<float>::quiet_NaN(), 0, 256);
0, 256);
size_t offset = 0; size_t offset = 0;
auto first = (*m.GetEllpackBatches({}).begin()).Impl(); auto first = (*m.GetEllpackBatches({}).begin()).Impl();
std::unique_ptr<EllpackPageImpl> page_concatenated { std::unique_ptr<EllpackPageImpl> page_concatenated {
@ -88,9 +88,8 @@ TEST(IterativeDeviceDMatrix, Basic) {
TEST(IterativeDeviceDMatrix, RowMajor) { TEST(IterativeDeviceDMatrix, RowMajor) {
CudaArrayIterForTest iter(0.0f); CudaArrayIterForTest iter(0.0f);
IterativeDMatrix m( IterativeDMatrix m(&iter, iter.Proxy(), nullptr, Reset, Next,
&iter, iter.Proxy(), Reset, Next, std::numeric_limits<float>::quiet_NaN(), std::numeric_limits<float>::quiet_NaN(), 0, 256);
0, 256);
size_t n_batches = 0; size_t n_batches = 0;
std::string interface_str = iter.AsArray(); std::string interface_str = iter.AsArray();
for (auto& ellpack : m.GetBatches<EllpackPage>({})) { for (auto& ellpack : m.GetBatches<EllpackPage>({})) {
@ -139,9 +138,8 @@ TEST(IterativeDeviceDMatrix, RowMajorMissing) {
reinterpret_cast<float *>(get<Integer>(j_interface["data"][0]))); reinterpret_cast<float *>(get<Integer>(j_interface["data"][0])));
thrust::copy(h_data.cbegin(), h_data.cend(), ptr); thrust::copy(h_data.cbegin(), h_data.cend(), ptr);
IterativeDMatrix m( IterativeDMatrix m(&iter, iter.Proxy(), nullptr, Reset, Next,
&iter, iter.Proxy(), Reset, Next, std::numeric_limits<float>::quiet_NaN(), std::numeric_limits<float>::quiet_NaN(), 0, 256);
0, 256);
auto &ellpack = *m.GetBatches<EllpackPage>({0, 256}).begin(); auto &ellpack = *m.GetBatches<EllpackPage>({0, 256}).begin();
auto impl = ellpack.Impl(); auto impl = ellpack.Impl();
common::CompressedIterator<uint32_t> iterator( common::CompressedIterator<uint32_t> iterator(
@ -157,11 +155,10 @@ TEST(IterativeDeviceDMatrix, RowMajorMissing) {
TEST(IterativeDeviceDMatrix, IsDense) { TEST(IterativeDeviceDMatrix, IsDense) {
int num_bins = 16; int num_bins = 16;
auto test = [num_bins] (float sparsity) { auto test = [num_bins](float sparsity) {
CudaArrayIterForTest iter(sparsity); CudaArrayIterForTest iter(sparsity);
IterativeDMatrix m( IterativeDMatrix m(&iter, iter.Proxy(), nullptr, Reset, Next,
&iter, iter.Proxy(), Reset, Next, std::numeric_limits<float>::quiet_NaN(), std::numeric_limits<float>::quiet_NaN(), 0, num_bins);
0, 256);
if (sparsity == 0.0) { if (sparsity == 0.0) {
ASSERT_TRUE(m.IsDense()); ASSERT_TRUE(m.IsDense());
} else { } else {
@ -170,6 +167,12 @@ TEST(IterativeDeviceDMatrix, IsDense) {
}; };
test(0.0); test(0.0);
test(0.1); test(0.1);
test(1.0);
}
// Runs the shared TestRefDMatrix checks on Ellpack pages fed by the CUDA array iterator.
TEST(IterativeDeviceDMatrix, Ref) {
  // Accessor handed to the shared test: yields the cuts held by the Ellpack implementation.
  auto get_cuts = [](EllpackPage const& page) { return page.Impl()->Cuts(); };
  TestRefDMatrix<EllpackPage, CudaArrayIterForTest>(get_cuts);
}
} // namespace data } // namespace data
} // namespace xgboost } // namespace xgboost

View File

@ -0,0 +1,59 @@
/*!
* Copyright 2022 XGBoost contributors
*/
#pragma once
#include <memory> // std::make_shared
#include "../../../src/data/iterative_dmatrix.h"
#include "../helpers.h"
namespace xgboost {
namespace data {
/*!
 * \brief Shared test body for the `ref` argument of IterativeDMatrix.
 *
 * Three scenarios are checked in order:
 *  - a matrix built with another IterativeDMatrix as reference has the same cuts;
 *  - the same data built without a reference has different cut values and pointers;
 *  - a matrix built with a generated DMatrix as reference has the same cuts as that DMatrix.
 *
 * \tparam Page Batch type requested through GetBatches.
 * \tparam Iter Data iterator used to feed IterativeDMatrix.
 * \param get_cuts Callable returning the cuts of a Page.
 */
template <typename Page, typename Iter, typename Cuts>
void TestRefDMatrix(Cuts&& get_cuts) {
  int const n_bins = 256;
  auto const missing = std::numeric_limits<float>::quiet_NaN();

  Iter ref_iter(0.3, 2048);
  auto p_ref = std::make_shared<IterativeDMatrix>(&ref_iter, ref_iter.Proxy(), nullptr, Reset,
                                                  Next, missing, 0, n_bins);

  // Built with `p_ref` as reference: every cut container has to match.
  Iter other_iter(0.8, 32, Iter::Cols(), 13);
  auto p_other = std::make_shared<IterativeDMatrix>(&other_iter, other_iter.Proxy(), p_ref,
                                                    Reset, Next, missing, 0, n_bins);
  for (auto const& ref_page : p_ref->template GetBatches<Page>({})) {
    for (auto const& other_page : p_other->template GetBatches<Page>({})) {
      auto const& ref_cuts = get_cuts(ref_page);
      auto const& other_cuts = get_cuts(other_page);
      ASSERT_EQ(ref_cuts.Values(), other_cuts.Values());
      ASSERT_EQ(ref_cuts.Ptrs(), other_cuts.Ptrs());
      ASSERT_EQ(ref_cuts.MinValues(), other_cuts.MinValues());
    }
  }

  // Rebuilt without a reference: cut values and pointers are expected to differ.
  p_other = std::make_shared<IterativeDMatrix>(&other_iter, other_iter.Proxy(), nullptr, Reset,
                                               Next, missing, 0, n_bins);
  for (auto const& ref_page : p_ref->template GetBatches<Page>({})) {
    for (auto const& other_page : p_other->template GetBatches<Page>({})) {
      auto const& ref_cuts = get_cuts(ref_page);
      auto const& other_cuts = get_cuts(other_page);
      ASSERT_NE(ref_cuts.Values(), other_cuts.Values());
      ASSERT_NE(ref_cuts.Ptrs(), other_cuts.Ptrs());
    }
  }

  // Use DMatrix as ref
  auto p_dmat = RandomDataGenerator(2048, Iter::Cols(), 0.5).GenerateDMatrix(true);
  auto p_quantile = std::make_shared<IterativeDMatrix>(&other_iter, other_iter.Proxy(), p_dmat,
                                                       Reset, Next, missing, 0, n_bins);
  for (auto const& dmat_page : p_dmat->template GetBatches<Page>({})) {
    for (auto const& quantile_page : p_quantile->template GetBatches<Page>({})) {
      auto const& dmat_cuts = get_cuts(dmat_page);
      auto const& quantile_cuts = get_cuts(quantile_page);
      ASSERT_EQ(dmat_cuts.Values(), quantile_cuts.Values());
      ASSERT_EQ(dmat_cuts.Ptrs(), quantile_cuts.Ptrs());
      ASSERT_EQ(dmat_cuts.MinValues(), quantile_cuts.MinValues());
    }
  }
}
} // namespace data
} // namespace xgboost

View File

@ -384,7 +384,7 @@ RandomDataGenerator::GenerateDMatrix(bool with_label, bool float_label,
std::shared_ptr<DMatrix> RandomDataGenerator::GenerateQuantileDMatrix() { std::shared_ptr<DMatrix> RandomDataGenerator::GenerateQuantileDMatrix() {
NumpyArrayIterForTest iter{this->sparsity_, this->rows_, this->cols_, 1}; NumpyArrayIterForTest iter{this->sparsity_, this->rows_, this->cols_, 1};
auto m = std::make_shared<data::IterativeDMatrix>( auto m = std::make_shared<data::IterativeDMatrix>(
&iter, iter.Proxy(), Reset, Next, std::numeric_limits<float>::quiet_NaN(), 0, bins_); &iter, iter.Proxy(), nullptr, Reset, Next, std::numeric_limits<float>::quiet_NaN(), 0, bins_);
return m; return m;
} }
@ -569,7 +569,7 @@ std::unique_ptr<GradientBooster> CreateTrainedGBM(
auto& h_gpair = gpair.HostVector(); auto& h_gpair = gpair.HostVector();
h_gpair.resize(kRows); h_gpair.resize(kRows);
for (size_t i = 0; i < kRows; ++i) { for (size_t i = 0; i < kRows; ++i) {
h_gpair[i] = {static_cast<float>(i), 1}; h_gpair[i] = GradientPair{static_cast<float>(i), 1};
} }
PredictionCacheEntry predts; PredictionCacheEntry predts;

View File

@ -27,7 +27,7 @@ int CudaArrayIterForTest::Next() {
std::shared_ptr<DMatrix> RandomDataGenerator::GenerateDeviceDMatrix() { std::shared_ptr<DMatrix> RandomDataGenerator::GenerateDeviceDMatrix() {
CudaArrayIterForTest iter{this->sparsity_, this->rows_, this->cols_, 1}; CudaArrayIterForTest iter{this->sparsity_, this->rows_, this->cols_, 1};
auto m = std::make_shared<data::IterativeDMatrix>( auto m = std::make_shared<data::IterativeDMatrix>(
&iter, iter.Proxy(), Reset, Next, std::numeric_limits<float>::quiet_NaN(), 0, bins_); &iter, iter.Proxy(), nullptr, Reset, Next, std::numeric_limits<float>::quiet_NaN(), 0, bins_);
return m; return m;
} }
} // namespace xgboost } // namespace xgboost

View File

@ -245,6 +245,17 @@ void TestUpdatePredictionCache(bool use_subsampling) {
} }
} }
// Hands a quantile DMatrix and an array-backed DMatrix of the same shape to
// TestTrainingPrediction with the "hist" tree method.
TEST(CPUPredictor, GHistIndex) {
  constexpr size_t kRows{128};
  constexpr size_t kCols{16};
  constexpr size_t kBins{64};

  // Quantile DMatrix from the random generator.
  auto p_hist = RandomDataGenerator{kRows, kCols, 0.0}.Bins(kBins).GenerateQuantileDMatrix();

  // DMatrix created from a generated array interface; `storage` backs the generated data.
  HostDeviceVector<float> storage(kRows * kCols);
  auto array_interface = RandomDataGenerator{kRows, kCols, 0.0}.GenerateArrayInterface(&storage);
  data::ArrayAdapter adapter(array_interface.c_str());
  std::shared_ptr<DMatrix> p_full{
      DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)};

  TestTrainingPrediction(kRows, kBins, "hist", p_full, p_hist);
}
TEST(CPUPredictor, CategoricalPrediction) { TEST(CPUPredictor, CategoricalPrediction) {
TestCategoricalPrediction("cpu_predictor"); TestCategoricalPrediction("cpu_predictor");
} }