Sketching from adapters (#5365)
* Sketching from adapters * Add weights test
This commit is contained in:
@@ -71,6 +71,7 @@ namespace data {
|
||||
constexpr size_t kAdapterUnknownSize = std::numeric_limits<size_t >::max();
|
||||
|
||||
struct COOTuple {
|
||||
COOTuple() = default;
|
||||
XGBOOST_DEVICE COOTuple(size_t row_idx, size_t column_idx, float value)
|
||||
: row_idx(row_idx), column_idx(column_idx), value(value) {}
|
||||
|
||||
|
||||
@@ -78,6 +78,20 @@ EllpackPageImpl::EllpackPageImpl(int device, EllpackInfo info, size_t n_rows) {
|
||||
monitor_.StopCuda("InitCompressedData");
|
||||
}
|
||||
|
||||
// Returns the row stride for `dmat`.
//
// When the matrix reports itself as dense, the stride is simply its column
// count. Otherwise every SparsePage batch is scanned and the stride is the
// largest gap between two consecutive entries of the page's row-offset array
// (i.e. the entry count of the longest row, assuming CSR-style offsets).
// Yields 0 when no batch contains at least two offsets.
size_t GetRowStride(DMatrix* dmat) {
  if (dmat->IsDense()) {
    return dmat->Info().num_col_;
  }

  size_t longest_row = 0;
  for (const auto& page : dmat->GetBatches<SparsePage>()) {
    const auto& offsets = page.offset.ConstHostVector();
    // Start at 1 so that offsets[end - 1] is always a valid predecessor.
    for (size_t end = 1; end < offsets.size(); ++end) {
      const size_t row_len =
          static_cast<size_t>(offsets[end] - offsets[end - 1]);
      longest_row = std::max(longest_row, row_len);
    }
  }
  return longest_row;
}
|
||||
|
||||
// Construct an ELLPACK matrix in memory.
|
||||
EllpackPageImpl::EllpackPageImpl(DMatrix* dmat, const BatchParam& param) {
|
||||
monitor_.Init("ellpack_page");
|
||||
@@ -87,13 +101,13 @@ EllpackPageImpl::EllpackPageImpl(DMatrix* dmat, const BatchParam& param) {
|
||||
|
||||
monitor_.StartCuda("Quantiles");
|
||||
// Create the quantile sketches for the dmatrix and initialize HistogramCuts.
|
||||
common::HistogramCuts hmat;
|
||||
size_t row_stride =
|
||||
common::DeviceSketch(param.gpu_id, param.max_bin, param.gpu_batch_nrows, dmat, &hmat);
|
||||
size_t row_stride = GetRowStride(dmat);
|
||||
auto cuts = common::DeviceSketch(param.gpu_id, dmat, param.max_bin,
|
||||
param.gpu_batch_nrows);
|
||||
monitor_.StopCuda("Quantiles");
|
||||
|
||||
monitor_.StartCuda("InitEllpackInfo");
|
||||
InitInfo(param.gpu_id, dmat->IsDense(), row_stride, hmat);
|
||||
InitInfo(param.gpu_id, dmat->IsDense(), row_stride, cuts);
|
||||
monitor_.StopCuda("InitEllpackInfo");
|
||||
|
||||
monitor_.StartCuda("InitCompressedData");
|
||||
|
||||
@@ -70,6 +70,20 @@ const EllpackPage& EllpackPageSource::Value() const {
|
||||
return impl_->Value();
|
||||
}
|
||||
|
||||
// Returns the row stride for `dmat`.
//
// When the matrix reports itself as dense, the stride is simply its column
// count. Otherwise every SparsePage batch is scanned and the stride is the
// largest gap between two consecutive entries of the page's row-offset array
// (i.e. the entry count of the longest row, assuming CSR-style offsets).
// Yields 0 when no batch contains at least two offsets.
size_t GetRowStride(DMatrix* dmat) {
  if (dmat->IsDense()) {
    return dmat->Info().num_col_;
  }

  size_t longest_row = 0;
  for (const auto& page : dmat->GetBatches<SparsePage>()) {
    const auto& offsets = page.offset.ConstHostVector();
    // Start at 1 so that offsets[end - 1] is always a valid predecessor.
    for (size_t end = 1; end < offsets.size(); ++end) {
      const size_t row_len =
          static_cast<size_t>(offsets[end] - offsets[end - 1]);
      longest_row = std::max(longest_row, row_len);
    }
  }
  return longest_row;
}
|
||||
|
||||
// Build the quantile sketch across the whole input data, then use the histogram cuts to compress
|
||||
// each CSR page, and write the accumulated ELLPACK pages to disk.
|
||||
EllpackPageSourceImpl::EllpackPageSourceImpl(DMatrix* dmat,
|
||||
@@ -85,13 +99,13 @@ EllpackPageSourceImpl::EllpackPageSourceImpl(DMatrix* dmat,
|
||||
dh::safe_cuda(cudaSetDevice(device_));
|
||||
|
||||
monitor_.StartCuda("Quantiles");
|
||||
common::HistogramCuts hmat;
|
||||
size_t row_stride =
|
||||
common::DeviceSketch(device_, param.max_bin, param.gpu_batch_nrows, dmat, &hmat);
|
||||
size_t row_stride = GetRowStride(dmat);
|
||||
auto cuts = common::DeviceSketch(param.gpu_id, dmat, param.max_bin,
|
||||
param.gpu_batch_nrows);
|
||||
monitor_.StopCuda("Quantiles");
|
||||
|
||||
monitor_.StartCuda("CreateEllpackInfo");
|
||||
ellpack_info_ = EllpackInfo(device_, dmat->IsDense(), row_stride, hmat, &ba_);
|
||||
ellpack_info_ = EllpackInfo(device_, dmat->IsDense(), row_stride, cuts, &ba_);
|
||||
monitor_.StopCuda("CreateEllpackInfo");
|
||||
|
||||
monitor_.StartCuda("WriteEllpackPages");
|
||||
|
||||
Reference in New Issue
Block a user