[EM] Avoid writing cut matrix to cache. (#10444)

This commit is contained in:
Jiaming Yuan 2024-06-19 18:03:38 +08:00 committed by GitHub
parent 63418d2f35
commit e5f1720656
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
31 changed files with 429 additions and 298 deletions

View File

@ -113,7 +113,10 @@ class MetaInfo {
MetaInfo Slice(common::Span<int32_t const> ridxs) const;
MetaInfo Copy() const;
/**
* @brief Whether the matrix is dense.
*/
bool IsDense() const { return num_col_ * num_row_ == num_nonzero_; }
/*!
* \brief Get weight of each instances.
* \param i Instance index.
@ -538,10 +541,10 @@ class DMatrix {
/*! \brief virtual destructor */
virtual ~DMatrix();
/*! \brief Whether the matrix is dense. */
[[nodiscard]] bool IsDense() const {
return Info().num_nonzero_ == Info().num_row_ * Info().num_col_;
}
/**
* @brief Whether the matrix is dense.
*/
[[nodiscard]] bool IsDense() const { return this->Info().IsDense(); }
/**
* \brief Load DMatrix from URI.

View File

@ -9,7 +9,6 @@
#include <limits>
#include <string>
#include <utility>
#include <vector>
#include "allreduce.h"
#include "broadcast.h"

View File

@ -162,6 +162,17 @@ class HistogramCuts {
}
return vals[bin_idx - 1];
}
void SetDevice(DeviceOrd d) const {
this->cut_ptrs_.SetDevice(d);
this->cut_ptrs_.ConstDevicePointer();
this->cut_values_.SetDevice(d);
this->cut_values_.ConstDevicePointer();
this->min_vals_.SetDevice(d);
this->min_vals_.ConstDevicePointer();
}
};
/**

View File

@ -1,5 +1,5 @@
/**
* Copyright 2019-2023, XGBoost contributors
* Copyright 2019-2024, XGBoost contributors
*/
#ifndef XGBOOST_USE_CUDA
@ -7,15 +7,17 @@
#include <xgboost/data.h>
#include <memory> // for shared_ptr
// dummy implementation of EllpackPage in case CUDA is not used
namespace xgboost {
class EllpackPageImpl {
common::HistogramCuts cuts_;
std::shared_ptr<common::HistogramCuts> cuts_;
public:
[[nodiscard]] common::HistogramCuts& Cuts() { return cuts_; }
[[nodiscard]] common::HistogramCuts const& Cuts() const { return cuts_; }
[[nodiscard]] common::HistogramCuts const& Cuts() const { return *cuts_; }
[[nodiscard]] std::shared_ptr<common::HistogramCuts const> CutsShared() const { return cuts_; }
};
EllpackPage::EllpackPage() = default;
@ -40,12 +42,6 @@ size_t EllpackPage::Size() const {
return 0;
}
[[nodiscard]] common::HistogramCuts& EllpackPage::Cuts() {
LOG(FATAL) << "Internal Error: XGBoost is not compiled with CUDA but "
"EllpackPage is required";
return impl_->Cuts();
}
[[nodiscard]] common::HistogramCuts const& EllpackPage::Cuts() const {
LOG(FATAL) << "Internal Error: XGBoost is not compiled with CUDA but "
"EllpackPage is required";

View File

@ -12,8 +12,8 @@
#include "../common/cuda_context.cuh"
#include "../common/hist_util.cuh"
#include "../common/transform_iterator.h" // MakeIndexTransformIter
#include "./ellpack_page.cuh"
#include "device_adapter.cuh" // for NoInfInData
#include "device_adapter.cuh" // for NoInfInData
#include "ellpack_page.cuh"
#include "ellpack_page.h"
#include "gradient_index.h"
#include "xgboost/data.h"
@ -33,11 +33,6 @@ size_t EllpackPage::Size() const { return impl_->Size(); }
void EllpackPage::SetBaseRowId(std::size_t row_id) { impl_->SetBaseRowId(row_id); }
[[nodiscard]] common::HistogramCuts& EllpackPage::Cuts() {
CHECK(impl_);
return impl_->Cuts();
}
[[nodiscard]] common::HistogramCuts const& EllpackPage::Cuts() const {
CHECK(impl_);
return impl_->Cuts();
@ -94,7 +89,8 @@ __global__ void CompressBinEllpackKernel(
}
// Construct an ELLPACK matrix with the given number of empty rows.
EllpackPageImpl::EllpackPageImpl(DeviceOrd device, common::HistogramCuts cuts, bool is_dense,
EllpackPageImpl::EllpackPageImpl(DeviceOrd device,
std::shared_ptr<common::HistogramCuts const> cuts, bool is_dense,
size_t row_stride, size_t n_rows)
: is_dense(is_dense), cuts_(std::move(cuts)), row_stride(row_stride), n_rows(n_rows) {
monitor_.Init("ellpack_page");
@ -105,12 +101,11 @@ EllpackPageImpl::EllpackPageImpl(DeviceOrd device, common::HistogramCuts cuts, b
monitor_.Stop("InitCompressedData");
}
EllpackPageImpl::EllpackPageImpl(DeviceOrd device, common::HistogramCuts cuts,
const SparsePage &page, bool is_dense,
size_t row_stride,
EllpackPageImpl::EllpackPageImpl(DeviceOrd device,
std::shared_ptr<common::HistogramCuts const> cuts,
const SparsePage& page, bool is_dense, size_t row_stride,
common::Span<FeatureType const> feature_types)
: cuts_(std::move(cuts)), is_dense(is_dense), n_rows(page.Size()),
row_stride(row_stride) {
: cuts_(std::move(cuts)), is_dense(is_dense), n_rows(page.Size()), row_stride(row_stride) {
this->InitCompressedData(device);
this->CreateHistIndices(device, page, feature_types);
}
@ -127,9 +122,10 @@ EllpackPageImpl::EllpackPageImpl(Context const* ctx, DMatrix* dmat, const BatchP
// Create the quantile sketches for the dmatrix and initialize HistogramCuts.
row_stride = GetRowStride(dmat);
if (!param.hess.empty()) {
cuts_ = common::DeviceSketchWithHessian(ctx, dmat, param.max_bin, param.hess);
cuts_ = std::make_shared<common::HistogramCuts>(
common::DeviceSketchWithHessian(ctx, dmat, param.max_bin, param.hess));
} else {
cuts_ = common::DeviceSketch(ctx, dmat, param.max_bin);
cuts_ = std::make_shared<common::HistogramCuts>(common::DeviceSketch(ctx, dmat, param.max_bin));
}
monitor_.Stop("Quantiles");
@ -297,7 +293,7 @@ template <typename AdapterBatch>
EllpackPageImpl::EllpackPageImpl(AdapterBatch batch, float missing, DeviceOrd device, bool is_dense,
common::Span<size_t> row_counts_span,
common::Span<FeatureType const> feature_types, size_t row_stride,
size_t n_rows, common::HistogramCuts const& cuts) {
size_t n_rows, std::shared_ptr<common::HistogramCuts const> cuts) {
dh::safe_cuda(cudaSetDevice(device.ordinal));
*this = EllpackPageImpl(device, cuts, is_dense, row_stride, n_rows);
@ -309,7 +305,7 @@ EllpackPageImpl::EllpackPageImpl(AdapterBatch batch, float missing, DeviceOrd de
template EllpackPageImpl::EllpackPageImpl( \
__BATCH_T batch, float missing, DeviceOrd device, bool is_dense, \
common::Span<size_t> row_counts_span, common::Span<FeatureType const> feature_types, \
size_t row_stride, size_t n_rows, common::HistogramCuts const& cuts);
size_t row_stride, size_t n_rows, std::shared_ptr<common::HistogramCuts const> cuts);
ELLPACK_BATCH_SPECIALIZE(data::CudfAdapterBatch)
ELLPACK_BATCH_SPECIALIZE(data::CupyAdapterBatch)
@ -359,7 +355,11 @@ void CopyGHistToEllpack(GHistIndexMatrix const& page, common::Span<size_t const>
EllpackPageImpl::EllpackPageImpl(Context const* ctx, GHistIndexMatrix const& page,
common::Span<FeatureType const> ft)
: is_dense{page.IsDense()}, base_rowid{page.base_rowid}, n_rows{page.Size()}, cuts_{page.cut} {
: is_dense{page.IsDense()},
base_rowid{page.base_rowid},
n_rows{page.Size()},
// This makes a copy of the cut values.
cuts_{std::make_shared<common::HistogramCuts>(page.cut)} {
auto it = common::MakeIndexTransformIter(
[&](size_t i) { return page.row_ptr[i + 1] - page.row_ptr[i]; });
row_stride = *std::max_element(it, it + page.Size());

View File

@ -23,20 +23,20 @@ struct EllpackDeviceAccessor {
bool is_dense;
/*! \brief Row length for ELLPACK, equal to number of features. */
size_t row_stride;
size_t base_rowid{};
size_t n_rows{};
common::CompressedIterator<uint32_t> gidx_iter;
bst_idx_t base_rowid{0};
bst_idx_t n_rows{0};
common::CompressedIterator<std::uint32_t> gidx_iter;
/*! \brief Minimum value for each feature. Size equals to number of features. */
common::Span<const bst_float> min_fvalue;
common::Span<const float> min_fvalue;
/*! \brief Histogram cut pointers. Size equals to (number of features + 1). */
common::Span<const uint32_t> feature_segments;
common::Span<const std::uint32_t> feature_segments;
/*! \brief Histogram cut values. Size equals to (bins per feature * number of features). */
common::Span<const bst_float> gidx_fvalue_map;
common::Span<const float> gidx_fvalue_map;
common::Span<const FeatureType> feature_types;
EllpackDeviceAccessor(DeviceOrd device, const common::HistogramCuts& cuts, bool is_dense,
size_t row_stride, size_t base_rowid, size_t n_rows,
EllpackDeviceAccessor(DeviceOrd device, std::shared_ptr<const common::HistogramCuts> cuts,
bool is_dense, size_t row_stride, size_t base_rowid, size_t n_rows,
common::CompressedIterator<uint32_t> gidx_iter,
common::Span<FeatureType const> feature_types)
: is_dense(is_dense),
@ -46,16 +46,16 @@ struct EllpackDeviceAccessor {
gidx_iter(gidx_iter),
feature_types{feature_types} {
if (device.IsCPU()) {
gidx_fvalue_map = cuts.cut_values_.ConstHostSpan();
feature_segments = cuts.cut_ptrs_.ConstHostSpan();
min_fvalue = cuts.min_vals_.ConstHostSpan();
gidx_fvalue_map = cuts->cut_values_.ConstHostSpan();
feature_segments = cuts->cut_ptrs_.ConstHostSpan();
min_fvalue = cuts->min_vals_.ConstHostSpan();
} else {
cuts.cut_values_.SetDevice(device);
cuts.cut_ptrs_.SetDevice(device);
cuts.min_vals_.SetDevice(device);
gidx_fvalue_map = cuts.cut_values_.ConstDeviceSpan();
feature_segments = cuts.cut_ptrs_.ConstDeviceSpan();
min_fvalue = cuts.min_vals_.ConstDeviceSpan();
cuts->cut_values_.SetDevice(device);
cuts->cut_ptrs_.SetDevice(device);
cuts->min_vals_.SetDevice(device);
gidx_fvalue_map = cuts->cut_values_.ConstDeviceSpan();
feature_segments = cuts->cut_ptrs_.ConstDeviceSpan();
min_fvalue = cuts->min_vals_.ConstDeviceSpan();
}
}
// Get a matrix element, uses binary search for look up Return NaN if missing
@ -142,13 +142,14 @@ class EllpackPageImpl {
* This is used in the sampling case. The ELLPACK page is constructed from an existing EllpackInfo
* and the given number of rows.
*/
EllpackPageImpl(DeviceOrd device, common::HistogramCuts cuts, bool is_dense, size_t row_stride,
size_t n_rows);
EllpackPageImpl(DeviceOrd device, std::shared_ptr<common::HistogramCuts const> cuts,
bool is_dense, size_t row_stride, size_t n_rows);
/*!
* \brief Constructor used for external memory.
*/
EllpackPageImpl(DeviceOrd device, common::HistogramCuts cuts, const SparsePage& page,
bool is_dense, size_t row_stride, common::Span<FeatureType const> feature_types);
EllpackPageImpl(DeviceOrd device, std::shared_ptr<common::HistogramCuts const> cuts,
const SparsePage& page, bool is_dense, size_t row_stride,
common::Span<FeatureType const> feature_types);
/*!
* \brief Constructor from an existing DMatrix.
@ -162,7 +163,7 @@ class EllpackPageImpl {
explicit EllpackPageImpl(AdapterBatch batch, float missing, DeviceOrd device, bool is_dense,
common::Span<size_t> row_counts_span,
common::Span<FeatureType const> feature_types, size_t row_stride,
size_t n_rows, common::HistogramCuts const& cuts);
size_t n_rows, std::shared_ptr<common::HistogramCuts const> cuts);
/**
* \brief Constructor from an existing CPU gradient index.
*/
@ -194,8 +195,9 @@ class EllpackPageImpl {
base_rowid = row_id;
}
[[nodiscard]] common::HistogramCuts& Cuts() { return cuts_; }
[[nodiscard]] common::HistogramCuts const& Cuts() const { return cuts_; }
[[nodiscard]] common::HistogramCuts const& Cuts() const { return *cuts_; }
[[nodiscard]] std::shared_ptr<common::HistogramCuts const> CutsShared() const { return cuts_; }
void SetCuts(std::shared_ptr<common::HistogramCuts const> cuts) { cuts_ = cuts; }
/*! \return Estimation of memory cost of this page. */
static size_t MemCostBytes(size_t num_rows, size_t row_stride, const common::HistogramCuts&cuts) ;
@ -203,7 +205,7 @@ class EllpackPageImpl {
/*! \brief Return the total number of symbols (total number of bins plus 1 for
* not found). */
[[nodiscard]] std::size_t NumSymbols() const { return cuts_.TotalBins() + 1; }
[[nodiscard]] std::size_t NumSymbols() const { return cuts_->TotalBins() + 1; }
[[nodiscard]] EllpackDeviceAccessor GetDeviceAccessor(
DeviceOrd device, common::Span<FeatureType const> feature_types = {}) const;
@ -225,19 +227,18 @@ class EllpackPageImpl {
*/
void InitCompressedData(DeviceOrd device);
public:
public:
/*! \brief Whether or not if the matrix is dense. */
bool is_dense;
/*! \brief Row length for ELLPACK. */
size_t row_stride;
size_t base_rowid{0};
size_t n_rows{};
bst_idx_t base_rowid{0};
bst_idx_t n_rows{};
/*! \brief global index of histogram, which is stored in ELLPACK format. */
HostDeviceVector<common::CompressedByteT> gidx_buffer;
private:
common::HistogramCuts cuts_;
std::shared_ptr<common::HistogramCuts const> cuts_;
common::Monitor monitor_;
};

View File

@ -49,7 +49,6 @@ class EllpackPage {
[[nodiscard]] const EllpackPageImpl* Impl() const { return impl_.get(); }
EllpackPageImpl* Impl() { return impl_.get(); }
[[nodiscard]] common::HistogramCuts& Cuts();
[[nodiscard]] common::HistogramCuts const& Cuts() const;
private:

View File

@ -1,60 +1,78 @@
/**
* Copyright 2019-2023, XGBoost contributors
* Copyright 2019-2024, XGBoost contributors
*/
#include <dmlc/registry.h>
#include <cstddef> // for size_t
#include <cstdint> // for uint64_t
#include "../common/io.h" // for AlignedResourceReadStream, AlignedFileWriteStream
#include "../common/ref_resource_view.h" // for ReadVec, WriteVec
#include "ellpack_page.cuh"
#include "histogram_cut_format.h" // for ReadHistogramCuts, WriteHistogramCuts
#include "sparse_page_writer.h" // for SparsePageFormat
#include "ellpack_page.cuh" // for EllpackPage
#include "ellpack_page_raw_format.h"
namespace xgboost::data {
DMLC_REGISTRY_FILE_TAG(ellpack_page_raw_format);
class EllpackPageRawFormat : public SparsePageFormat<EllpackPage> {
public:
bool Read(EllpackPage* page, common::AlignedResourceReadStream* fi) override {
auto* impl = page->Impl();
if (!ReadHistogramCuts(&impl->Cuts(), fi)) {
return false;
}
if (!fi->Read(&impl->n_rows)) {
return false;
}
if (!fi->Read(&impl->is_dense)) {
return false;
}
if (!fi->Read(&impl->row_stride)) {
return false;
}
if (!common::ReadVec(fi, &impl->gidx_buffer.HostVector())) {
return false;
}
if (!fi->Read(&impl->base_rowid)) {
return false;
}
dh::DefaultStream().Sync();
namespace {
template <typename T>
[[nodiscard]] bool ReadDeviceVec(common::AlignedResourceReadStream* fi, HostDeviceVector<T>* vec) {
std::uint64_t n{0};
if (!fi->Read(&n)) {
return false;
}
if (n == 0) {
return true;
}
size_t Write(const EllpackPage& page, common::AlignedFileWriteStream* fo) override {
std::size_t bytes{0};
auto* impl = page.Impl();
bytes += WriteHistogramCuts(impl->Cuts(), fo);
bytes += fo->Write(impl->n_rows);
bytes += fo->Write(impl->is_dense);
bytes += fo->Write(impl->row_stride);
CHECK(!impl->gidx_buffer.ConstHostVector().empty());
bytes += common::WriteVec(fo, impl->gidx_buffer.HostVector());
bytes += fo->Write(impl->base_rowid);
return bytes;
}
};
auto expected_bytes = sizeof(T) * n;
XGBOOST_REGISTER_ELLPACK_PAGE_FORMAT(raw)
.describe("Raw ELLPACK binary data format.")
.set_body([]() { return new EllpackPageRawFormat(); });
auto [ptr, n_bytes] = fi->Consume(expected_bytes);
if (n_bytes != expected_bytes) {
return false;
}
vec->SetDevice(DeviceOrd::CUDA(0));
vec->Resize(n);
auto d_vec = vec->DeviceSpan();
dh::safe_cuda(
cudaMemcpyAsync(d_vec.data(), ptr, n_bytes, cudaMemcpyDefault, dh::DefaultStream()));
return true;
}
} // namespace
[[nodiscard]] bool EllpackPageRawFormat::Read(EllpackPage* page,
common::AlignedResourceReadStream* fi) {
auto* impl = page->Impl();
impl->SetCuts(this->cuts_);
if (!fi->Read(&impl->n_rows)) {
return false;
}
if (!fi->Read(&impl->is_dense)) {
return false;
}
if (!fi->Read(&impl->row_stride)) {
return false;
}
if (!ReadDeviceVec(fi, &impl->gidx_buffer)) {
return false;
}
if (!fi->Read(&impl->base_rowid)) {
return false;
}
return true;
}
[[nodiscard]] std::size_t EllpackPageRawFormat::Write(const EllpackPage& page,
common::AlignedFileWriteStream* fo) {
std::size_t bytes{0};
auto* impl = page.Impl();
bytes += fo->Write(impl->n_rows);
bytes += fo->Write(impl->is_dense);
bytes += fo->Write(impl->row_stride);
CHECK(!impl->gidx_buffer.ConstHostVector().empty());
bytes += common::WriteVec(fo, impl->gidx_buffer.HostVector());
bytes += fo->Write(impl->base_rowid);
return bytes;
}
} // namespace xgboost::data

View File

@ -0,0 +1,46 @@
/**
* Copyright 2019-2024, XGBoost contributors
*/
#pragma once
#include <cstddef> // for size_t
#include <memory> // for shared_ptr
#include <utility> // for move
#include "../common/io.h" // for AlignedResourceReadStream
#include "sparse_page_writer.h" // for SparsePageFormat
#include "xgboost/data.h" // for EllpackPage
#if !defined(XGBOOST_USE_CUDA)
#include "../common/common.h" // for AssertGPUSupport
#endif // !defined(XGBOOST_USE_CUDA)`
namespace xgboost::common {
class HistogramCuts;
}
namespace xgboost::data {
class EllpackPageRawFormat : public SparsePageFormat<EllpackPage> {
std::shared_ptr<common::HistogramCuts const> cuts_;
public:
explicit EllpackPageRawFormat(std::shared_ptr<common::HistogramCuts const> cuts)
: cuts_{std::move(cuts)} {}
[[nodiscard]] bool Read(EllpackPage* page, common::AlignedResourceReadStream* fi) override;
[[nodiscard]] std::size_t Write(const EllpackPage& page,
common::AlignedFileWriteStream* fo) override;
};
#if !defined(XGBOOST_USE_CUDA)
inline bool EllpackPageRawFormat::Read(EllpackPage*, common::AlignedResourceReadStream*) {
common::AssertGPUSupport();
return false;
}
inline std::size_t EllpackPageRawFormat::Write(const EllpackPage&,
common::AlignedFileWriteStream*) {
common::AssertGPUSupport();
return 0;
}
#endif // !defined(XGBOOST_USE_CUDA)
} // namespace xgboost::data

View File

@ -21,7 +21,7 @@ void EllpackPageSource::Fetch() {
auto const &csr = source_->Page();
this->page_.reset(new EllpackPage{});
auto *impl = this->page_->Impl();
*impl = EllpackPageImpl(device_, *cuts_, *csr, is_dense_, row_stride_, feature_types_);
*impl = EllpackPageImpl(device_, cuts_, *csr, is_dense_, row_stride_, feature_types_);
page_->SetBaseRowId(csr->base_rowid);
this->WriteCache();
}

View File

@ -1,35 +1,43 @@
/**
* Copyright 2019-2023, XGBoost Contributors
* Copyright 2019-2024, XGBoost Contributors
*/
#ifndef XGBOOST_DATA_ELLPACK_PAGE_SOURCE_H_
#define XGBOOST_DATA_ELLPACK_PAGE_SOURCE_H_
#include <xgboost/data.h>
#include <cstdint> // for int32_t
#include <memory> // for shared_ptr
#include <utility> // for move
#include <memory>
#include <string>
#include <utility>
#include "../common/common.h"
#include "../common/hist_util.h"
#include "ellpack_page.h" // for EllpackPage
#include "sparse_page_source.h"
#include "../common/hist_util.h" // for HistogramCuts
#include "ellpack_page.h" // for EllpackPage
#include "ellpack_page_raw_format.h" // for EllpackPageRawFormat
#include "sparse_page_source.h" // for PageSourceIncMixIn
#include "xgboost/base.h" // for bst_idx_t
#include "xgboost/context.h" // for DeviceOrd
#include "xgboost/data.h" // for BatchParam
#include "xgboost/span.h" // for Span
namespace xgboost::data {
class EllpackPageSource : public PageSourceIncMixIn<EllpackPage> {
bool is_dense_;
size_t row_stride_;
bst_idx_t row_stride_;
BatchParam param_;
common::Span<FeatureType const> feature_types_;
std::unique_ptr<common::HistogramCuts> cuts_;
std::shared_ptr<common::HistogramCuts const> cuts_;
DeviceOrd device_;
protected:
[[nodiscard]] SparsePageFormat<EllpackPage>* CreatePageFormat() const override {
cuts_->SetDevice(this->device_);
return new EllpackPageRawFormat{cuts_};
}
public:
EllpackPageSource(float missing, int nthreads, bst_feature_t n_features, size_t n_batches,
std::shared_ptr<Cache> cache, BatchParam param,
std::unique_ptr<common::HistogramCuts> cuts, bool is_dense, size_t row_stride,
common::Span<FeatureType const> feature_types,
EllpackPageSource(float missing, std::int32_t nthreads, bst_feature_t n_features,
size_t n_batches, std::shared_ptr<Cache> cache, BatchParam param,
std::shared_ptr<common::HistogramCuts const> cuts, bool is_dense,
bst_idx_t row_stride, common::Span<FeatureType const> feature_types,
std::shared_ptr<SparsePageSource> source, DeviceOrd device)
: PageSourceIncMixIn(missing, nthreads, n_features, n_batches, cache, false),
is_dense_{is_dense},

View File

@ -1,106 +1,97 @@
/**
* Copyright 2021-2024 XGBoost contributors
* Copyright 2021-2024, XGBoost contributors
*/
#include <cstddef> // for size_t
#include <cstdint> // for uint8_t
#include <type_traits> // for underlying_type_t
#include <vector> // for vector
#include "gradient_index_format.h"
#include <cstddef> // for size_t
#include <cstdint> // for uint8_t
#include <type_traits> // for underlying_type_t
#include <vector> // for vector
#include "../common/hist_util.h" // for HistogramCuts
#include "../common/io.h" // for AlignedResourceReadStream
#include "../common/ref_resource_view.h" // for ReadVec, WriteVec
#include "gradient_index.h" // for GHistIndexMatrix
#include "histogram_cut_format.h" // for ReadHistogramCuts
#include "sparse_page_writer.h" // for SparsePageFormat
namespace xgboost::data {
class GHistIndexRawFormat : public SparsePageFormat<GHistIndexMatrix> {
public:
bool Read(GHistIndexMatrix* page, common::AlignedResourceReadStream* fi) override {
CHECK(fi);
[[nodiscard]] bool GHistIndexRawFormat::Read(GHistIndexMatrix* page,
common::AlignedResourceReadStream* fi) {
CHECK(fi);
if (!ReadHistogramCuts(&page->cut, fi)) {
return false;
}
// indptr
if (!common::ReadVec(fi, &page->row_ptr)) {
return false;
}
// data
// - bin type
// Old gcc doesn't support reading from enum.
std::underlying_type_t<common::BinTypeSize> uint_bin_type{0};
if (!fi->Read(&uint_bin_type)) {
return false;
}
common::BinTypeSize size_type = static_cast<common::BinTypeSize>(uint_bin_type);
// - index buffer
if (!common::ReadVec(fi, &page->data)) {
return false;
}
// - index
page->index =
common::Index{common::Span{page->data.data(), static_cast<size_t>(page->data.size())},
size_type};
// hit count
if (!common::ReadVec(fi, &page->hit_count)) {
return false;
}
if (!fi->Read(&page->max_numeric_bins_per_feat)) {
return false;
}
if (!fi->Read(&page->base_rowid)) {
return false;
}
bool is_dense = false;
if (!fi->Read(&is_dense)) {
return false;
}
page->SetDense(is_dense);
if (is_dense) {
page->index.SetBinOffset(page->cut.Ptrs());
}
if (!page->ReadColumnPage(fi)) {
return false;
}
return true;
page->Cuts() = this->cuts_;
// indptr
if (!common::ReadVec(fi, &page->row_ptr)) {
return false;
}
std::size_t Write(GHistIndexMatrix const& page, common::AlignedFileWriteStream* fo) override {
std::size_t bytes = 0;
bytes += WriteHistogramCuts(page.cut, fo);
// indptr
bytes += common::WriteVec(fo, page.row_ptr);
// data
// - bin type
std::underlying_type_t<common::BinTypeSize> uint_bin_type = page.index.GetBinTypeSize();
bytes += fo->Write(uint_bin_type);
// - index buffer
std::vector<std::uint8_t> data(page.index.begin(), page.index.end());
bytes += fo->Write(static_cast<std::uint64_t>(data.size()));
if (!data.empty()) {
bytes += fo->Write(data.data(), data.size());
}
// hit count
bytes += common::WriteVec(fo, page.hit_count);
// max_bins, base row, is_dense
bytes += fo->Write(page.max_numeric_bins_per_feat);
bytes += fo->Write(page.base_rowid);
bytes += fo->Write(page.IsDense());
bytes += page.WriteColumnPage(fo);
return bytes;
// data
// - bin type
// Old gcc doesn't support reading from enum.
std::underlying_type_t<common::BinTypeSize> uint_bin_type{0};
if (!fi->Read(&uint_bin_type)) {
return false;
}
};
common::BinTypeSize size_type = static_cast<common::BinTypeSize>(uint_bin_type);
// - index buffer
if (!common::ReadVec(fi, &page->data)) {
return false;
}
// - index
page->index = common::Index{
common::Span{page->data.data(), static_cast<size_t>(page->data.size())}, size_type};
// hit count
if (!common::ReadVec(fi, &page->hit_count)) {
return false;
}
if (!fi->Read(&page->max_numeric_bins_per_feat)) {
return false;
}
if (!fi->Read(&page->base_rowid)) {
return false;
}
bool is_dense = false;
if (!fi->Read(&is_dense)) {
return false;
}
page->SetDense(is_dense);
if (is_dense) {
page->index.SetBinOffset(page->cut.Ptrs());
}
if (!page->ReadColumnPage(fi)) {
return false;
}
return true;
}
[[nodiscard]] std::size_t GHistIndexRawFormat::Write(GHistIndexMatrix const& page,
common::AlignedFileWriteStream* fo) {
std::size_t bytes = 0;
// indptr
bytes += common::WriteVec(fo, page.row_ptr);
// data
// - bin type
std::underlying_type_t<common::BinTypeSize> uint_bin_type = page.index.GetBinTypeSize();
bytes += fo->Write(uint_bin_type);
// - index buffer
std::vector<std::uint8_t> data(page.index.begin(), page.index.end());
bytes += fo->Write(static_cast<std::uint64_t>(data.size()));
if (!data.empty()) {
bytes += fo->Write(data.data(), data.size());
}
// hit count
bytes += common::WriteVec(fo, page.hit_count);
// max_bins, base row, is_dense
bytes += fo->Write(page.max_numeric_bins_per_feat);
bytes += fo->Write(page.base_rowid);
bytes += fo->Write(page.IsDense());
bytes += page.WriteColumnPage(fo);
return bytes;
}
DMLC_REGISTRY_FILE_TAG(gradient_index_format);
XGBOOST_REGISTER_GHIST_INDEX_PAGE_FORMAT(raw)
.describe("Raw GHistIndex binary data format.")
.set_body([]() { return new GHistIndexRawFormat(); });
} // namespace xgboost::data

View File

@ -0,0 +1,29 @@
/**
* Copyright 2021-2024, XGBoost contributors
*/
#pragma once
#include <cstddef> // for size_t
#include <utility> // for move
#include "../common/hist_util.h" // for HistogramCuts
#include "../common/io.h" // for AlignedFileWriteStream
#include "gradient_index.h" // for GHistIndexMatrix
#include "sparse_page_writer.h" // for SparsePageFormat
namespace xgboost::common {
class HistogramCuts;
}
namespace xgboost::data {
class GHistIndexRawFormat : public SparsePageFormat<GHistIndexMatrix> {
common::HistogramCuts cuts_;
public:
[[nodiscard]] bool Read(GHistIndexMatrix* page, common::AlignedResourceReadStream* fi) override;
[[nodiscard]] std::size_t Write(GHistIndexMatrix const& page,
common::AlignedFileWriteStream* fo) override;
explicit GHistIndexRawFormat(common::HistogramCuts cuts) : cuts_{std::move(cuts)} {}
};
} // namespace xgboost::data

View File

@ -1,5 +1,5 @@
/**
* Copyright 2021-2023, XGBoost Contributors
* Copyright 2021-2024, XGBoost Contributors
*/
#include "gradient_index_page_source.h"

View File

@ -1,27 +1,39 @@
/*!
* Copyright 2021-2022 by XGBoost Contributors
/**
* Copyright 2021-2024, XGBoost Contributors
*/
#ifndef XGBOOST_DATA_GRADIENT_INDEX_PAGE_SOURCE_H_
#define XGBOOST_DATA_GRADIENT_INDEX_PAGE_SOURCE_H_
#include <memory>
#include <utility>
#include <cmath> // for isnan
#include <cstdint> // for int32_t
#include <memory> // for shared_ptr
#include <utility> // for move
#include "gradient_index.h"
#include "sparse_page_source.h"
#include "../common/hist_util.h" // for HistogramCuts
#include "gradient_index.h" // for GHistIndexMatrix
#include "gradient_index_format.h" // for GHistIndexRawFormat
#include "sparse_page_source.h" // for PageSourceIncMixIn
#include "xgboost/base.h" // for bst_feature_t
#include "xgboost/data.h" // for BatchParam, FeatureType
#include "xgboost/span.h" // for Span
namespace xgboost {
namespace data {
class GradientIndexPageSource : public PageSourceIncMixIn<GHistIndexMatrix> {
common::HistogramCuts cuts_;
bool is_dense_;
int32_t max_bin_per_feat_;
std::int32_t max_bin_per_feat_;
common::Span<FeatureType const> feature_types_;
double sparse_thresh_;
protected:
[[nodiscard]] SparsePageFormat<GHistIndexMatrix>* CreatePageFormat() const override {
return new GHistIndexRawFormat{cuts_};
}
public:
GradientIndexPageSource(float missing, int nthreads, bst_feature_t n_features, size_t n_batches,
std::shared_ptr<Cache> cache, BatchParam param,
GradientIndexPageSource(float missing, std::int32_t nthreads, bst_feature_t n_features,
size_t n_batches, std::shared_ptr<Cache> cache, BatchParam param,
common::HistogramCuts cuts, bool is_dense,
common::Span<FeatureType const> feature_types,
std::shared_ptr<SparsePageSource> source)

View File

@ -54,7 +54,7 @@ void IterativeDMatrix::InitFromCUDA(Context const* ctx, BatchParam const& p,
/**
* Generate quantiles
*/
common::HistogramCuts cuts;
auto cuts = std::make_shared<common::HistogramCuts>();
do {
// We use do while here as the first batch is fetched in ctor
CHECK_LT(ctx->Ordinal(), common::AllVisibleGPUs());
@ -104,9 +104,9 @@ void IterativeDMatrix::InitFromCUDA(Context const* ctx, BatchParam const& p,
sketch_containers.clear();
sketch_containers.shrink_to_fit();
final_sketch.MakeCuts(ctx, &cuts, this->info_.IsColumnSplit());
final_sketch.MakeCuts(ctx, cuts.get(), this->info_.IsColumnSplit());
} else {
GetCutsFromRef(ctx, ref, Info().num_col_, p, &cuts);
GetCutsFromRef(ctx, ref, Info().num_col_, p, cuts.get());
}
this->info_.num_row_ = accumulated_rows;

View File

@ -1,7 +1,7 @@
/**
* Copyright 2021-2024, XGBoost contributors
*/
#include <memory> // for unique_ptr
#include <memory> // for shared_ptr
#include "../common/hist_util.cuh"
#include "../common/hist_util.h" // for HistogramCuts
@ -26,13 +26,13 @@ BatchSet<EllpackPage> SparsePageDMatrix::GetEllpackBatches(Context const* ctx,
// reinitialize the cache
cache_info_.erase(id);
MakeCache(this, ".ellpack.page", cache_prefix_, &cache_info_);
std::unique_ptr<common::HistogramCuts> cuts;
std::shared_ptr<common::HistogramCuts> cuts;
if (!param.hess.empty()) {
cuts = std::make_unique<common::HistogramCuts>(
cuts = std::make_shared<common::HistogramCuts>(
common::DeviceSketchWithHessian(ctx, this, param.max_bin, param.hess));
} else {
cuts =
std::make_unique<common::HistogramCuts>(common::DeviceSketch(ctx, this, param.max_bin));
std::make_shared<common::HistogramCuts>(common::DeviceSketch(ctx, this, param.max_bin));
}
this->InitializeSparsePage(ctx); // reset after use.

View File

@ -7,6 +7,7 @@
#include <algorithm> // for min
#include <atomic> // for atomic
#include <cstdint> // for uint64_t
#include <cstdio> // for remove
#include <future> // for future
#include <memory> // for unique_ptr
@ -72,9 +73,13 @@ struct Cache {
*/
[[nodiscard]] auto View(std::size_t i) const {
std::uint64_t off = offset.at(i);
std::uint64_t len = offset.at(i + 1) - offset[i];
std::uint64_t len = this->Bytes(i);
return std::pair{off, len};
}
/**
* @brief Get the number of bytes for the i^th page.
*/
[[nodiscard]] std::uint64_t Bytes(std::size_t i) const { return offset.at(i + 1) - offset[i]; }
/**
* @brief Call this once the write for the cache is complete.
*/
@ -174,6 +179,10 @@ class SparsePageSourceImpl : public BatchIteratorImpl<S> {
ExceHandler exce_;
common::Monitor monitor_;
[[nodiscard]] virtual SparsePageFormat<S>* CreatePageFormat() const {
return ::xgboost::data::CreatePageFormat<S>("raw");
}
[[nodiscard]] bool ReadCache() {
CHECK(!at_end_);
if (!cache_info_->written) {
@ -207,7 +216,7 @@ class SparsePageSourceImpl : public BatchIteratorImpl<S> {
*GlobalConfigThreadLocalStore::Get() = config;
auto page = std::make_shared<S>();
this->exce_.Run([&] {
std::unique_ptr<SparsePageFormat<S>> fmt{CreatePageFormat<S>("raw")};
std::unique_ptr<SparsePageFormat<S>> fmt{this->CreatePageFormat()};
auto name = self->cache_info_->ShardName();
auto [offset, length] = self->cache_info_->View(fetch_it);
auto fi = std::make_unique<common::PrivateMmapConstStream>(name, offset, length);
@ -234,7 +243,7 @@ class SparsePageSourceImpl : public BatchIteratorImpl<S> {
CHECK(!cache_info_->written);
common::Timer timer;
timer.Start();
std::unique_ptr<SparsePageFormat<S>> fmt{CreatePageFormat<S>("raw")};
std::unique_ptr<SparsePageFormat<S>> fmt{this->CreatePageFormat()};
auto name = cache_info_->ShardName();
std::unique_ptr<common::AlignedFileWriteStream> fo;
@ -404,8 +413,8 @@ class PageSourceIncMixIn : public SparsePageSourceImpl<S> {
bool sync_{true};
public:
PageSourceIncMixIn(float missing, int nthreads, bst_feature_t n_features, std::uint32_t n_batches,
std::shared_ptr<Cache> cache, bool sync)
PageSourceIncMixIn(float missing, std::int32_t nthreads, bst_feature_t n_features,
std::uint32_t n_batches, std::shared_ptr<Cache> cache, bool sync)
: Super::SparsePageSourceImpl{missing, nthreads, n_features, n_batches, cache}, sync_{sync} {}
[[nodiscard]] PageSourceIncMixIn& operator++() final {

View File

@ -10,7 +10,6 @@
#include <string> // for string
#include "../common/io.h" // for AlignedResourceReadStream, AlignedFileWriteStream
#include "dmlc/io.h" // for Stream
#include "dmlc/registry.h" // for Registry, FunctionRegEntryBase
#include "xgboost/data.h" // for SparsePage,CSCPage,SortedCSCPage,EllpackPage ...

View File

@ -7,7 +7,6 @@
#include <dmlc/thread_local.h>
#include "xgboost/global_config.h"
#include "xgboost/json.h"
namespace xgboost {
DMLC_REGISTER_PARAMETER(GlobalConfiguration);

View File

@ -1,5 +1,5 @@
/**
* Copyright 2020-2023 by XGBoost Contributors
* Copyright 2020-2024, XGBoost Contributors
*/
#ifndef FEATURE_GROUPS_CUH_
#define FEATURE_GROUPS_CUH_
@ -29,22 +29,23 @@ struct FeatureGroup {
/** The number of features in the group. */
int num_features;
/** The first bin in the group. */
int start_bin;
bst_bin_t start_bin;
/** The number of bins in the group. */
int num_bins;
bst_bin_t num_bins;
};
/** \brief FeatureGroupsAccessor is a non-owning accessor for FeatureGroups. */
struct FeatureGroupsAccessor {
FeatureGroupsAccessor(common::Span<const int> feature_segments_,
common::Span<const int> bin_segments_, int max_group_bins_) :
feature_segments(feature_segments_), bin_segments(bin_segments_),
max_group_bins(max_group_bins_) {}
common::Span<const int> bin_segments_, int max_group_bins_)
: feature_segments(feature_segments_),
bin_segments(bin_segments_),
max_group_bins(max_group_bins_) {}
common::Span<const int> feature_segments;
common::Span<const int> bin_segments;
int max_group_bins;
/** \brief Gets the number of feature groups. */
__host__ __device__ int NumGroups() const {
return feature_segments.size() - 1;
@ -84,7 +85,7 @@ struct FeatureGroups {
/** Maximum number of bins in a group. Useful to compute the amount of dynamic
shared memory when launching a kernel. */
int max_group_bins;
/** Creates feature groups by splitting features into groups.
\param cuts Histogram cuts that given the number of bins per feature.
\param is_dense Whether the data matrix is dense.
@ -110,7 +111,7 @@ struct FeatureGroups {
private:
void InitSingle(const common::HistogramCuts& cuts);
};
};
} // namespace tree
} // namespace xgboost

View File

@ -167,7 +167,7 @@ GradientBasedSample ExternalMemoryNoSampling::Sample(Context const* ctx,
for (auto& batch : dmat->GetBatches<EllpackPage>(ctx, batch_param_)) {
auto page = batch.Impl();
if (!page_) {
page_ = std::make_unique<EllpackPageImpl>(ctx->Device(), page->Cuts(), page->is_dense,
page_ = std::make_unique<EllpackPageImpl>(ctx->Device(), page->CutsShared(), page->is_dense,
page->row_stride, dmat->Info().num_row_);
}
size_t num_elements = page_->Copy(ctx->Device(), page, offset);
@ -228,7 +228,7 @@ GradientBasedSample ExternalMemoryUniformSampling::Sample(Context const* ctx,
auto first_page = (*batch_iterator.begin()).Impl();
// Create a new ELLPACK page with empty rows.
page_.reset(); // Release the device memory first before reallocating
page_.reset(new EllpackPageImpl(ctx->Device(), first_page->Cuts(), first_page->is_dense,
page_.reset(new EllpackPageImpl(ctx->Device(), first_page->CutsShared(), first_page->is_dense,
first_page->row_stride, sample_rows));
// Compact the ELLPACK pages into the single sample page.
@ -306,7 +306,7 @@ GradientBasedSample ExternalMemoryGradientBasedSampling::Sample(Context const* c
auto first_page = (*batch_iterator.begin()).Impl();
// Create a new ELLPACK page with empty rows.
page_.reset(); // Release the device memory first before reallocating
page_.reset(new EllpackPageImpl(ctx->Device(), first_page->Cuts(), first_page->is_dense,
page_.reset(new EllpackPageImpl(ctx->Device(), first_page->CutsShared(), first_page->is_dense,
first_page->row_stride, sample_rows));
// Compact the ELLPACK pages into the single sample page.

View File

@ -56,8 +56,7 @@ TEST(HistUtil, DeviceSketch) {
TEST(HistUtil, SketchBatchNumElements) {
#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
LOG(WARNING) << "Test not runnable with RMM enabled.";
return;
GTEST_SKIP_("Test not runnable with RMM enabled.");
#endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
size_t constexpr kCols = 10000;
int device;

View File

@ -152,7 +152,7 @@ TEST(EllpackPage, Copy) {
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
// Create an empty result page.
EllpackPageImpl result(FstCU(), page->Cuts(), page->is_dense, page->row_stride, kRows);
EllpackPageImpl result(FstCU(), page->CutsShared(), page->is_dense, page->row_stride, kRows);
// Copy batch pages into the result page.
size_t offset = 0;
@ -200,7 +200,8 @@ TEST(EllpackPage, Compact) {
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
// Create an empty result page.
EllpackPageImpl result(FstCU(), page->Cuts(), page->is_dense, page->row_stride, kCompactedRows);
EllpackPageImpl result(FstCU(), page->CutsShared(), page->is_dense, page->row_stride,
kCompactedRows);
// Compact batch pages into the result page.
std::vector<size_t> row_indexes_h {

View File

@ -1,14 +1,14 @@
/**
* Copyright 2021-2023, XGBoost contributors
* Copyright 2021-2024, XGBoost contributors
*/
#include <gtest/gtest.h>
#include <xgboost/data.h>
#include "../../../src/common/io.h" // for PrivateMmapConstStream, AlignedResourceReadStream...
#include "../../../src/data/ellpack_page.cuh"
#include "../../../src/data/sparse_page_source.h"
#include "../../../src/tree/param.h" // TrainParam
#include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../../../src/data/ellpack_page_raw_format.h" // for EllpackPageRawFormat
#include "../../../src/tree/param.h" // TrainParam
#include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../helpers.h"
namespace xgboost::data {
@ -16,12 +16,18 @@ TEST(EllpackPageRawFormat, IO) {
Context ctx{MakeCUDACtx(0)};
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
std::unique_ptr<SparsePageFormat<EllpackPage>> format{CreatePageFormat<EllpackPage>("raw")};
auto m = RandomDataGenerator{100, 14, 0.5}.GenerateDMatrix();
dmlc::TemporaryDirectory tmpdir;
std::string path = tmpdir.path + "/ellpack.page";
std::shared_ptr<common::HistogramCuts const> cuts;
for (auto const& page : m->GetBatches<EllpackPage>(&ctx, param)) {
cuts = page.Impl()->CutsShared();
}
cuts->SetDevice(ctx.Device());
auto format = std::make_unique<EllpackPageRawFormat>(cuts);
std::size_t n_bytes{0};
{
auto fo = std::make_unique<common::AlignedFileWriteStream>(StringView{path}, "wb");
@ -33,7 +39,7 @@ TEST(EllpackPageRawFormat, IO) {
EllpackPage page;
std::unique_ptr<common::AlignedResourceReadStream> fi{
std::make_unique<common::PrivateMmapConstStream>(path.c_str(), 0, n_bytes)};
format->Read(&page, fi.get());
ASSERT_TRUE(format->Read(&page, fi.get()));
for (auto const &ellpack : m->GetBatches<EllpackPage>(&ctx, param)) {
auto loaded = page.Impl();

View File

@ -7,23 +7,28 @@
#include <cstddef> // for size_t
#include <memory> // for unique_ptr
#include "../../../src/common/column_matrix.h"
#include "../../../src/common/io.h" // for MmapResource, AlignedResourceReadStream...
#include "../../../src/data/gradient_index.h" // for GHistIndexMatrix
#include "../../../src/data/sparse_page_writer.h" // for CreatePageFormat
#include "../helpers.h" // for RandomDataGenerator
#include "../../../src/common/column_matrix.h" // for common::ColumnMatrix
#include "../../../src/common/io.h" // for MmapResource, AlignedResourceReadStream...
#include "../../../src/data/gradient_index.h" // for GHistIndexMatrix
#include "../../../src/data/gradient_index_format.h" // for GHistIndexRawFormat
#include "../helpers.h" // for RandomDataGenerator
namespace xgboost::data {
TEST(GHistIndexPageRawFormat, IO) {
Context ctx;
std::unique_ptr<SparsePageFormat<GHistIndexMatrix>> format{
CreatePageFormat<GHistIndexMatrix>("raw")};
auto m = RandomDataGenerator{100, 14, 0.5}.GenerateDMatrix();
dmlc::TemporaryDirectory tmpdir;
std::string path = tmpdir.path + "/ghistindex.page";
auto batch = BatchParam{256, 0.5};
common::HistogramCuts cuts;
for (auto const &index : m->GetBatches<GHistIndexMatrix>(&ctx, batch)) {
cuts = index.Cuts();
break;
}
auto format = std::make_unique<GHistIndexRawFormat>(std::move(cuts));
std::size_t bytes{0};
{
auto fo = std::make_unique<common::AlignedFileWriteStream>(StringView{path}, "wb");
@ -36,7 +41,7 @@ TEST(GHistIndexPageRawFormat, IO) {
std::unique_ptr<common::AlignedResourceReadStream> fi{
std::make_unique<common::PrivateMmapConstStream>(path, 0, bytes)};
format->Read(&page, fi.get());
ASSERT_TRUE(format->Read(&page, fi.get()));
for (auto const &gidx : m->GetBatches<GHistIndexMatrix>(&ctx, batch)) {
auto const &loaded = gidx;

View File

@ -20,9 +20,8 @@ void TestEquivalent(float sparsity) {
std::numeric_limits<float>::quiet_NaN(), 0, 256);
std::size_t offset = 0;
auto first = (*m.GetEllpackBatches(&ctx, {}).begin()).Impl();
std::unique_ptr<EllpackPageImpl> page_concatenated {
new EllpackPageImpl(ctx.Device(), first->Cuts(), first->is_dense,
first->row_stride, 1000 * 100)};
std::unique_ptr<EllpackPageImpl> page_concatenated{new EllpackPageImpl(
ctx.Device(), first->CutsShared(), first->is_dense, first->row_stride, 1000 * 100)};
for (auto& batch : m.GetBatches<EllpackPage>(&ctx, {})) {
auto page = batch.Impl();
size_t num_elements = page_concatenated->Copy(ctx.Device(), page, offset);

View File

@ -171,6 +171,12 @@ TEST(SparsePageDMatrix, GHistIndexSkipSparsePage) {
// Restore the batch parameter by passing it in again through check_ghist
check_ghist();
}
// half the pages
auto it = Xy->GetBatches<SparsePage>(&ctx).begin();
for (std::int32_t i = 0; i < 3; ++i) {
++it;
}
check_ghist();
}
TEST(SparsePageDMatrix, MetaInfo) {

View File

@ -164,9 +164,9 @@ TEST(SparsePageDMatrix, EllpackPageContent) {
size_t offset = 0;
for (auto& batch : dmat_ext->GetBatches<EllpackPage>(&ctx, param)) {
if (!impl_ext) {
impl_ext = std::make_unique<EllpackPageImpl>(batch.Impl()->gidx_buffer.Device(),
batch.Impl()->Cuts(), batch.Impl()->is_dense,
batch.Impl()->row_stride, kRows);
impl_ext = std::make_unique<EllpackPageImpl>(
batch.Impl()->gidx_buffer.Device(), batch.Impl()->CutsShared(), batch.Impl()->is_dense,
batch.Impl()->row_stride, kRows);
}
auto n_elems = impl_ext->Copy(ctx.Device(), batch.Impl(), offset);
offset += n_elems;

View File

@ -13,31 +13,25 @@
namespace xgboost {
#if defined(__CUDACC__)
namespace {
namespace detail {
class HistogramCutsWrapper : public common::HistogramCuts {
public:
using SuperT = common::HistogramCuts;
void SetValues(std::vector<float> cuts) {
SuperT::cut_values_.HostVector() = std::move(cuts);
}
void SetPtrs(std::vector<uint32_t> ptrs) {
SuperT::cut_ptrs_.HostVector() = std::move(ptrs);
}
void SetMins(std::vector<float> mins) {
SuperT::min_vals_.HostVector() = std::move(mins);
}
void SetValues(std::vector<float> cuts) { SuperT::cut_values_.HostVector() = std::move(cuts); }
void SetPtrs(std::vector<uint32_t> ptrs) { SuperT::cut_ptrs_.HostVector() = std::move(ptrs); }
void SetMins(std::vector<float> mins) { SuperT::min_vals_.HostVector() = std::move(mins); }
};
} // anonymous namespace
} // namespace detail
inline std::unique_ptr<EllpackPageImpl> BuildEllpackPage(int n_rows, int n_cols,
bst_float sparsity = 0) {
auto dmat = RandomDataGenerator(n_rows, n_cols, sparsity).Seed(3).GenerateDMatrix();
const SparsePage& batch = *dmat->GetBatches<xgboost::SparsePage>().begin();
HistogramCutsWrapper cmat;
cmat.SetPtrs({0, 3, 6, 9, 12, 15, 18, 21, 24});
auto cmat = std::make_shared<detail::HistogramCutsWrapper>();
cmat->SetPtrs({0, 3, 6, 9, 12, 15, 18, 21, 24});
// 24 cut fields, 3 cut fields for each feature (column).
cmat.SetValues({0.30f, 0.67f, 1.64f,
cmat->SetValues({0.30f, 0.67f, 1.64f,
0.32f, 0.77f, 1.95f,
0.29f, 0.70f, 1.80f,
0.32f, 0.75f, 1.85f,
@ -45,7 +39,7 @@ inline std::unique_ptr<EllpackPageImpl> BuildEllpackPage(int n_rows, int n_cols,
0.25f, 0.74f, 2.00f,
0.26f, 0.74f, 1.98f,
0.26f, 0.71f, 1.83f});
cmat.SetMins({0.1f, 0.2f, 0.3f, 0.1f, 0.2f, 0.3f, 0.2f, 0.2f});
cmat->SetMins({0.1f, 0.2f, 0.3f, 0.1f, 0.2f, 0.3f, 0.2f, 0.2f});
bst_idx_t row_stride = 0;
const auto &offset_vec = batch.offset.ConstHostVector();

View File

@ -150,13 +150,13 @@ TEST(GpuHist, BuildHistSharedMem) {
TestBuildHist<GradientPairPrecise>(true);
}
HistogramCutsWrapper GetHostCutMatrix () {
HistogramCutsWrapper cmat;
cmat.SetPtrs({0, 3, 6, 9, 12, 15, 18, 21, 24});
cmat.SetMins({0.1f, 0.2f, 0.3f, 0.1f, 0.2f, 0.3f, 0.2f, 0.2f});
std::shared_ptr<detail::HistogramCutsWrapper> GetHostCutMatrix () {
auto cmat = std::make_shared<detail::HistogramCutsWrapper>();
cmat->SetPtrs({0, 3, 6, 9, 12, 15, 18, 21, 24});
cmat->SetMins({0.1f, 0.2f, 0.3f, 0.1f, 0.2f, 0.3f, 0.2f, 0.2f});
// 24 cut fields, 3 cut fields for each feature (column).
// Each row of the cut represents the cuts for a data column.
cmat.SetValues({0.30f, 0.67f, 1.64f,
cmat->SetValues({0.30f, 0.67f, 1.64f,
0.32f, 0.77f, 1.95f,
0.29f, 0.70f, 1.80f,
0.32f, 0.75f, 1.85f,