Use Booster context in DMatrix. (#8896)
- Pass context from booster to DMatrix. - Use context instead of an integer for `n_threads`. - Check the consistency of the `max_bin` configuration. - Test all combinations of initialization options.
This commit is contained in:
33
src/data/batch_utils.h
Normal file
33
src/data/batch_utils.h
Normal file
@@ -0,0 +1,33 @@
|
||||
/**
|
||||
* Copyright 2023, XGBoost Contributors
|
||||
*/
|
||||
#ifndef XGBOOST_DATA_BATCH_UTILS_H_
|
||||
#define XGBOOST_DATA_BATCH_UTILS_H_
|
||||
|
||||
#include "xgboost/data.h" // for BatchParam
|
||||
|
||||
namespace xgboost::data::detail {
|
||||
// At least one batch parameter is initialized.
|
||||
inline void CheckEmpty(BatchParam const& l, BatchParam const& r) {
|
||||
if (!l.Initialized()) {
|
||||
CHECK(r.Initialized()) << "Batch parameter is not initialized.";
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Should we regenerate the gradient index?
|
||||
*
|
||||
* \param old Parameter stored in DMatrix.
|
||||
* \param p New parameter passed in by caller.
|
||||
*/
|
||||
inline bool RegenGHist(BatchParam old, BatchParam p) {
|
||||
// Parameter is renewed or caller requests a regen
|
||||
if (!p.Initialized()) {
|
||||
// Empty parameter is passed in, don't regenerate so that we can use gindex in
|
||||
// predictor, which doesn't have any training parameter.
|
||||
return false;
|
||||
}
|
||||
return p.regen || old.ParamNotEqual(p);
|
||||
}
|
||||
} // namespace xgboost::data::detail
|
||||
#endif // XGBOOST_DATA_BATCH_UTILS_H_
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2019 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2019-2023, XGBoost contributors
|
||||
*/
|
||||
#ifndef XGBOOST_USE_CUDA
|
||||
|
||||
@@ -12,7 +12,7 @@ class EllpackPageImpl {};
|
||||
|
||||
EllpackPage::EllpackPage() = default;
|
||||
|
||||
EllpackPage::EllpackPage(DMatrix*, const BatchParam&) {
|
||||
EllpackPage::EllpackPage(Context const*, DMatrix*, const BatchParam&) {
|
||||
LOG(FATAL) << "Internal Error: XGBoost is not compiled with CUDA but "
|
||||
"EllpackPage is required";
|
||||
}
|
||||
|
||||
@@ -17,8 +17,8 @@ namespace xgboost {
|
||||
|
||||
EllpackPage::EllpackPage() : impl_{new EllpackPageImpl()} {}
|
||||
|
||||
EllpackPage::EllpackPage(DMatrix* dmat, const BatchParam& param)
|
||||
: impl_{new EllpackPageImpl(dmat, param)} {}
|
||||
EllpackPage::EllpackPage(Context const* ctx, DMatrix* dmat, const BatchParam& param)
|
||||
: impl_{new EllpackPageImpl{ctx, dmat, param}} {}
|
||||
|
||||
EllpackPage::~EllpackPage() = default;
|
||||
|
||||
@@ -105,29 +105,29 @@ EllpackPageImpl::EllpackPageImpl(int device, common::HistogramCuts cuts,
|
||||
}
|
||||
|
||||
// Construct an ELLPACK matrix in memory.
|
||||
EllpackPageImpl::EllpackPageImpl(DMatrix* dmat, const BatchParam& param)
|
||||
EllpackPageImpl::EllpackPageImpl(Context const* ctx, DMatrix* dmat, const BatchParam& param)
|
||||
: is_dense(dmat->IsDense()) {
|
||||
monitor_.Init("ellpack_page");
|
||||
dh::safe_cuda(cudaSetDevice(param.gpu_id));
|
||||
dh::safe_cuda(cudaSetDevice(ctx->gpu_id));
|
||||
|
||||
n_rows = dmat->Info().num_row_;
|
||||
|
||||
monitor_.Start("Quantiles");
|
||||
// Create the quantile sketches for the dmatrix and initialize HistogramCuts.
|
||||
row_stride = GetRowStride(dmat);
|
||||
cuts_ = common::DeviceSketch(param.gpu_id, dmat, param.max_bin);
|
||||
cuts_ = common::DeviceSketch(ctx->gpu_id, dmat, param.max_bin);
|
||||
monitor_.Stop("Quantiles");
|
||||
|
||||
monitor_.Start("InitCompressedData");
|
||||
this->InitCompressedData(param.gpu_id);
|
||||
this->InitCompressedData(ctx->gpu_id);
|
||||
monitor_.Stop("InitCompressedData");
|
||||
|
||||
dmat->Info().feature_types.SetDevice(param.gpu_id);
|
||||
dmat->Info().feature_types.SetDevice(ctx->gpu_id);
|
||||
auto ft = dmat->Info().feature_types.ConstDeviceSpan();
|
||||
monitor_.Start("BinningCompression");
|
||||
CHECK(dmat->SingleColBlock());
|
||||
for (const auto& batch : dmat->GetBatches<SparsePage>()) {
|
||||
CreateHistIndices(param.gpu_id, batch, ft);
|
||||
CreateHistIndices(ctx->gpu_id, batch, ft);
|
||||
}
|
||||
monitor_.Stop("BinningCompression");
|
||||
}
|
||||
|
||||
@@ -155,7 +155,7 @@ class EllpackPageImpl {
|
||||
* This is used in the in-memory case. The ELLPACK page is constructed from an existing DMatrix
|
||||
* in CSR format.
|
||||
*/
|
||||
explicit EllpackPageImpl(DMatrix* dmat, const BatchParam& parm);
|
||||
explicit EllpackPageImpl(Context const* ctx, DMatrix* dmat, const BatchParam& parm);
|
||||
|
||||
template <typename AdapterBatch>
|
||||
explicit EllpackPageImpl(AdapterBatch batch, float missing, int device, bool is_dense,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2019-2022 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2019-2023, XGBoost contributors
|
||||
*/
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
@@ -10,7 +10,7 @@
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
void EllpackPageSource::Fetch() {
|
||||
dh::safe_cuda(cudaSetDevice(param_.gpu_id));
|
||||
dh::safe_cuda(cudaSetDevice(device_));
|
||||
if (!this->ReadCache()) {
|
||||
if (count_ != 0 && !sync_) {
|
||||
// source is initialized to be the 0th page during construction, so when count_ is 0
|
||||
@@ -22,8 +22,7 @@ void EllpackPageSource::Fetch() {
|
||||
auto const &csr = source_->Page();
|
||||
this->page_.reset(new EllpackPage{});
|
||||
auto *impl = this->page_->Impl();
|
||||
*impl = EllpackPageImpl(param_.gpu_id, *cuts_, *csr, is_dense_, row_stride_,
|
||||
feature_types_);
|
||||
*impl = EllpackPageImpl(device_, *cuts_, *csr, is_dense_, row_stride_, feature_types_);
|
||||
page_->SetBaseRowId(csr->base_rowid);
|
||||
this->WriteCache();
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2019-2022 by XGBoost Contributors
|
||||
/**
|
||||
* Copyright 2019-2023, XGBoost Contributors
|
||||
*/
|
||||
|
||||
#ifndef XGBOOST_DATA_ELLPACK_PAGE_SOURCE_H_
|
||||
@@ -23,19 +23,21 @@ class EllpackPageSource : public PageSourceIncMixIn<EllpackPage> {
|
||||
BatchParam param_;
|
||||
common::Span<FeatureType const> feature_types_;
|
||||
std::unique_ptr<common::HistogramCuts> cuts_;
|
||||
std::int32_t device_;
|
||||
|
||||
public:
|
||||
EllpackPageSource(float missing, int nthreads, bst_feature_t n_features, size_t n_batches,
|
||||
std::shared_ptr<Cache> cache, BatchParam param,
|
||||
std::unique_ptr<common::HistogramCuts> cuts, bool is_dense, size_t row_stride,
|
||||
common::Span<FeatureType const> feature_types,
|
||||
std::shared_ptr<SparsePageSource> source)
|
||||
std::shared_ptr<SparsePageSource> source, std::int32_t device)
|
||||
: PageSourceIncMixIn(missing, nthreads, n_features, n_batches, cache, false),
|
||||
is_dense_{is_dense},
|
||||
row_stride_{row_stride},
|
||||
param_{std::move(param)},
|
||||
feature_types_{feature_types},
|
||||
cuts_{std::move(cuts)} {
|
||||
cuts_{std::move(cuts)},
|
||||
device_{device} {
|
||||
this->source_ = source;
|
||||
this->Fetch();
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2017-2022 by XGBoost Contributors
|
||||
/**
|
||||
* Copyright 2017-2023, XGBoost Contributors
|
||||
* \brief Data type for fast histogram aggregation.
|
||||
*/
|
||||
#include "gradient_index.h"
|
||||
@@ -19,18 +19,18 @@ namespace xgboost {
|
||||
|
||||
GHistIndexMatrix::GHistIndexMatrix() : columns_{std::make_unique<common::ColumnMatrix>()} {}
|
||||
|
||||
GHistIndexMatrix::GHistIndexMatrix(DMatrix *p_fmat, bst_bin_t max_bins_per_feat,
|
||||
double sparse_thresh, bool sorted_sketch, int32_t n_threads,
|
||||
GHistIndexMatrix::GHistIndexMatrix(Context const *ctx, DMatrix *p_fmat, bst_bin_t max_bins_per_feat,
|
||||
double sparse_thresh, bool sorted_sketch,
|
||||
common::Span<float> hess)
|
||||
: max_numeric_bins_per_feat{max_bins_per_feat} {
|
||||
CHECK(p_fmat->SingleColBlock());
|
||||
// We use sorted sketching for approx tree method since it's more efficient in
|
||||
// computation time (but higher memory usage).
|
||||
cut = common::SketchOnDMatrix(p_fmat, max_bins_per_feat, n_threads, sorted_sketch, hess);
|
||||
cut = common::SketchOnDMatrix(ctx, p_fmat, max_bins_per_feat, sorted_sketch, hess);
|
||||
|
||||
const uint32_t nbins = cut.Ptrs().back();
|
||||
hit_count.resize(nbins, 0);
|
||||
hit_count_tloc_.resize(n_threads * nbins, 0);
|
||||
hit_count_tloc_.resize(ctx->Threads() * nbins, 0);
|
||||
|
||||
size_t new_size = 1;
|
||||
for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
|
||||
@@ -45,7 +45,7 @@ GHistIndexMatrix::GHistIndexMatrix(DMatrix *p_fmat, bst_bin_t max_bins_per_feat,
|
||||
auto ft = p_fmat->Info().feature_types.ConstHostSpan();
|
||||
|
||||
for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
|
||||
this->PushBatch(batch, ft, n_threads);
|
||||
this->PushBatch(batch, ft, ctx->Threads());
|
||||
}
|
||||
this->columns_ = std::make_unique<common::ColumnMatrix>();
|
||||
|
||||
@@ -54,7 +54,7 @@ GHistIndexMatrix::GHistIndexMatrix(DMatrix *p_fmat, bst_bin_t max_bins_per_feat,
|
||||
// hist
|
||||
CHECK(!sorted_sketch);
|
||||
for (auto const &page : p_fmat->GetBatches<SparsePage>()) {
|
||||
this->columns_->InitFromSparse(page, *this, sparse_thresh, n_threads);
|
||||
this->columns_->InitFromSparse(page, *this, sparse_thresh, ctx->Threads());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,7 +19,6 @@
|
||||
#include "../common/threading_utils.h"
|
||||
#include "../common/transform_iterator.h" // for MakeIndexTransformIter
|
||||
#include "adapter.h"
|
||||
#include "proxy_dmatrix.h"
|
||||
#include "xgboost/base.h"
|
||||
#include "xgboost/data.h"
|
||||
|
||||
@@ -155,8 +154,8 @@ class GHistIndexMatrix {
|
||||
/**
|
||||
* \brief Constructor for SimpleDMatrix.
|
||||
*/
|
||||
GHistIndexMatrix(DMatrix* x, bst_bin_t max_bins_per_feat, double sparse_thresh,
|
||||
bool sorted_sketch, int32_t n_threads, common::Span<float> hess = {});
|
||||
GHistIndexMatrix(Context const* ctx, DMatrix* x, bst_bin_t max_bins_per_feat,
|
||||
double sparse_thresh, bool sorted_sketch, common::Span<float> hess = {});
|
||||
/**
|
||||
* \brief Constructor for Iterative DMatrix. Initialize basic information and prepare
|
||||
* for push batch.
|
||||
@@ -295,28 +294,5 @@ void AssignColumnBinIndex(GHistIndexMatrix const& page, Fn&& assign) {
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Should we regenerate the gradient index?
|
||||
*
|
||||
* \param old Parameter stored in DMatrix.
|
||||
* \param p New parameter passed in by caller.
|
||||
*/
|
||||
inline bool RegenGHist(BatchParam old, BatchParam p) {
|
||||
// parameter is renewed or caller requests a regen
|
||||
if (p == BatchParam{}) {
|
||||
// empty parameter is passed in, don't regenerate so that we can use gindex in
|
||||
// predictor, which doesn't have any training parameter.
|
||||
return false;
|
||||
}
|
||||
|
||||
// Avoid comparing nan values.
|
||||
bool l_nan = std::isnan(old.sparse_thresh);
|
||||
bool r_nan = std::isnan(p.sparse_thresh);
|
||||
// regenerate if parameter is changed.
|
||||
bool st_chg = (l_nan != r_nan) || (!l_nan && !r_nan && (old.sparse_thresh != p.sparse_thresh));
|
||||
bool param_chg = old.gpu_id != p.gpu_id || old.max_bin != p.max_bin;
|
||||
return p.regen || param_chg || st_chg;
|
||||
}
|
||||
} // namespace xgboost
|
||||
#endif // XGBOOST_DATA_GRADIENT_INDEX_H_
|
||||
|
||||
@@ -1,25 +1,26 @@
|
||||
/*!
|
||||
* Copyright 2022 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2022-2023, XGBoost contributors
|
||||
*/
|
||||
#include "iterative_dmatrix.h"
|
||||
|
||||
#include <algorithm> // std::copy
|
||||
#include <cstddef> // std::size_t
|
||||
#include <type_traits> // std::underlying_type_t
|
||||
#include <vector> // std::vector
|
||||
#include <algorithm> // for copy
|
||||
#include <cstddef> // for size_t
|
||||
#include <memory> // for shared_ptr
|
||||
#include <type_traits> // for underlying_type_t
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../collective/communicator-inl.h"
|
||||
#include "../common/categorical.h" // common::IsCat
|
||||
#include "../common/column_matrix.h"
|
||||
#include "../tree/param.h" // FIXME(jiamingy): Find a better way to share this parameter.
|
||||
#include "../tree/param.h" // FIXME(jiamingy): Find a better way to share this parameter.
|
||||
#include "batch_utils.h" // for RegenGHist
|
||||
#include "gradient_index.h"
|
||||
#include "proxy_dmatrix.h"
|
||||
#include "simple_batch_iterator.h"
|
||||
#include "xgboost/data.h" // FeatureType
|
||||
#include "xgboost/data.h" // for FeatureType, DMatrix
|
||||
#include "xgboost/logging.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
namespace xgboost::data {
|
||||
IterativeDMatrix::IterativeDMatrix(DataIterHandle iter_handle, DMatrixHandle proxy,
|
||||
std::shared_ptr<DMatrix> ref, DataIterResetCallback* reset,
|
||||
XGDMatrixCallbackNext* next, float missing, int nthread,
|
||||
@@ -34,60 +35,61 @@ IterativeDMatrix::IterativeDMatrix(DataIterHandle iter_handle, DMatrixHandle pro
|
||||
|
||||
auto d = MakeProxy(proxy_)->DeviceIdx();
|
||||
|
||||
StringView msg{"All batch should be on the same device."};
|
||||
if (batch_param_.gpu_id != Context::kCpuId) {
|
||||
CHECK_EQ(d, batch_param_.gpu_id) << msg;
|
||||
}
|
||||
|
||||
batch_param_ = BatchParam{d, max_bin};
|
||||
Context ctx;
|
||||
ctx.UpdateAllowUnknown(Args{{"nthread", std::to_string(nthread)}, {"gpu_id", std::to_string(d)}});
|
||||
// hardcoded parameter.
|
||||
batch_param_.sparse_thresh = tree::TrainParam::DftSparseThreshold();
|
||||
BatchParam p{max_bin, tree::TrainParam::DftSparseThreshold()};
|
||||
|
||||
ctx_.UpdateAllowUnknown(
|
||||
Args{{"nthread", std::to_string(nthread)}, {"gpu_id", std::to_string(d)}});
|
||||
if (ctx_.IsCPU()) {
|
||||
this->InitFromCPU(iter_handle, missing, ref);
|
||||
if (ctx.IsCPU()) {
|
||||
this->InitFromCPU(&ctx, p, iter_handle, missing, ref);
|
||||
} else {
|
||||
this->InitFromCUDA(iter_handle, missing, ref);
|
||||
this->InitFromCUDA(&ctx, p, iter_handle, missing, ref);
|
||||
}
|
||||
|
||||
this->fmat_ctx_ = ctx;
|
||||
this->batch_ = p;
|
||||
}
|
||||
|
||||
void GetCutsFromRef(std::shared_ptr<DMatrix> ref_, bst_feature_t n_features, BatchParam p,
|
||||
common::HistogramCuts* p_cuts) {
|
||||
CHECK(ref_);
|
||||
void GetCutsFromRef(Context const* ctx, std::shared_ptr<DMatrix> ref, bst_feature_t n_features,
|
||||
BatchParam p, common::HistogramCuts* p_cuts) {
|
||||
CHECK(ref);
|
||||
CHECK(p_cuts);
|
||||
auto csr = [&]() {
|
||||
for (auto const& page : ref_->GetBatches<GHistIndexMatrix>(p)) {
|
||||
p.forbid_regen = true;
|
||||
// Fetch cuts from GIDX
|
||||
auto csr = [&] {
|
||||
for (auto const& page : ref->GetBatches<GHistIndexMatrix>(ctx, p)) {
|
||||
*p_cuts = page.cut;
|
||||
break;
|
||||
}
|
||||
};
|
||||
auto ellpack = [&]() {
|
||||
// workaround ellpack being initialized from CPU.
|
||||
if (p.gpu_id == Context::kCpuId) {
|
||||
p.gpu_id = ref_->Ctx()->gpu_id;
|
||||
}
|
||||
if (p.gpu_id == Context::kCpuId) {
|
||||
p.gpu_id = 0;
|
||||
}
|
||||
for (auto const& page : ref_->GetBatches<EllpackPage>(p)) {
|
||||
// Fetch cuts from Ellpack.
|
||||
auto ellpack = [&] {
|
||||
for (auto const& page : ref->GetBatches<EllpackPage>(ctx, p)) {
|
||||
GetCutsFromEllpack(page, p_cuts);
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
if (ref_->PageExists<GHistIndexMatrix>()) {
|
||||
if (ref->PageExists<GHistIndexMatrix>() && ref->PageExists<EllpackPage>()) {
|
||||
// Both exist
|
||||
if (ctx->IsCPU()) {
|
||||
csr();
|
||||
} else {
|
||||
ellpack();
|
||||
}
|
||||
} else if (ref->PageExists<GHistIndexMatrix>()) {
|
||||
csr();
|
||||
} else if (ref_->PageExists<EllpackPage>()) {
|
||||
} else if (ref->PageExists<EllpackPage>()) {
|
||||
ellpack();
|
||||
} else {
|
||||
if (p.gpu_id == Context::kCpuId) {
|
||||
// None exist
|
||||
if (ctx->IsCPU()) {
|
||||
csr();
|
||||
} else {
|
||||
ellpack();
|
||||
}
|
||||
}
|
||||
CHECK_EQ(ref_->Info().num_col_, n_features)
|
||||
CHECK_EQ(ref->Info().num_col_, n_features)
|
||||
<< "Invalid ref DMatrix, different number of features.";
|
||||
}
|
||||
|
||||
@@ -112,7 +114,8 @@ void SyncFeatureType(std::vector<FeatureType>* p_h_ft) {
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
|
||||
void IterativeDMatrix::InitFromCPU(Context const* ctx, BatchParam const& p,
|
||||
DataIterHandle iter_handle, float missing,
|
||||
std::shared_ptr<DMatrix> ref) {
|
||||
DMatrixProxy* proxy = MakeProxy(proxy_);
|
||||
CHECK(proxy);
|
||||
@@ -133,7 +136,7 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
|
||||
auto const is_valid = data::IsValidFunctor{missing};
|
||||
auto nnz_cnt = [&]() {
|
||||
return HostAdapterDispatch(proxy, [&](auto const& value) {
|
||||
size_t n_threads = ctx_.Threads();
|
||||
size_t n_threads = ctx->Threads();
|
||||
size_t n_features = column_sizes.size();
|
||||
linalg::Tensor<std::size_t, 2> column_sizes_tloc({n_threads, n_features}, Context::kCpuId);
|
||||
column_sizes_tloc.Data()->Fill(0ul);
|
||||
@@ -158,10 +161,10 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
|
||||
});
|
||||
};
|
||||
|
||||
size_t n_features = 0;
|
||||
size_t n_batches = 0;
|
||||
size_t accumulated_rows{0};
|
||||
size_t nnz{0};
|
||||
std::uint64_t n_features = 0;
|
||||
std::size_t n_batches = 0;
|
||||
std::uint64_t accumulated_rows{0};
|
||||
std::uint64_t nnz{0};
|
||||
|
||||
/**
|
||||
* CPU impl needs an additional loop for accumulating the column size.
|
||||
@@ -203,7 +206,7 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
|
||||
accumulated_rows = 0;
|
||||
std::vector<FeatureType> h_ft;
|
||||
if (ref) {
|
||||
GetCutsFromRef(ref, Info().num_col_, batch_param_, &cuts);
|
||||
GetCutsFromRef(ctx, ref, Info().num_col_, p, &cuts);
|
||||
h_ft = ref->Info().feature_types.HostVector();
|
||||
} else {
|
||||
size_t i = 0;
|
||||
@@ -211,9 +214,8 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
|
||||
if (!p_sketch) {
|
||||
h_ft = proxy->Info().feature_types.ConstHostVector();
|
||||
SyncFeatureType(&h_ft);
|
||||
p_sketch.reset(new common::HostSketchContainer{
|
||||
batch_param_.max_bin, h_ft, column_sizes, !proxy->Info().group_ptr_.empty(),
|
||||
ctx_.Threads()});
|
||||
p_sketch.reset(new common::HostSketchContainer{ctx, p.max_bin, h_ft, column_sizes,
|
||||
!proxy->Info().group_ptr_.empty()});
|
||||
}
|
||||
HostAdapterDispatch(proxy, [&](auto const& batch) {
|
||||
proxy->Info().num_nonzero_ = batch_nnz[i];
|
||||
@@ -237,15 +239,15 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
|
||||
/**
|
||||
* Generate gradient index.
|
||||
*/
|
||||
this->ghist_ = std::make_unique<GHistIndexMatrix>(Info(), std::move(cuts), batch_param_.max_bin);
|
||||
this->ghist_ = std::make_unique<GHistIndexMatrix>(Info(), std::move(cuts), p.max_bin);
|
||||
size_t rbegin = 0;
|
||||
size_t prev_sum = 0;
|
||||
size_t i = 0;
|
||||
while (iter.Next()) {
|
||||
HostAdapterDispatch(proxy, [&](auto const& batch) {
|
||||
proxy->Info().num_nonzero_ = batch_nnz[i];
|
||||
this->ghist_->PushAdapterBatch(&ctx_, rbegin, prev_sum, batch, missing, h_ft,
|
||||
batch_param_.sparse_thresh, Info().num_row_);
|
||||
this->ghist_->PushAdapterBatch(ctx, rbegin, prev_sum, batch, missing, h_ft, p.sparse_thresh,
|
||||
Info().num_row_);
|
||||
});
|
||||
if (n_batches != 1) {
|
||||
this->info_.Extend(std::move(proxy->Info()), false, true);
|
||||
@@ -265,7 +267,7 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
|
||||
accumulated_rows = 0;
|
||||
while (iter.Next()) {
|
||||
HostAdapterDispatch(proxy, [&](auto const& batch) {
|
||||
this->ghist_->PushAdapterBatchColumns(&ctx_, batch, missing, accumulated_rows);
|
||||
this->ghist_->PushAdapterBatchColumns(ctx, batch, missing, accumulated_rows);
|
||||
});
|
||||
accumulated_rows += num_rows();
|
||||
}
|
||||
@@ -282,11 +284,27 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
|
||||
Info().feature_types.HostVector() = h_ft;
|
||||
}
|
||||
|
||||
BatchSet<GHistIndexMatrix> IterativeDMatrix::GetGradientIndex(BatchParam const& param) {
|
||||
CheckParam(param);
|
||||
BatchSet<GHistIndexMatrix> IterativeDMatrix::GetGradientIndex(Context const* ctx,
|
||||
BatchParam const& param) {
|
||||
if (param.Initialized()) {
|
||||
CheckParam(param);
|
||||
CHECK(!detail::RegenGHist(param, batch_)) << error::InconsistentMaxBin();
|
||||
}
|
||||
if (!ellpack_ && !ghist_) {
|
||||
LOG(FATAL) << "`QuantileDMatrix` not initialized.";
|
||||
}
|
||||
|
||||
if (!ghist_) {
|
||||
CHECK(ellpack_);
|
||||
ghist_ = std::make_shared<GHistIndexMatrix>(&ctx_, Info(), *ellpack_, param);
|
||||
if (ctx->IsCPU()) {
|
||||
ghist_ = std::make_shared<GHistIndexMatrix>(ctx, Info(), *ellpack_, param);
|
||||
} else if (fmat_ctx_.IsCPU()) {
|
||||
ghist_ = std::make_shared<GHistIndexMatrix>(&fmat_ctx_, Info(), *ellpack_, param);
|
||||
} else {
|
||||
// Can happen when QDM is initialized on GPU, but a CPU version is queried by a different QDM
|
||||
// for cut reference.
|
||||
auto cpu_ctx = ctx->MakeCPU();
|
||||
ghist_ = std::make_shared<GHistIndexMatrix>(&cpu_ctx, Info(), *ellpack_, param);
|
||||
}
|
||||
}
|
||||
|
||||
if (!std::isnan(param.sparse_thresh) &&
|
||||
@@ -300,8 +318,9 @@ BatchSet<GHistIndexMatrix> IterativeDMatrix::GetGradientIndex(BatchParam const&
|
||||
return BatchSet<GHistIndexMatrix>(begin_iter);
|
||||
}
|
||||
|
||||
BatchSet<ExtSparsePage> IterativeDMatrix::GetExtBatches(BatchParam const& param) {
|
||||
for (auto const& page : this->GetGradientIndex(param)) {
|
||||
BatchSet<ExtSparsePage> IterativeDMatrix::GetExtBatches(Context const* ctx,
|
||||
BatchParam const& param) {
|
||||
for (auto const& page : this->GetGradientIndex(ctx, param)) {
|
||||
auto p_out = std::make_shared<SparsePage>();
|
||||
p_out->data.Resize(this->Info().num_nonzero_);
|
||||
p_out->offset.Resize(this->Info().num_row_ + 1);
|
||||
@@ -336,5 +355,26 @@ BatchSet<ExtSparsePage> IterativeDMatrix::GetExtBatches(BatchParam const& param)
|
||||
BatchIterator<ExtSparsePage>(new SimpleBatchIteratorImpl<ExtSparsePage>(nullptr));
|
||||
return BatchSet<ExtSparsePage>(begin_iter);
|
||||
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
|
||||
#if !defined(XGBOOST_USE_CUDA)
|
||||
inline void IterativeDMatrix::InitFromCUDA(Context const*, BatchParam const&, DataIterHandle, float,
|
||||
std::shared_ptr<DMatrix>) {
|
||||
// Silence the warning about unused variables.
|
||||
(void)(proxy_);
|
||||
(void)(reset_);
|
||||
(void)(next_);
|
||||
common::AssertGPUSupport();
|
||||
}
|
||||
|
||||
inline BatchSet<EllpackPage> IterativeDMatrix::GetEllpackBatches(Context const* ctx,
|
||||
BatchParam const& param) {
|
||||
common::AssertGPUSupport();
|
||||
auto begin_iter = BatchIterator<EllpackPage>(new SimpleBatchIteratorImpl<EllpackPage>(ellpack_));
|
||||
return BatchSet<EllpackPage>(BatchIterator<EllpackPage>(begin_iter));
|
||||
}
|
||||
|
||||
inline void GetCutsFromEllpack(EllpackPage const&, common::HistogramCuts*) {
|
||||
common::AssertGPUSupport();
|
||||
}
|
||||
#endif // !defined(XGBOOST_USE_CUDA)
|
||||
} // namespace xgboost::data
|
||||
|
||||
@@ -1,22 +1,24 @@
|
||||
/*!
|
||||
* Copyright 2020-2022 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2020-2023, XGBoost contributors
|
||||
*/
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <type_traits>
|
||||
|
||||
#include "../common/hist_util.cuh"
|
||||
#include "batch_utils.h" // for RegenGHist
|
||||
#include "device_adapter.cuh"
|
||||
#include "ellpack_page.cuh"
|
||||
#include "gradient_index.h"
|
||||
#include "iterative_dmatrix.h"
|
||||
#include "proxy_dmatrix.cuh"
|
||||
#include "proxy_dmatrix.h"
|
||||
#include "simple_batch_iterator.h"
|
||||
#include "sparse_page_source.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing,
|
||||
namespace xgboost::data {
|
||||
void IterativeDMatrix::InitFromCUDA(Context const* ctx, BatchParam const& p,
|
||||
DataIterHandle iter_handle, float missing,
|
||||
std::shared_ptr<DMatrix> ref) {
|
||||
// A handle passed to external iterator.
|
||||
DMatrixProxy* proxy = MakeProxy(proxy_);
|
||||
@@ -46,7 +48,7 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing,
|
||||
int32_t current_device;
|
||||
dh::safe_cuda(cudaGetDevice(&current_device));
|
||||
auto get_device = [&]() -> int32_t {
|
||||
int32_t d = (ctx_.gpu_id == Context::kCpuId) ? current_device : ctx_.gpu_id;
|
||||
std::int32_t d = (ctx->gpu_id == Context::kCpuId) ? current_device : ctx->gpu_id;
|
||||
CHECK_NE(d, Context::kCpuId);
|
||||
return d;
|
||||
};
|
||||
@@ -57,8 +59,8 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing,
|
||||
common::HistogramCuts cuts;
|
||||
do {
|
||||
// We use do while here as the first batch is fetched in ctor
|
||||
ctx_.gpu_id = proxy->DeviceIdx();
|
||||
CHECK_LT(ctx_.gpu_id, common::AllVisibleGPUs());
|
||||
// ctx_.gpu_id = proxy->DeviceIdx();
|
||||
CHECK_LT(ctx->gpu_id, common::AllVisibleGPUs());
|
||||
dh::safe_cuda(cudaSetDevice(get_device()));
|
||||
if (cols == 0) {
|
||||
cols = num_cols();
|
||||
@@ -68,12 +70,12 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing,
|
||||
CHECK_EQ(cols, num_cols()) << "Inconsistent number of columns.";
|
||||
}
|
||||
if (!ref) {
|
||||
sketch_containers.emplace_back(proxy->Info().feature_types, batch_param_.max_bin, cols,
|
||||
num_rows(), get_device());
|
||||
sketch_containers.emplace_back(proxy->Info().feature_types, p.max_bin, cols, num_rows(),
|
||||
get_device());
|
||||
auto* p_sketch = &sketch_containers.back();
|
||||
proxy->Info().weights_.SetDevice(get_device());
|
||||
Dispatch(proxy, [&](auto const& value) {
|
||||
common::AdapterDeviceSketch(value, batch_param_.max_bin, proxy->Info(), missing, p_sketch);
|
||||
common::AdapterDeviceSketch(value, p.max_bin, proxy->Info(), missing, p_sketch);
|
||||
});
|
||||
}
|
||||
auto batch_rows = num_rows();
|
||||
@@ -95,8 +97,8 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing,
|
||||
if (!ref) {
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
common::SketchContainer final_sketch(
|
||||
sketch_containers.empty() ? ft : sketch_containers.front().FeatureTypes(),
|
||||
batch_param_.max_bin, cols, accumulated_rows, get_device());
|
||||
sketch_containers.empty() ? ft : sketch_containers.front().FeatureTypes(), p.max_bin, cols,
|
||||
accumulated_rows, get_device());
|
||||
for (auto const& sketch : sketch_containers) {
|
||||
final_sketch.Merge(sketch.ColumnsPtr(), sketch.Data());
|
||||
final_sketch.FixError();
|
||||
@@ -106,7 +108,7 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing,
|
||||
|
||||
final_sketch.MakeCuts(&cuts);
|
||||
} else {
|
||||
GetCutsFromRef(ref, Info().num_col_, batch_param_, &cuts);
|
||||
GetCutsFromRef(ctx, ref, Info().num_col_, p, &cuts);
|
||||
}
|
||||
|
||||
this->info_.num_row_ = accumulated_rows;
|
||||
@@ -169,24 +171,34 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing,
|
||||
info_.SynchronizeNumberOfColumns();
|
||||
}
|
||||
|
||||
BatchSet<EllpackPage> IterativeDMatrix::GetEllpackBatches(BatchParam const& param) {
|
||||
CheckParam(param);
|
||||
BatchSet<EllpackPage> IterativeDMatrix::GetEllpackBatches(Context const* ctx,
|
||||
BatchParam const& param) {
|
||||
if (param.Initialized()) {
|
||||
CheckParam(param);
|
||||
CHECK(!detail::RegenGHist(param, batch_)) << error::InconsistentMaxBin();
|
||||
}
|
||||
if (!ellpack_ && !ghist_) {
|
||||
LOG(FATAL) << "`QuantileDMatrix` not initialized.";
|
||||
}
|
||||
if (!ellpack_ && ghist_) {
|
||||
|
||||
if (!ellpack_) {
|
||||
ellpack_.reset(new EllpackPage());
|
||||
// Evaluation QuantileDMatrix initialized from CPU data might not have the correct GPU
|
||||
// ID.
|
||||
if (this->ctx_.IsCPU()) {
|
||||
this->ctx_.gpu_id = param.gpu_id;
|
||||
if (ctx->IsCUDA()) {
|
||||
this->Info().feature_types.SetDevice(ctx->gpu_id);
|
||||
*ellpack_->Impl() =
|
||||
EllpackPageImpl(ctx, *this->ghist_, this->Info().feature_types.ConstDeviceSpan());
|
||||
} else if (fmat_ctx_.IsCUDA()) {
|
||||
this->Info().feature_types.SetDevice(fmat_ctx_.gpu_id);
|
||||
*ellpack_->Impl() =
|
||||
EllpackPageImpl(&fmat_ctx_, *this->ghist_, this->Info().feature_types.ConstDeviceSpan());
|
||||
} else {
|
||||
// Can happen when QDM is initialized on CPU, but a GPU version is queried by a different QDM
|
||||
// for cut reference.
|
||||
auto cuda_ctx = ctx->MakeCUDA();
|
||||
this->Info().feature_types.SetDevice(cuda_ctx.gpu_id);
|
||||
*ellpack_->Impl() =
|
||||
EllpackPageImpl(&cuda_ctx, *this->ghist_, this->Info().feature_types.ConstDeviceSpan());
|
||||
}
|
||||
if (this->ctx_.IsCPU()) {
|
||||
this->ctx_.gpu_id = dh::CurrentDevice();
|
||||
}
|
||||
this->Info().feature_types.SetDevice(this->ctx_.gpu_id);
|
||||
*ellpack_->Impl() =
|
||||
EllpackPageImpl(&ctx_, *this->ghist_, this->Info().feature_types.ConstDeviceSpan());
|
||||
}
|
||||
CHECK(ellpack_);
|
||||
auto begin_iter = BatchIterator<EllpackPage>(new SimpleBatchIteratorImpl<EllpackPage>(ellpack_));
|
||||
@@ -196,5 +208,4 @@ BatchSet<EllpackPage> IterativeDMatrix::GetEllpackBatches(BatchParam const& para
|
||||
void GetCutsFromEllpack(EllpackPage const& page, common::HistogramCuts* cuts) {
|
||||
*cuts = page.Impl()->Cuts();
|
||||
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::data
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
/*!
|
||||
* Copyright 2020-2022 by Contributors
|
||||
/**
|
||||
* Copyright 2020-2023 by XGBoost Contributors
|
||||
* \file iterative_dmatrix.h
|
||||
*
|
||||
* \brief Implementation of the higher-level `QuantileDMatrix`.
|
||||
*/
|
||||
#ifndef XGBOOST_DATA_ITERATIVE_DMATRIX_H_
|
||||
#define XGBOOST_DATA_ITERATIVE_DMATRIX_H_
|
||||
@@ -10,10 +12,12 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "../common/error_msg.h"
|
||||
#include "proxy_dmatrix.h"
|
||||
#include "simple_batch_iterator.h"
|
||||
#include "xgboost/base.h"
|
||||
#include "xgboost/c_api.h"
|
||||
#include "xgboost/context.h" // for Context
|
||||
#include "xgboost/data.h"
|
||||
|
||||
namespace xgboost {
|
||||
@@ -43,21 +47,17 @@ namespace data {
|
||||
*/
|
||||
class IterativeDMatrix : public DMatrix {
|
||||
MetaInfo info_;
|
||||
Context ctx_;
|
||||
BatchParam batch_param_;
|
||||
std::shared_ptr<EllpackPage> ellpack_;
|
||||
std::shared_ptr<GHistIndexMatrix> ghist_;
|
||||
BatchParam batch_;
|
||||
|
||||
DMatrixHandle proxy_;
|
||||
DataIterResetCallback *reset_;
|
||||
XGDMatrixCallbackNext *next_;
|
||||
Context fmat_ctx_;
|
||||
|
||||
void CheckParam(BatchParam const &param) {
|
||||
// FIXME(Jiamingy): https://github.com/dmlc/xgboost/issues/7976
|
||||
if (param.max_bin != batch_param_.max_bin && param.max_bin != 0) {
|
||||
LOG(WARNING) << "Inconsistent max_bin between Quantile DMatrix and Booster:" << param.max_bin
|
||||
<< " vs. " << batch_param_.max_bin;
|
||||
}
|
||||
CHECK_EQ(param.max_bin, batch_.max_bin) << error::InconsistentMaxBin();
|
||||
CHECK(!param.regen && param.hess.empty())
|
||||
<< "Only `hist` and `gpu_hist` tree method can use `QuantileDMatrix`.";
|
||||
}
|
||||
@@ -68,8 +68,10 @@ class IterativeDMatrix : public DMatrix {
|
||||
return BatchSet<Page>(BatchIterator<Page>(nullptr));
|
||||
}
|
||||
|
||||
void InitFromCUDA(DataIterHandle iter, float missing, std::shared_ptr<DMatrix> ref);
|
||||
void InitFromCPU(DataIterHandle iter_handle, float missing, std::shared_ptr<DMatrix> ref);
|
||||
void InitFromCUDA(Context const *ctx, BatchParam const &p, DataIterHandle iter_handle,
|
||||
float missing, std::shared_ptr<DMatrix> ref);
|
||||
void InitFromCPU(Context const *ctx, BatchParam const &p, DataIterHandle iter_handle,
|
||||
float missing, std::shared_ptr<DMatrix> ref);
|
||||
|
||||
public:
|
||||
explicit IterativeDMatrix(DataIterHandle iter_handle, DMatrixHandle proxy,
|
||||
@@ -94,51 +96,40 @@ class IterativeDMatrix : public DMatrix {
|
||||
LOG(FATAL) << "Not implemented.";
|
||||
return BatchSet<SparsePage>(BatchIterator<SparsePage>(nullptr));
|
||||
}
|
||||
BatchSet<CSCPage> GetColumnBatches() override { return InvalidTreeMethod<CSCPage>(); }
|
||||
BatchSet<SortedCSCPage> GetSortedColumnBatches() override {
|
||||
BatchSet<CSCPage> GetColumnBatches(Context const *) override {
|
||||
return InvalidTreeMethod<CSCPage>();
|
||||
}
|
||||
BatchSet<SortedCSCPage> GetSortedColumnBatches(Context const *) override {
|
||||
return InvalidTreeMethod<SortedCSCPage>();
|
||||
}
|
||||
BatchSet<GHistIndexMatrix> GetGradientIndex(BatchParam const &param) override;
|
||||
BatchSet<GHistIndexMatrix> GetGradientIndex(Context const *ctx, BatchParam const &param) override;
|
||||
|
||||
BatchSet<EllpackPage> GetEllpackBatches(const BatchParam &param) override;
|
||||
BatchSet<ExtSparsePage> GetExtBatches(BatchParam const& param) override;
|
||||
BatchSet<EllpackPage> GetEllpackBatches(Context const *ctx, const BatchParam &param) override;
|
||||
BatchSet<ExtSparsePage> GetExtBatches(Context const *ctx, BatchParam const &param) override;
|
||||
|
||||
bool SingleColBlock() const override { return true; }
|
||||
|
||||
MetaInfo &Info() override { return info_; }
|
||||
MetaInfo const &Info() const override { return info_; }
|
||||
|
||||
Context const *Ctx() const override { return &ctx_; }
|
||||
Context const *Ctx() const override { return &fmat_ctx_; }
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Get quantile cuts from reference Quantile DMatrix.
|
||||
* \brief Get quantile cuts from reference (Quantile)DMatrix.
|
||||
*
|
||||
* \param ctx The context of the new DMatrix.
|
||||
* \param ref The reference DMatrix.
|
||||
* \param n_features Number of features, used for validation only.
|
||||
* \param p Batch parameter for the new DMatrix.
|
||||
* \param p_cuts Output quantile cuts.
|
||||
*/
|
||||
void GetCutsFromRef(std::shared_ptr<DMatrix> ref_, bst_feature_t n_features, BatchParam p,
|
||||
common::HistogramCuts *p_cuts);
|
||||
void GetCutsFromRef(Context const *ctx, std::shared_ptr<DMatrix> ref, bst_feature_t n_features,
|
||||
BatchParam p, common::HistogramCuts *p_cuts);
|
||||
/**
|
||||
* \brief Get quantile cuts from ellpack page.
|
||||
*/
|
||||
void GetCutsFromEllpack(EllpackPage const &page, common::HistogramCuts *cuts);
|
||||
|
||||
#if !defined(XGBOOST_USE_CUDA)
|
||||
inline void IterativeDMatrix::InitFromCUDA(DataIterHandle, float, std::shared_ptr<DMatrix>) {
|
||||
// silence the warning about unused variables.
|
||||
(void)(proxy_);
|
||||
(void)(reset_);
|
||||
(void)(next_);
|
||||
common::AssertGPUSupport();
|
||||
}
|
||||
inline BatchSet<EllpackPage> IterativeDMatrix::GetEllpackBatches(const BatchParam &) {
|
||||
common::AssertGPUSupport();
|
||||
auto begin_iter = BatchIterator<EllpackPage>(new SimpleBatchIteratorImpl<EllpackPage>(ellpack_));
|
||||
return BatchSet<EllpackPage>(BatchIterator<EllpackPage>(begin_iter));
|
||||
}
|
||||
|
||||
inline void GetCutsFromEllpack(EllpackPage const &, common::HistogramCuts *) {
|
||||
common::AssertGPUSupport();
|
||||
}
|
||||
#endif // !defined(XGBOOST_USE_CUDA)
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
|
||||
|
||||
@@ -25,16 +25,11 @@ class DataIterProxy {
|
||||
NextFn* next_;
|
||||
|
||||
public:
|
||||
DataIterProxy(DataIterHandle iter, ResetFn* reset, NextFn* next) :
|
||||
iter_{iter},
|
||||
reset_{reset}, next_{next} {}
|
||||
DataIterProxy(DataIterHandle iter, ResetFn* reset, NextFn* next)
|
||||
: iter_{iter}, reset_{reset}, next_{next} {}
|
||||
|
||||
bool Next() {
|
||||
return next_(iter_);
|
||||
}
|
||||
void Reset() {
|
||||
reset_(iter_);
|
||||
}
|
||||
bool Next() { return next_(iter_); }
|
||||
void Reset() { reset_(iter_); }
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -68,9 +63,8 @@ class DMatrixProxy : public DMatrix {
|
||||
}
|
||||
|
||||
void SetArrayData(char const* c_interface);
|
||||
void SetCSRData(char const *c_indptr, char const *c_indices,
|
||||
char const *c_values, bst_feature_t n_features,
|
||||
bool on_host);
|
||||
void SetCSRData(char const* c_indptr, char const* c_indices, char const* c_values,
|
||||
bst_feature_t n_features, bool on_host);
|
||||
|
||||
MetaInfo& Info() override { return info_; }
|
||||
MetaInfo const& Info() const override { return info_; }
|
||||
@@ -81,6 +75,12 @@ class DMatrixProxy : public DMatrix {
|
||||
bool GHistIndexExists() const override { return false; }
|
||||
bool SparsePageExists() const override { return false; }
|
||||
|
||||
/**
 * \brief Shared failure path for the batch getters of the proxy DMatrix.
 *
 * Logs the message below at FATAL level. The return statement after it builds a
 * batch set around a batch iterator constructed from nullptr, so the function
 * still has a value of the declared return type.
 * NOTE(review): presumably LOG(FATAL) does not return normally, which would make
 * the return statement unreachable in practice -- confirm against the logging macro.
 *
 * \tparam Page Type of the page the caller asked for.
 */
template <typename Page>
|
||||
BatchSet<Page> NoBatch() {
|
||||
LOG(FATAL) << "Proxy DMatrix cannot return data batch.";
|
||||
return BatchSet<Page>(BatchIterator<Page>(nullptr));
|
||||
}
|
||||
|
||||
DMatrix* Slice(common::Span<int32_t const> /*ridxs*/) override {
|
||||
LOG(FATAL) << "Slicing DMatrix is not supported for Proxy DMatrix.";
|
||||
return nullptr;
|
||||
@@ -89,29 +89,19 @@ class DMatrixProxy : public DMatrix {
|
||||
LOG(FATAL) << "Slicing DMatrix columns is not supported for Proxy DMatrix.";
|
||||
return nullptr;
|
||||
}
|
||||
BatchSet<SparsePage> GetRowBatches() override {
|
||||
LOG(FATAL) << "Not implemented.";
|
||||
return BatchSet<SparsePage>(BatchIterator<SparsePage>(nullptr));
|
||||
BatchSet<SparsePage> GetRowBatches() override { return NoBatch<SparsePage>(); }
|
||||
BatchSet<CSCPage> GetColumnBatches(Context const*) override { return NoBatch<CSCPage>(); }
|
||||
BatchSet<SortedCSCPage> GetSortedColumnBatches(Context const*) override {
|
||||
return NoBatch<SortedCSCPage>();
|
||||
}
|
||||
BatchSet<CSCPage> GetColumnBatches() override {
|
||||
LOG(FATAL) << "Not implemented.";
|
||||
return BatchSet<CSCPage>(BatchIterator<CSCPage>(nullptr));
|
||||
BatchSet<EllpackPage> GetEllpackBatches(Context const*, BatchParam const&) override {
|
||||
return NoBatch<EllpackPage>();
|
||||
}
|
||||
BatchSet<SortedCSCPage> GetSortedColumnBatches() override {
|
||||
LOG(FATAL) << "Not implemented.";
|
||||
return BatchSet<SortedCSCPage>(BatchIterator<SortedCSCPage>(nullptr));
|
||||
BatchSet<GHistIndexMatrix> GetGradientIndex(Context const*, BatchParam const&) override {
|
||||
return NoBatch<GHistIndexMatrix>();
|
||||
}
|
||||
BatchSet<EllpackPage> GetEllpackBatches(const BatchParam&) override {
|
||||
LOG(FATAL) << "Not implemented.";
|
||||
return BatchSet<EllpackPage>(BatchIterator<EllpackPage>(nullptr));
|
||||
}
|
||||
BatchSet<GHistIndexMatrix> GetGradientIndex(const BatchParam&) override {
|
||||
LOG(FATAL) << "Not implemented.";
|
||||
return BatchSet<GHistIndexMatrix>(BatchIterator<GHistIndexMatrix>(nullptr));
|
||||
}
|
||||
BatchSet<ExtSparsePage> GetExtBatches(BatchParam const&) override {
|
||||
LOG(FATAL) << "Not implemented.";
|
||||
return BatchSet<ExtSparsePage>(BatchIterator<ExtSparsePage>(nullptr));
|
||||
BatchSet<ExtSparsePage> GetExtBatches(Context const*, BatchParam const&) override {
|
||||
return NoBatch<ExtSparsePage>();
|
||||
}
|
||||
std::any Adapter() const { return batch_; }
|
||||
};
|
||||
@@ -144,8 +134,7 @@ decltype(auto) HostAdapterDispatch(DMatrixProxy const* proxy, Fn fn, bool* type_
|
||||
} else {
|
||||
LOG(FATAL) << "Unknown type: " << proxy->Adapter().type().name();
|
||||
}
|
||||
return std::result_of_t<Fn(
|
||||
decltype(std::declval<std::shared_ptr<ArrayAdapter>>()->Value()))>();
|
||||
return std::result_of_t<Fn(decltype(std::declval<std::shared_ptr<ArrayAdapter>>()->Value()))>();
|
||||
}
|
||||
}
|
||||
} // namespace xgboost::data
|
||||
|
||||
@@ -11,10 +11,12 @@
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
|
||||
#include "../common/error_msg.h" // for InconsistentMaxBin
|
||||
#include "../common/random.h"
|
||||
#include "../common/threading_utils.h"
|
||||
#include "./simple_batch_iterator.h"
|
||||
#include "adapter.h"
|
||||
#include "batch_utils.h" // for CheckEmpty, RegenGHist
|
||||
#include "gradient_index.h"
|
||||
#include "xgboost/c_api.h"
|
||||
#include "xgboost/data.h"
|
||||
@@ -28,7 +30,7 @@ const MetaInfo& SimpleDMatrix::Info() const { return info_; }
|
||||
DMatrix* SimpleDMatrix::Slice(common::Span<int32_t const> ridxs) {
|
||||
auto out = new SimpleDMatrix;
|
||||
SparsePage& out_page = *out->sparse_page_;
|
||||
for (auto const &page : this->GetBatches<SparsePage>()) {
|
||||
for (auto const& page : this->GetBatches<SparsePage>()) {
|
||||
auto batch = page.GetView();
|
||||
auto& h_data = out_page.data.HostVector();
|
||||
auto& h_offset = out_page.offset.HostVector();
|
||||
@@ -42,7 +44,7 @@ DMatrix* SimpleDMatrix::Slice(common::Span<int32_t const> ridxs) {
|
||||
out->Info() = this->Info().Slice(ridxs);
|
||||
out->Info().num_nonzero_ = h_offset.back();
|
||||
}
|
||||
out->ctx_ = this->ctx_;
|
||||
out->fmat_ctx_ = this->fmat_ctx_;
|
||||
return out;
|
||||
}
|
||||
|
||||
@@ -52,7 +54,7 @@ DMatrix* SimpleDMatrix::SliceCol(int num_slices, int slice_id) {
|
||||
auto const slice_size = info_.num_col_ / num_slices;
|
||||
auto const slice_start = slice_size * slice_id;
|
||||
auto const slice_end = (slice_id == num_slices - 1) ? info_.num_col_ : slice_start + slice_size;
|
||||
for (auto const &page : this->GetBatches<SparsePage>()) {
|
||||
for (auto const& page : this->GetBatches<SparsePage>()) {
|
||||
auto batch = page.GetView();
|
||||
auto& h_data = out_page.data.HostVector();
|
||||
auto& h_offset = out_page.offset.HostVector();
|
||||
@@ -60,9 +62,8 @@ DMatrix* SimpleDMatrix::SliceCol(int num_slices, int slice_id) {
|
||||
for (bst_row_t i = 0; i < this->Info().num_row_; i++) {
|
||||
auto inst = batch[i];
|
||||
auto prev_size = h_data.size();
|
||||
std::copy_if(inst.begin(), inst.end(), std::back_inserter(h_data), [&](Entry e) {
|
||||
return e.index >= slice_start && e.index < slice_end;
|
||||
});
|
||||
std::copy_if(inst.begin(), inst.end(), std::back_inserter(h_data),
|
||||
[&](Entry e) { return e.index >= slice_start && e.index < slice_end; });
|
||||
rptr += h_data.size() - prev_size;
|
||||
h_offset.emplace_back(rptr);
|
||||
}
|
||||
@@ -73,7 +74,7 @@ DMatrix* SimpleDMatrix::SliceCol(int num_slices, int slice_id) {
|
||||
return out;
|
||||
}
|
||||
|
||||
void SimpleDMatrix::ReindexFeatures() {
|
||||
void SimpleDMatrix::ReindexFeatures(Context const* ctx) {
|
||||
if (info_.IsVerticalFederated()) {
|
||||
std::vector<uint64_t> buffer(collective::GetWorldSize());
|
||||
buffer[collective::GetRank()] = info_.num_col_;
|
||||
@@ -82,72 +83,115 @@ void SimpleDMatrix::ReindexFeatures() {
|
||||
if (offset == 0) {
|
||||
return;
|
||||
}
|
||||
sparse_page_->Reindex(offset, ctx_.Threads());
|
||||
sparse_page_->Reindex(offset, ctx->Threads());
|
||||
}
|
||||
}
|
||||
|
||||
BatchSet<SparsePage> SimpleDMatrix::GetRowBatches() {
|
||||
// since csr is the default data structure so `source_` is always available.
|
||||
auto begin_iter = BatchIterator<SparsePage>(
|
||||
new SimpleBatchIteratorImpl<SparsePage>(sparse_page_));
|
||||
auto begin_iter =
|
||||
BatchIterator<SparsePage>(new SimpleBatchIteratorImpl<SparsePage>(sparse_page_));
|
||||
return BatchSet<SparsePage>(begin_iter);
|
||||
}
|
||||
|
||||
BatchSet<CSCPage> SimpleDMatrix::GetColumnBatches() {
|
||||
BatchSet<CSCPage> SimpleDMatrix::GetColumnBatches(Context const* ctx) {
|
||||
// column page doesn't exist, generate it
|
||||
if (!column_page_) {
|
||||
column_page_.reset(new CSCPage(sparse_page_->GetTranspose(info_.num_col_, ctx_.Threads())));
|
||||
column_page_.reset(new CSCPage(sparse_page_->GetTranspose(info_.num_col_, ctx->Threads())));
|
||||
}
|
||||
auto begin_iter =
|
||||
BatchIterator<CSCPage>(new SimpleBatchIteratorImpl<CSCPage>(column_page_));
|
||||
auto begin_iter = BatchIterator<CSCPage>(new SimpleBatchIteratorImpl<CSCPage>(column_page_));
|
||||
return BatchSet<CSCPage>(begin_iter);
|
||||
}
|
||||
|
||||
BatchSet<SortedCSCPage> SimpleDMatrix::GetSortedColumnBatches() {
|
||||
BatchSet<SortedCSCPage> SimpleDMatrix::GetSortedColumnBatches(Context const* ctx) {
|
||||
// Sorted column page doesn't exist, generate it
|
||||
if (!sorted_column_page_) {
|
||||
sorted_column_page_.reset(
|
||||
new SortedCSCPage(sparse_page_->GetTranspose(info_.num_col_, ctx_.Threads())));
|
||||
sorted_column_page_->SortRows(ctx_.Threads());
|
||||
new SortedCSCPage(sparse_page_->GetTranspose(info_.num_col_, ctx->Threads())));
|
||||
sorted_column_page_->SortRows(ctx->Threads());
|
||||
}
|
||||
auto begin_iter = BatchIterator<SortedCSCPage>(
|
||||
new SimpleBatchIteratorImpl<SortedCSCPage>(sorted_column_page_));
|
||||
auto begin_iter =
|
||||
BatchIterator<SortedCSCPage>(new SimpleBatchIteratorImpl<SortedCSCPage>(sorted_column_page_));
|
||||
return BatchSet<SortedCSCPage>(begin_iter);
|
||||
}
|
||||
|
||||
namespace {
|
||||
void CheckEmpty(BatchParam const& l, BatchParam const& r) {
|
||||
if (l == BatchParam{}) {
|
||||
CHECK(r != BatchParam{}) << "Batch parameter is not initialized.";
|
||||
BatchSet<EllpackPage> SimpleDMatrix::GetEllpackBatches(Context const* ctx,
|
||||
const BatchParam& param) {
|
||||
detail::CheckEmpty(batch_param_, param);
|
||||
if (ellpack_page_ && param.Initialized() && param.forbid_regen) {
|
||||
if (detail::RegenGHist(batch_param_, param)) {
|
||||
CHECK_EQ(batch_param_.max_bin, param.max_bin) << error::InconsistentMaxBin();
|
||||
}
|
||||
CHECK(!detail::RegenGHist(batch_param_, param));
|
||||
}
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
BatchSet<EllpackPage> SimpleDMatrix::GetEllpackBatches(const BatchParam& param) {
|
||||
// ELLPACK page doesn't exist, generate it
|
||||
CheckEmpty(batch_param_, param);
|
||||
if (!ellpack_page_ || RegenGHist(batch_param_, param)) {
|
||||
CHECK_GE(param.gpu_id, 0);
|
||||
if (!ellpack_page_ || detail::RegenGHist(batch_param_, param)) {
|
||||
// ELLPACK page doesn't exist, generate it
|
||||
LOG(INFO) << "Generating new Ellpack page.";
|
||||
// These places can ask for an ellpack page:
|
||||
// - GPU hist: the ctx must be on CUDA.
|
||||
// - IterativeDMatrix::InitFromCUDA: The ctx must be on CUDA.
|
||||
// - IterativeDMatrix::InitFromCPU: It asks for ellpack only if it exists. It should
|
||||
// not regen, otherwise it indicates a mismatched parameter like max_bin.
|
||||
CHECK_GE(param.max_bin, 2);
|
||||
ellpack_page_.reset(new EllpackPage(this, param));
|
||||
batch_param_ = param;
|
||||
if (ctx->IsCUDA()) {
|
||||
// The context passed in is on GPU, we pick it first since we prioritize the context
|
||||
// in Booster.
|
||||
ellpack_page_.reset(new EllpackPage(ctx, this, param));
|
||||
} else if (fmat_ctx_.IsCUDA()) {
|
||||
// DMatrix was initialized on GPU, we use the context from initialization.
|
||||
ellpack_page_.reset(new EllpackPage(&fmat_ctx_, this, param));
|
||||
} else {
|
||||
// Mismatched parameter, user set a new max_bin during training.
|
||||
auto cuda_ctx = ctx->MakeCUDA();
|
||||
ellpack_page_.reset(new EllpackPage(&cuda_ctx, this, param));
|
||||
}
|
||||
|
||||
batch_param_ = param.MakeCache();
|
||||
}
|
||||
auto begin_iter =
|
||||
BatchIterator<EllpackPage>(new SimpleBatchIteratorImpl<EllpackPage>(ellpack_page_));
|
||||
return BatchSet<EllpackPage>(begin_iter);
|
||||
}
|
||||
|
||||
BatchSet<GHistIndexMatrix> SimpleDMatrix::GetGradientIndex(const BatchParam& param) {
|
||||
CheckEmpty(batch_param_, param);
|
||||
if (!gradient_index_ || RegenGHist(batch_param_, param)) {
|
||||
BatchSet<GHistIndexMatrix> SimpleDMatrix::GetGradientIndex(Context const* ctx,
|
||||
const BatchParam& param) {
|
||||
detail::CheckEmpty(batch_param_, param);
|
||||
// Check whether we can regenerate the gradient index. This is to keep the consistency
|
||||
// between evaluation data and training data.
|
||||
if (gradient_index_ && param.Initialized() && param.forbid_regen) {
|
||||
if (detail::RegenGHist(batch_param_, param)) {
|
||||
CHECK_EQ(batch_param_.max_bin, param.max_bin) << error::InconsistentMaxBin();
|
||||
}
|
||||
CHECK(!detail::RegenGHist(batch_param_, param)) << "Inconsistent sparse threshold.";
|
||||
}
|
||||
if (!gradient_index_ || detail::RegenGHist(batch_param_, param)) {
|
||||
// GIDX page doesn't exist, generate it
|
||||
LOG(INFO) << "Generating new Gradient Index.";
|
||||
// These places can ask for a CSR gidx:
|
||||
// - CPU Hist: the ctx must be on CPU.
|
||||
// - IterativeDMatrix::InitFromCPU: The ctx must be on CPU.
|
||||
// - IterativeDMatrix::InitFromCUDA: It asks for gidx only if it exists. It should not
|
||||
// regen, otherwise it indicates a mismatched parameter like max_bin.
|
||||
CHECK_GE(param.max_bin, 2);
|
||||
CHECK_EQ(param.gpu_id, -1);
|
||||
// Used only by approx.
|
||||
auto sorted_sketch = param.regen;
|
||||
gradient_index_.reset(new GHistIndexMatrix(this, param.max_bin, param.sparse_thresh,
|
||||
sorted_sketch, this->ctx_.Threads(), param.hess));
|
||||
batch_param_ = param;
|
||||
if (ctx->IsCPU()) {
|
||||
// The context passed in is on CPU, we pick it first since we prioritize the context
|
||||
// in Booster.
|
||||
gradient_index_.reset(new GHistIndexMatrix{ctx, this, param.max_bin, param.sparse_thresh,
|
||||
sorted_sketch, param.hess});
|
||||
} else if (fmat_ctx_.IsCPU()) {
|
||||
// DMatrix was initialized on CPU, we use the context from initialization.
|
||||
gradient_index_.reset(new GHistIndexMatrix{&fmat_ctx_, this, param.max_bin,
|
||||
param.sparse_thresh, sorted_sketch, param.hess});
|
||||
} else {
|
||||
// Mismatched parameter, user set a new max_bin during training.
|
||||
auto cpu_ctx = ctx->MakeCPU();
|
||||
gradient_index_.reset(new GHistIndexMatrix{&cpu_ctx, this, param.max_bin, param.sparse_thresh,
|
||||
sorted_sketch, param.hess});
|
||||
}
|
||||
|
||||
batch_param_ = param.MakeCache();
|
||||
CHECK_EQ(batch_param_.hess.data(), param.hess.data());
|
||||
}
|
||||
auto begin_iter = BatchIterator<GHistIndexMatrix>(
|
||||
@@ -155,7 +199,7 @@ BatchSet<GHistIndexMatrix> SimpleDMatrix::GetGradientIndex(const BatchParam& par
|
||||
return BatchSet<GHistIndexMatrix>(begin_iter);
|
||||
}
|
||||
|
||||
BatchSet<ExtSparsePage> SimpleDMatrix::GetExtBatches(BatchParam const&) {
|
||||
BatchSet<ExtSparsePage> SimpleDMatrix::GetExtBatches(Context const*, BatchParam const&) {
|
||||
auto casted = std::make_shared<ExtSparsePage>(sparse_page_);
|
||||
CHECK(casted);
|
||||
auto begin_iter =
|
||||
@@ -166,7 +210,8 @@ BatchSet<ExtSparsePage> SimpleDMatrix::GetExtBatches(BatchParam const&) {
|
||||
template <typename AdapterT>
|
||||
SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread,
|
||||
DataSplitMode data_split_mode) {
|
||||
this->ctx_.nthread = nthread;
|
||||
Context ctx;
|
||||
ctx.Init(Args{{"nthread", std::to_string(nthread)}});
|
||||
|
||||
std::vector<uint64_t> qids;
|
||||
uint64_t default_max = std::numeric_limits<uint64_t>::max();
|
||||
@@ -176,13 +221,13 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread,
|
||||
auto& data_vec = sparse_page_->data.HostVector();
|
||||
uint64_t inferred_num_columns = 0;
|
||||
uint64_t total_batch_size = 0;
|
||||
// batch_size is either number of rows or cols, depending on data layout
|
||||
// batch_size is either number of rows or cols, depending on data layout
|
||||
|
||||
adapter->BeforeFirst();
|
||||
// Iterate over batches of input data
|
||||
while (adapter->Next()) {
|
||||
auto& batch = adapter->Value();
|
||||
auto batch_max_columns = sparse_page_->Push(batch, missing, ctx_.Threads());
|
||||
auto batch_max_columns = sparse_page_->Push(batch, missing, ctx.Threads());
|
||||
inferred_num_columns = std::max(batch_max_columns, inferred_num_columns);
|
||||
total_batch_size += batch.Size();
|
||||
// Append meta information if available
|
||||
@@ -229,19 +274,18 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread,
|
||||
info_.num_col_ = adapter->NumColumns();
|
||||
}
|
||||
|
||||
|
||||
// Synchronise worker columns
|
||||
info_.data_split_mode = data_split_mode;
|
||||
ReindexFeatures();
|
||||
ReindexFeatures(&ctx);
|
||||
info_.SynchronizeNumberOfColumns();
|
||||
|
||||
if (adapter->NumRows() == kAdapterUnknownSize) {
|
||||
using IteratorAdapterT
|
||||
= IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>;
|
||||
using IteratorAdapterT =
|
||||
IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>;
|
||||
// If AdapterT is either IteratorAdapter or FileAdapter type, use the total batch size to
|
||||
// determine the correct number of rows, as offset_vec may be too short
|
||||
if (std::is_same<AdapterT, IteratorAdapterT>::value
|
||||
|| std::is_same<AdapterT, FileAdapter>::value) {
|
||||
if (std::is_same<AdapterT, IteratorAdapterT>::value ||
|
||||
std::is_same<AdapterT, FileAdapter>::value) {
|
||||
info_.num_row_ = total_batch_size;
|
||||
// Ensure offset_vec.size() - 1 == [number of rows]
|
||||
while (offset_vec.size() - 1 < total_batch_size) {
|
||||
@@ -265,9 +309,11 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread,
|
||||
info_.num_nonzero_ = data_vec.size();
|
||||
|
||||
// Sort the index for row partitioners used by various tree methods.
|
||||
if (!sparse_page_->IsIndicesSorted(this->ctx_.Threads())) {
|
||||
sparse_page_->SortIndices(this->ctx_.Threads());
|
||||
if (!sparse_page_->IsIndicesSorted(ctx.Threads())) {
|
||||
sparse_page_->SortIndices(ctx.Threads());
|
||||
}
|
||||
|
||||
this->fmat_ctx_ = ctx;
|
||||
}
|
||||
|
||||
SimpleDMatrix::SimpleDMatrix(dmlc::Stream* in_stream) {
|
||||
@@ -280,12 +326,12 @@ SimpleDMatrix::SimpleDMatrix(dmlc::Stream* in_stream) {
|
||||
}
|
||||
|
||||
void SimpleDMatrix::SaveToLocalFile(const std::string& fname) {
|
||||
std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(fname.c_str(), "w"));
|
||||
int tmagic = kMagic;
|
||||
fo->Write(tmagic);
|
||||
info_.SaveBinary(fo.get());
|
||||
fo->Write(sparse_page_->offset.HostVector());
|
||||
fo->Write(sparse_page_->data.HostVector());
|
||||
std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(fname.c_str(), "w"));
|
||||
int tmagic = kMagic;
|
||||
fo->Write(tmagic);
|
||||
info_.SaveBinary(fo.get());
|
||||
fo->Write(sparse_page_->offset.HostVector());
|
||||
fo->Write(sparse_page_->data.HostVector());
|
||||
}
|
||||
|
||||
template SimpleDMatrix::SimpleDMatrix(DenseAdapter* adapter, float missing, int nthread,
|
||||
@@ -305,14 +351,14 @@ template SimpleDMatrix::SimpleDMatrix(DataTableAdapter* adapter, float missing,
|
||||
template SimpleDMatrix::SimpleDMatrix(FileAdapter* adapter, float missing, int nthread,
|
||||
DataSplitMode data_split_mode);
|
||||
template SimpleDMatrix::SimpleDMatrix(
|
||||
IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>
|
||||
*adapter,
|
||||
IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>* adapter,
|
||||
float missing, int nthread, DataSplitMode data_split_mode);
|
||||
|
||||
template <>
|
||||
SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, int nthread,
|
||||
DataSplitMode data_split_mode) {
|
||||
ctx_.nthread = nthread;
|
||||
Context ctx;
|
||||
ctx.nthread = nthread;
|
||||
|
||||
auto& offset_vec = sparse_page_->offset.HostVector();
|
||||
auto& data_vec = sparse_page_->data.HostVector();
|
||||
@@ -326,7 +372,7 @@ SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, i
|
||||
size_t num_elements = 0;
|
||||
size_t num_rows = 0;
|
||||
// Import Arrow RecordBatches
|
||||
#pragma omp parallel for reduction(+ : num_elements, num_rows) num_threads(ctx_.Threads())
|
||||
#pragma omp parallel for reduction(+ : num_elements, num_rows) num_threads(ctx.Threads())
|
||||
for (int i = 0; i < static_cast<int>(batches.size()); ++i) { // NOLINT
|
||||
num_elements += batches[i]->Import(missing);
|
||||
num_rows += batches[i]->Size();
|
||||
@@ -348,7 +394,7 @@ SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, i
|
||||
data_vec.resize(total_elements);
|
||||
offset_vec.resize(total_batch_size + 1);
|
||||
// Copy data into DMatrix
|
||||
#pragma omp parallel num_threads(ctx_.Threads())
|
||||
#pragma omp parallel num_threads(ctx.Threads())
|
||||
{
|
||||
#pragma omp for nowait
|
||||
for (int i = 0; i < static_cast<int>(batches.size()); ++i) { // NOLINT
|
||||
@@ -372,12 +418,14 @@ SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, i
|
||||
// Synchronise worker columns
|
||||
info_.num_col_ = adapter->NumColumns();
|
||||
info_.data_split_mode = data_split_mode;
|
||||
ReindexFeatures();
|
||||
ReindexFeatures(&ctx);
|
||||
info_.SynchronizeNumberOfColumns();
|
||||
|
||||
info_.num_row_ = total_batch_size;
|
||||
info_.num_nonzero_ = data_vec.size();
|
||||
CHECK_EQ(offset_vec.back(), info_.num_nonzero_);
|
||||
|
||||
fmat_ctx_ = ctx;
|
||||
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -1,12 +1,14 @@
|
||||
/*!
|
||||
* Copyright 2019-2021 by XGBoost Contributors
|
||||
/**
|
||||
* Copyright 2019-2023, XGBoost Contributors
|
||||
* \file simple_dmatrix.cu
|
||||
*/
|
||||
#include <thrust/copy.h>
|
||||
#include <xgboost/data.h>
|
||||
|
||||
#include "device_adapter.cuh" // for CurrentDevice
|
||||
#include "simple_dmatrix.cuh"
|
||||
#include "simple_dmatrix.h"
|
||||
#include "device_adapter.cuh"
|
||||
#include "xgboost/context.h" // for Context
|
||||
#include "xgboost/data.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
@@ -15,7 +17,7 @@ namespace data {
|
||||
// Current implementation assumes a single batch. More batches can
|
||||
// be supported in future. Does not currently support inferring row/column size
|
||||
template <typename AdapterT>
|
||||
SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int32_t /*nthread*/,
|
||||
SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, std::int32_t nthread,
|
||||
DataSplitMode data_split_mode) {
|
||||
CHECK(data_split_mode != DataSplitMode::kCol)
|
||||
<< "Column-wise data split is currently not supported on the GPU.";
|
||||
@@ -24,6 +26,9 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int32_t /*nthread
|
||||
CHECK_GE(device, 0);
|
||||
dh::safe_cuda(cudaSetDevice(device));
|
||||
|
||||
Context ctx;
|
||||
ctx.Init(Args{{"nthread", std::to_string(nthread)}, {"gpu_id", std::to_string(device)}});
|
||||
|
||||
CHECK(adapter->NumRows() != kAdapterUnknownSize);
|
||||
CHECK(adapter->NumColumns() != kAdapterUnknownSize);
|
||||
|
||||
@@ -33,13 +38,14 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int32_t /*nthread
|
||||
// Enforce single batch
|
||||
CHECK(!adapter->Next());
|
||||
|
||||
info_.num_nonzero_ =
|
||||
CopyToSparsePage(adapter->Value(), device, missing, sparse_page_.get());
|
||||
info_.num_nonzero_ = CopyToSparsePage(adapter->Value(), device, missing, sparse_page_.get());
|
||||
info_.num_col_ = adapter->NumColumns();
|
||||
info_.num_row_ = adapter->NumRows();
|
||||
// Synchronise worker columns
|
||||
info_.data_split_mode = data_split_mode;
|
||||
info_.SynchronizeNumberOfColumns();
|
||||
|
||||
this->fmat_ctx_ = ctx;
|
||||
}
|
||||
|
||||
template SimpleDMatrix::SimpleDMatrix(CudfAdapter* adapter, float missing,
|
||||
|
||||
@@ -32,7 +32,7 @@ class SimpleDMatrix : public DMatrix {
|
||||
|
||||
MetaInfo& Info() override;
|
||||
const MetaInfo& Info() const override;
|
||||
Context const* Ctx() const override { return &ctx_; }
|
||||
Context const* Ctx() const override { return &fmat_ctx_; }
|
||||
|
||||
bool SingleColBlock() const override { return true; }
|
||||
DMatrix* Slice(common::Span<int32_t const> ridxs) override;
|
||||
@@ -43,11 +43,11 @@ class SimpleDMatrix : public DMatrix {
|
||||
|
||||
protected:
|
||||
BatchSet<SparsePage> GetRowBatches() override;
|
||||
BatchSet<CSCPage> GetColumnBatches() override;
|
||||
BatchSet<SortedCSCPage> GetSortedColumnBatches() override;
|
||||
BatchSet<EllpackPage> GetEllpackBatches(const BatchParam& param) override;
|
||||
BatchSet<GHistIndexMatrix> GetGradientIndex(const BatchParam& param) override;
|
||||
BatchSet<ExtSparsePage> GetExtBatches(BatchParam const& param) override;
|
||||
BatchSet<CSCPage> GetColumnBatches(Context const* ctx) override;
|
||||
BatchSet<SortedCSCPage> GetSortedColumnBatches(Context const* ctx) override;
|
||||
BatchSet<EllpackPage> GetEllpackBatches(Context const* ctx, const BatchParam& param) override;
|
||||
BatchSet<GHistIndexMatrix> GetGradientIndex(Context const* ctx, const BatchParam& param) override;
|
||||
BatchSet<ExtSparsePage> GetExtBatches(Context const* ctx, BatchParam const& param) override;
|
||||
|
||||
MetaInfo info_;
|
||||
// Primary storage type
|
||||
@@ -69,10 +69,11 @@ class SimpleDMatrix : public DMatrix {
|
||||
* starting from 0. However, all the algorithms assume the features are globally indexed, so we
|
||||
* reindex the features based on the offset needed to obtain the global view.
|
||||
*/
|
||||
void ReindexFeatures();
|
||||
void ReindexFeatures(Context const* ctx);
|
||||
|
||||
private:
|
||||
Context ctx_;
|
||||
// Context used only for DMatrix initialization.
|
||||
Context fmat_ctx_;
|
||||
};
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
/*!
|
||||
* Copyright 2014-2022 by Contributors
|
||||
/**
|
||||
* Copyright 2014-2023 by XGBoost Contributors
|
||||
* \file sparse_page_dmatrix.cc
|
||||
*
|
||||
* \brief The external memory version of Page Iterator.
|
||||
* \author Tianqi Chen
|
||||
*/
|
||||
@@ -8,11 +9,10 @@
|
||||
|
||||
#include "../collective/communicator-inl.h"
|
||||
#include "./simple_batch_iterator.h"
|
||||
#include "batch_utils.h" // for RegenGHist
|
||||
#include "gradient_index.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
|
||||
namespace xgboost::data {
|
||||
MetaInfo &SparsePageDMatrix::Info() { return info_; }
|
||||
|
||||
const MetaInfo &SparsePageDMatrix::Info() const { return info_; }
|
||||
@@ -46,7 +46,9 @@ SparsePageDMatrix::SparsePageDMatrix(DataIterHandle iter_handle, DMatrixHandle p
|
||||
int32_t nthreads, std::string cache_prefix)
|
||||
: proxy_{proxy_handle}, iter_{iter_handle}, reset_{reset}, next_{next}, missing_{missing},
|
||||
cache_prefix_{std::move(cache_prefix)} {
|
||||
ctx_.nthread = nthreads;
|
||||
Context ctx;
|
||||
ctx.nthread = nthreads;
|
||||
|
||||
cache_prefix_ = cache_prefix_.empty() ? "DMatrix" : cache_prefix_;
|
||||
if (collective::IsDistributed()) {
|
||||
cache_prefix_ += ("-r" + std::to_string(collective::GetRank()));
|
||||
@@ -81,7 +83,7 @@ SparsePageDMatrix::SparsePageDMatrix(DataIterHandle iter_handle, DMatrixHandle p
|
||||
|
||||
// the proxy is iterated together with the sparse page source so we can obtain all
|
||||
// information in 1 pass.
|
||||
for (auto const &page : this->GetRowBatchesImpl()) {
|
||||
for (auto const &page : this->GetRowBatchesImpl(&ctx)) {
|
||||
this->info_.Extend(std::move(proxy->Info()), false, false);
|
||||
n_features = std::max(n_features, num_cols());
|
||||
n_samples += num_rows();
|
||||
@@ -98,9 +100,11 @@ SparsePageDMatrix::SparsePageDMatrix(DataIterHandle iter_handle, DMatrixHandle p
|
||||
|
||||
info_.SynchronizeNumberOfColumns();
|
||||
CHECK_NE(info_.num_col_, 0);
|
||||
|
||||
fmat_ctx_ = ctx;
|
||||
}
|
||||
|
||||
void SparsePageDMatrix::InitializeSparsePage() {
|
||||
void SparsePageDMatrix::InitializeSparsePage(Context const *ctx) {
|
||||
auto id = MakeCache(this, ".row.page", cache_prefix_, &cache_info_);
|
||||
// Don't use proxy DMatrix once this is already initialized, this allows users to
|
||||
// release the iterator and data.
|
||||
@@ -110,33 +114,33 @@ void SparsePageDMatrix::InitializeSparsePage() {
|
||||
return;
|
||||
}
|
||||
|
||||
auto iter = DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext>{
|
||||
iter_, reset_, next_};
|
||||
auto iter = DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext>{iter_, reset_, next_};
|
||||
DMatrixProxy *proxy = MakeProxy(proxy_);
|
||||
sparse_page_source_.reset(); // clear before creating new one to prevent conflicts.
|
||||
sparse_page_source_ = std::make_shared<SparsePageSource>(
|
||||
iter, proxy, this->missing_, this->ctx_.Threads(), this->info_.num_col_,
|
||||
this->n_batches_, cache_info_.at(id));
|
||||
sparse_page_source_ = std::make_shared<SparsePageSource>(iter, proxy, this->missing_,
|
||||
ctx->Threads(), this->info_.num_col_,
|
||||
this->n_batches_, cache_info_.at(id));
|
||||
}
|
||||
|
||||
BatchSet<SparsePage> SparsePageDMatrix::GetRowBatchesImpl() {
|
||||
this->InitializeSparsePage();
|
||||
BatchSet<SparsePage> SparsePageDMatrix::GetRowBatchesImpl(Context const* ctx) {
|
||||
this->InitializeSparsePage(ctx);
|
||||
auto begin_iter = BatchIterator<SparsePage>(sparse_page_source_);
|
||||
return BatchSet<SparsePage>(BatchIterator<SparsePage>(begin_iter));
|
||||
}
|
||||
|
||||
BatchSet<SparsePage> SparsePageDMatrix::GetRowBatches() {
|
||||
return this->GetRowBatchesImpl();
|
||||
// Use context from initialization for the default row page.
|
||||
return this->GetRowBatchesImpl(&fmat_ctx_);
|
||||
}
|
||||
|
||||
BatchSet<CSCPage> SparsePageDMatrix::GetColumnBatches() {
|
||||
BatchSet<CSCPage> SparsePageDMatrix::GetColumnBatches(Context const *ctx) {
|
||||
auto id = MakeCache(this, ".col.page", cache_prefix_, &cache_info_);
|
||||
CHECK_NE(this->Info().num_col_, 0);
|
||||
this->InitializeSparsePage();
|
||||
this->InitializeSparsePage(ctx);
|
||||
if (!column_source_) {
|
||||
column_source_ = std::make_shared<CSCPageSource>(
|
||||
this->missing_, this->ctx_.Threads(), this->Info().num_col_,
|
||||
this->n_batches_, cache_info_.at(id), sparse_page_source_);
|
||||
column_source_ =
|
||||
std::make_shared<CSCPageSource>(this->missing_, ctx->Threads(), this->Info().num_col_,
|
||||
this->n_batches_, cache_info_.at(id), sparse_page_source_);
|
||||
} else {
|
||||
column_source_->Reset();
|
||||
}
|
||||
@@ -144,14 +148,14 @@ BatchSet<CSCPage> SparsePageDMatrix::GetColumnBatches() {
|
||||
return BatchSet<CSCPage>(BatchIterator<CSCPage>(begin_iter));
|
||||
}
|
||||
|
||||
BatchSet<SortedCSCPage> SparsePageDMatrix::GetSortedColumnBatches() {
|
||||
BatchSet<SortedCSCPage> SparsePageDMatrix::GetSortedColumnBatches(Context const *ctx) {
|
||||
auto id = MakeCache(this, ".sorted.col.page", cache_prefix_, &cache_info_);
|
||||
CHECK_NE(this->Info().num_col_, 0);
|
||||
this->InitializeSparsePage();
|
||||
this->InitializeSparsePage(ctx);
|
||||
if (!sorted_column_source_) {
|
||||
sorted_column_source_ = std::make_shared<SortedCSCPageSource>(
|
||||
this->missing_, this->ctx_.Threads(), this->Info().num_col_,
|
||||
this->n_batches_, cache_info_.at(id), sparse_page_source_);
|
||||
this->missing_, ctx->Threads(), this->Info().num_col_, this->n_batches_, cache_info_.at(id),
|
||||
sparse_page_source_);
|
||||
} else {
|
||||
sorted_column_source_->Reset();
|
||||
}
|
||||
@@ -159,27 +163,27 @@ BatchSet<SortedCSCPage> SparsePageDMatrix::GetSortedColumnBatches() {
|
||||
return BatchSet<SortedCSCPage>(BatchIterator<SortedCSCPage>(begin_iter));
|
||||
}
|
||||
|
||||
BatchSet<GHistIndexMatrix> SparsePageDMatrix::GetGradientIndex(const BatchParam &param) {
|
||||
BatchSet<GHistIndexMatrix> SparsePageDMatrix::GetGradientIndex(Context const *ctx,
|
||||
const BatchParam &param) {
|
||||
CHECK_GE(param.max_bin, 2);
|
||||
auto id = MakeCache(this, ".gradient_index.page", cache_prefix_, &cache_info_);
|
||||
this->InitializeSparsePage();
|
||||
if (!cache_info_.at(id)->written || RegenGHist(batch_param_, param)) {
|
||||
this->InitializeSparsePage(ctx);
|
||||
if (!cache_info_.at(id)->written || detail::RegenGHist(batch_param_, param)) {
|
||||
cache_info_.erase(id);
|
||||
MakeCache(this, ".gradient_index.page", cache_prefix_, &cache_info_);
|
||||
LOG(INFO) << "Generating new Gradient Index.";
|
||||
// Use sorted sketch for approx.
|
||||
auto sorted_sketch = param.regen;
|
||||
auto cuts =
|
||||
common::SketchOnDMatrix(this, param.max_bin, ctx_.Threads(), sorted_sketch, param.hess);
|
||||
this->InitializeSparsePage(); // reset after use.
|
||||
auto cuts = common::SketchOnDMatrix(ctx, this, param.max_bin, sorted_sketch, param.hess);
|
||||
this->InitializeSparsePage(ctx); // reset after use.
|
||||
|
||||
batch_param_ = param;
|
||||
ghist_index_source_.reset();
|
||||
CHECK_NE(cuts.Values().size(), 0);
|
||||
auto ft = this->info_.feature_types.ConstHostSpan();
|
||||
ghist_index_source_.reset(new GradientIndexPageSource(
|
||||
this->missing_, this->ctx_.Threads(), this->Info().num_col_, this->n_batches_,
|
||||
cache_info_.at(id), param, std::move(cuts), this->IsDense(), ft, sparse_page_source_));
|
||||
this->missing_, ctx->Threads(), this->Info().num_col_, this->n_batches_, cache_info_.at(id),
|
||||
param, std::move(cuts), this->IsDense(), ft, sparse_page_source_));
|
||||
} else {
|
||||
CHECK(ghist_index_source_);
|
||||
ghist_index_source_->Reset();
|
||||
@@ -189,11 +193,10 @@ BatchSet<GHistIndexMatrix> SparsePageDMatrix::GetGradientIndex(const BatchParam
|
||||
}
|
||||
|
||||
#if !defined(XGBOOST_USE_CUDA)
|
||||
BatchSet<EllpackPage> SparsePageDMatrix::GetEllpackBatches(const BatchParam &) {
|
||||
BatchSet<EllpackPage> SparsePageDMatrix::GetEllpackBatches(Context const *, const BatchParam &) {
|
||||
common::AssertGPUSupport();
|
||||
auto begin_iter = BatchIterator<EllpackPage>(ellpack_page_source_);
|
||||
return BatchSet<EllpackPage>(BatchIterator<EllpackPage>(begin_iter));
|
||||
}
|
||||
#endif // !defined(XGBOOST_USE_CUDA)
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::data
|
||||
|
||||
@@ -1,42 +1,40 @@
|
||||
/*!
|
||||
* Copyright 2021 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2021-2023 by XGBoost contributors
|
||||
*/
|
||||
#include "sparse_page_source.h"
|
||||
#include "../common/hist_util.cuh"
|
||||
#include "batch_utils.h" // for CheckEmpty, RegenGHist
|
||||
#include "ellpack_page.cuh"
|
||||
#include "sparse_page_dmatrix.h"
|
||||
#include "sparse_page_source.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
BatchSet<EllpackPage> SparsePageDMatrix::GetEllpackBatches(const BatchParam& param) {
|
||||
CHECK_GE(param.gpu_id, 0);
|
||||
namespace xgboost::data {
|
||||
BatchSet<EllpackPage> SparsePageDMatrix::GetEllpackBatches(Context const* ctx,
|
||||
const BatchParam& param) {
|
||||
CHECK(ctx->IsCUDA());
|
||||
CHECK_GE(param.max_bin, 2);
|
||||
if (!(batch_param_ != BatchParam{})) {
|
||||
CHECK(param != BatchParam{}) << "Batch parameter is not initialized.";
|
||||
}
|
||||
detail::CheckEmpty(batch_param_, param);
|
||||
auto id = MakeCache(this, ".ellpack.page", cache_prefix_, &cache_info_);
|
||||
size_t row_stride = 0;
|
||||
this->InitializeSparsePage();
|
||||
if (!cache_info_.at(id)->written || RegenGHist(batch_param_, param)) {
|
||||
this->InitializeSparsePage(ctx);
|
||||
if (!cache_info_.at(id)->written || detail::RegenGHist(batch_param_, param)) {
|
||||
// reinitialize the cache
|
||||
cache_info_.erase(id);
|
||||
MakeCache(this, ".ellpack.page", cache_prefix_, &cache_info_);
|
||||
std::unique_ptr<common::HistogramCuts> cuts;
|
||||
cuts.reset(new common::HistogramCuts{
|
||||
common::DeviceSketch(param.gpu_id, this, param.max_bin, 0)});
|
||||
this->InitializeSparsePage(); // reset after use.
|
||||
cuts.reset(
|
||||
new common::HistogramCuts{common::DeviceSketch(ctx->gpu_id, this, param.max_bin, 0)});
|
||||
this->InitializeSparsePage(ctx); // reset after use.
|
||||
|
||||
row_stride = GetRowStride(this);
|
||||
this->InitializeSparsePage(); // reset after use.
|
||||
this->InitializeSparsePage(ctx); // reset after use.
|
||||
CHECK_NE(row_stride, 0);
|
||||
batch_param_ = param;
|
||||
|
||||
auto ft = this->info_.feature_types.ConstDeviceSpan();
|
||||
ellpack_page_source_.reset(); // release resources.
|
||||
ellpack_page_source_.reset(new EllpackPageSource(
|
||||
this->missing_, this->ctx_.Threads(), this->Info().num_col_,
|
||||
this->n_batches_, cache_info_.at(id), param, std::move(cuts),
|
||||
this->IsDense(), row_stride, ft, sparse_page_source_));
|
||||
this->missing_, ctx->Threads(), this->Info().num_col_, this->n_batches_, cache_info_.at(id),
|
||||
param, std::move(cuts), this->IsDense(), row_stride, ft, sparse_page_source_, ctx->gpu_id));
|
||||
} else {
|
||||
CHECK(sparse_page_source_);
|
||||
ellpack_page_source_->Reset();
|
||||
@@ -45,5 +43,4 @@ BatchSet<EllpackPage> SparsePageDMatrix::GetEllpackBatches(const BatchParam& par
|
||||
auto begin_iter = BatchIterator<EllpackPage>(ellpack_page_source_);
|
||||
return BatchSet<EllpackPage>(BatchIterator<EllpackPage>(begin_iter));
|
||||
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::data
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2015-2021 by Contributors
|
||||
/**
|
||||
* Copyright 2015-2023, XGBoost Contributors
|
||||
* \file sparse_page_dmatrix.h
|
||||
* \brief External-memory version of DMatrix.
|
||||
* \author Tianqi Chen
|
||||
@@ -9,12 +9,13 @@
|
||||
|
||||
#include <xgboost/data.h>
|
||||
#include <xgboost/logging.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
|
||||
#include "ellpack_page_source.h"
|
||||
#include "gradient_index_page_source.h"
|
||||
@@ -69,19 +70,18 @@ class SparsePageDMatrix : public DMatrix {
|
||||
XGDMatrixCallbackNext *next_;
|
||||
|
||||
float missing_;
|
||||
Context ctx_;
|
||||
Context fmat_ctx_;
|
||||
std::string cache_prefix_;
|
||||
uint32_t n_batches_ {0};
|
||||
uint32_t n_batches_{0};
|
||||
// sparse page is the source to other page types, we make a special member function.
|
||||
void InitializeSparsePage();
|
||||
void InitializeSparsePage(Context const *ctx);
|
||||
// Non-virtual version that can be used in constructor
|
||||
BatchSet<SparsePage> GetRowBatchesImpl();
|
||||
BatchSet<SparsePage> GetRowBatchesImpl(Context const *ctx);
|
||||
|
||||
public:
|
||||
explicit SparsePageDMatrix(DataIterHandle iter, DMatrixHandle proxy,
|
||||
DataIterResetCallback *reset,
|
||||
XGDMatrixCallbackNext *next, float missing,
|
||||
int32_t nthreads, std::string cache_prefix);
|
||||
explicit SparsePageDMatrix(DataIterHandle iter, DMatrixHandle proxy, DataIterResetCallback *reset,
|
||||
XGDMatrixCallbackNext *next, float missing, int32_t nthreads,
|
||||
std::string cache_prefix);
|
||||
|
||||
~SparsePageDMatrix() override {
|
||||
// Clear out all resources before deleting the cache file.
|
||||
@@ -98,9 +98,9 @@ class SparsePageDMatrix : public DMatrix {
|
||||
}
|
||||
}
|
||||
|
||||
MetaInfo& Info() override;
|
||||
const MetaInfo& Info() const override;
|
||||
Context const* Ctx() const override { return &ctx_; }
|
||||
MetaInfo &Info() override;
|
||||
const MetaInfo &Info() const override;
|
||||
Context const *Ctx() const override { return &fmat_ctx_; }
|
||||
|
||||
bool SingleColBlock() const override { return false; }
|
||||
DMatrix *Slice(common::Span<int32_t const>) override {
|
||||
@@ -114,11 +114,11 @@ class SparsePageDMatrix : public DMatrix {
|
||||
|
||||
private:
|
||||
BatchSet<SparsePage> GetRowBatches() override;
|
||||
BatchSet<CSCPage> GetColumnBatches() override;
|
||||
BatchSet<SortedCSCPage> GetSortedColumnBatches() override;
|
||||
BatchSet<EllpackPage> GetEllpackBatches(const BatchParam& param) override;
|
||||
BatchSet<GHistIndexMatrix> GetGradientIndex(const BatchParam&) override;
|
||||
BatchSet<ExtSparsePage> GetExtBatches(BatchParam const &) override {
|
||||
BatchSet<CSCPage> GetColumnBatches(Context const *ctx) override;
|
||||
BatchSet<SortedCSCPage> GetSortedColumnBatches(Context const *ctx) override;
|
||||
BatchSet<EllpackPage> GetEllpackBatches(Context const *ctx, const BatchParam &param) override;
|
||||
BatchSet<GHistIndexMatrix> GetGradientIndex(Context const *ctx, const BatchParam &) override;
|
||||
BatchSet<ExtSparsePage> GetExtBatches(Context const *, BatchParam const &) override {
|
||||
LOG(FATAL) << "Can not obtain a single CSR page for external memory DMatrix";
|
||||
return BatchSet<ExtSparsePage>(BatchIterator<ExtSparsePage>(nullptr));
|
||||
}
|
||||
@@ -141,9 +141,8 @@ inline std::string MakeId(std::string prefix, SparsePageDMatrix *ptr) {
|
||||
return prefix + "-" + ss.str();
|
||||
}
|
||||
|
||||
inline std::string
|
||||
MakeCache(SparsePageDMatrix *ptr, std::string format, std::string prefix,
|
||||
std::map<std::string, std::shared_ptr<Cache>> *out) {
|
||||
inline std::string MakeCache(SparsePageDMatrix *ptr, std::string format, std::string prefix,
|
||||
std::map<std::string, std::shared_ptr<Cache>> *out) {
|
||||
auto &cache_info = *out;
|
||||
auto name = MakeId(prefix, ptr);
|
||||
auto id = name + format;
|
||||
|
||||
Reference in New Issue
Block a user