Use Booster context in DMatrix. (#8896)

- Pass context from booster to DMatrix.
- Use context instead of integer for `n_threads`.
- Check the consistency configuration for `max_bin`.
- Test for all combinations of initialization options.
This commit is contained in:
Jiaming Yuan
2023-04-28 21:47:14 +08:00
committed by GitHub
parent 1f9a57d17b
commit 08ce495b5d
67 changed files with 1283 additions and 935 deletions

View File

@@ -3,30 +3,50 @@
*/
#include "xgboost/c_api.h"
#include <rabit/c_api.h>
#include <algorithm> // for copy
#include <cinttypes> // for strtoimax
#include <cmath> // for nan
#include <cstring> // for strcmp
#include <fstream> // for operator<<, basic_ostream, ios, stringstream
#include <functional> // for less
#include <limits> // for numeric_limits
#include <map> // for operator!=, _Rb_tree_const_iterator, _Rb_tre...
#include <memory> // for shared_ptr, allocator, __shared_ptr_access
#include <string> // for char_traits, basic_string, operator==, string
#include <system_error> // for errc
#include <utility> // for pair
#include <vector> // for vector
#include <cstring>
#include <fstream>
#include <memory>
#include <string>
#include <vector>
#include "../collective/communicator-inl.h"
#include "../common/api_entry.h" // XGBAPIThreadLocalEntry
#include "../common/charconv.h"
#include "../common/io.h"
#include "../data/adapter.h"
#include "../data/simple_dmatrix.h"
#include "c_api_utils.h"
#include "xgboost/base.h"
#include "xgboost/data.h"
#include "xgboost/global_config.h"
#include "xgboost/host_device_vector.h"
#include "xgboost/json.h"
#include "xgboost/learner.h"
#include "xgboost/logging.h"
#include "xgboost/string_view.h" // StringView
#include "xgboost/version_config.h"
#include "../collective/communicator-inl.h" // for Allreduce, Broadcast, Finalize, GetProcessor...
#include "../common/api_entry.h" // for XGBAPIThreadLocalEntry
#include "../common/charconv.h" // for from_chars, to_chars, NumericLimits, from_ch...
#include "../common/io.h" // for FileExtension, LoadSequentialFile, MemoryBuf...
#include "../common/threading_utils.h" // for OmpGetNumThreads, ParallelFor
#include "../data/adapter.h" // for ArrayAdapter, DenseAdapter, RecordBatchesIte...
#include "../data/proxy_dmatrix.h" // for DMatrixProxy
#include "../data/simple_dmatrix.h" // for SimpleDMatrix
#include "c_api_error.h" // for xgboost_CHECK_C_ARG_PTR, API_END, API_BEGIN
#include "c_api_utils.h" // for RequiredArg, OptionalArg, GetMissing, CastDM...
#include "dmlc/base.h" // for BeginPtr, DMLC_ATTRIBUTE_UNUSED
#include "dmlc/io.h" // for Stream
#include "dmlc/parameter.h" // for FieldAccessEntry, FieldEntry, ParamManager
#include "dmlc/thread_local.h" // for ThreadLocalStore
#include "rabit/c_api.h" // for RabitLinkTag
#include "rabit/rabit.h" // for CheckPoint, LoadCheckPoint
#include "xgboost/base.h" // for bst_ulong, bst_float, GradientPair, bst_feat...
#include "xgboost/context.h" // for Context
#include "xgboost/data.h" // for DMatrix, MetaInfo, DataType, ExtSparsePage
#include "xgboost/feature_map.h" // for FeatureMap
#include "xgboost/global_config.h" // for GlobalConfiguration, GlobalConfigThreadLocal...
#include "xgboost/host_device_vector.h" // for HostDeviceVector
#include "xgboost/intrusive_ptr.h" // for xgboost
#include "xgboost/json.h" // for Json, get, Integer, IsA, Boolean, String
#include "xgboost/learner.h" // for Learner, PredictionType
#include "xgboost/logging.h" // for LOG_FATAL, LogMessageFatal, CHECK, LogCheck_EQ
#include "xgboost/predictor.h" // for PredictionCacheEntry
#include "xgboost/span.h" // for Span
#include "xgboost/string_view.h" // for StringView, operator<<
#include "xgboost/version_config.h" // for XGBOOST_VER_MAJOR, XGBOOST_VER_MINOR, XGBOOS...
#if defined(XGBOOST_USE_FEDERATED)
#include "../../plugin/federated/federated_server.h"
@@ -341,10 +361,10 @@ XGB_DLL int XGQuantileDMatrixCreateFromCallback(DataIterHandle iter, DMatrixHand
API_END();
}
XGB_DLL int XGProxyDMatrixCreate(DMatrixHandle* out) {
XGB_DLL int XGProxyDMatrixCreate(DMatrixHandle *out) {
API_BEGIN();
xgboost_CHECK_C_ARG_PTR(out);
*out = new std::shared_ptr<xgboost::DMatrix>(new xgboost::data::DMatrixProxy);;
*out = new std::shared_ptr<xgboost::DMatrix>(new xgboost::data::DMatrixProxy);
API_END();
}
@@ -746,7 +766,7 @@ XGB_DLL int XGDMatrixGetDataAsCSR(DMatrixHandle const handle, char const *config
CHECK_LE(p_m->Info().num_col_, std::numeric_limits<unsigned>::max());
for (auto const &page : p_m->GetBatches<ExtSparsePage>()) {
for (auto const &page : p_m->GetBatches<ExtSparsePage>(p_m->Ctx(), BatchParam{})) {
CHECK(page.page);
auto const &h_offset = page.page->offset.ConstHostVector();
std::copy(h_offset.cbegin(), h_offset.cend(), out_indptr);

View File

@@ -28,5 +28,10 @@ constexpr StringView InfInData() {
constexpr StringView NoF128() {
return "128-bit floating point is not supported on current platform.";
}
constexpr StringView InconsistentMaxBin() {
return "Inconsistent `max_bin`. `max_bin` should be the same across different QuantileDMatrix, "
"and consistent with the Booster being trained.";
}
} // namespace xgboost::error
#endif // XGBOOST_COMMON_ERROR_MSG_H_

View File

@@ -2,15 +2,18 @@
* Copyright 2017-2023 by XGBoost Contributors
* \file hist_util.cc
*/
#include "hist_util.h"
#include <dmlc/timer.h>
#include <vector>
#include "xgboost/base.h"
#include "../common/common.h"
#include "hist_util.h"
#include "column_matrix.h"
#include "quantile.h"
#include "xgboost/base.h"
#include "xgboost/context.h" // Context
#include "xgboost/data.h" // SparsePage, SortedCSCPage
#if defined(XGBOOST_MM_PREFETCH_PRESENT)
#include <xmmintrin.h>
@@ -28,10 +31,11 @@ HistogramCuts::HistogramCuts() {
cut_ptrs_.HostVector().emplace_back(0);
}
HistogramCuts SketchOnDMatrix(DMatrix *m, int32_t max_bins, int32_t n_threads, bool use_sorted,
HistogramCuts SketchOnDMatrix(Context const *ctx, DMatrix *m, bst_bin_t max_bins, bool use_sorted,
Span<float> const hessian) {
HistogramCuts out;
auto const& info = m->Info();
auto const &info = m->Info();
auto n_threads = ctx->Threads();
std::vector<bst_row_t> reduced(info.num_col_, 0);
for (auto const &page : m->GetBatches<SparsePage>()) {
auto const &entries_per_column =
@@ -44,16 +48,19 @@ HistogramCuts SketchOnDMatrix(DMatrix *m, int32_t max_bins, int32_t n_threads, b
}
if (!use_sorted) {
HostSketchContainer container(max_bins, m->Info().feature_types.ConstHostSpan(), reduced,
HostSketchContainer::UseGroup(info), n_threads);
for (auto const& page : m->GetBatches<SparsePage>()) {
HostSketchContainer container(ctx, max_bins, m->Info().feature_types.ConstHostSpan(), reduced,
HostSketchContainer::UseGroup(info));
for (auto const &page : m->GetBatches<SparsePage>()) {
container.PushRowPage(page, info, hessian);
}
container.MakeCuts(m->Info(), &out);
} else {
SortedSketchContainer container{max_bins, m->Info().feature_types.ConstHostSpan(), reduced,
HostSketchContainer::UseGroup(info), n_threads};
for (auto const& page : m->GetBatches<SortedCSCPage>()) {
SortedSketchContainer container{ctx,
max_bins,
m->Info().feature_types.ConstHostSpan(),
reduced,
HostSketchContainer::UseGroup(info)};
for (auto const &page : m->GetBatches<SortedCSCPage>(ctx)) {
container.PushColPage(page, info, hessian);
}
container.MakeCuts(m->Info(), &out);

View File

@@ -170,7 +170,7 @@ class HistogramCuts {
* \param use_sorted Whether should we use SortedCSC for sketching, it's more efficient
* but consumes more memory.
*/
HistogramCuts SketchOnDMatrix(DMatrix* m, int32_t max_bins, int32_t n_threads,
HistogramCuts SketchOnDMatrix(Context const* ctx, DMatrix* m, bst_bin_t max_bins,
bool use_sorted = false, Span<float> const hessian = {});
enum BinTypeSize : uint8_t {

View File

@@ -16,16 +16,16 @@ namespace xgboost {
namespace common {
template <typename WQSketch>
SketchContainerImpl<WQSketch>::SketchContainerImpl(std::vector<bst_row_t> columns_size,
SketchContainerImpl<WQSketch>::SketchContainerImpl(Context const *ctx,
std::vector<bst_row_t> columns_size,
int32_t max_bins,
Span<FeatureType const> feature_types,
bool use_group,
int32_t n_threads)
bool use_group)
: feature_types_(feature_types.cbegin(), feature_types.cend()),
columns_size_{std::move(columns_size)},
max_bins_{max_bins},
use_group_ind_{use_group},
n_threads_{n_threads} {
n_threads_{ctx->Threads()} {
monitor_.Init(__func__);
CHECK_NE(columns_size_.size(), 0);
sketches_.resize(columns_size_.size());
@@ -380,13 +380,13 @@ auto AddCategories(std::set<float> const &categories, HistogramCuts *cuts) {
}
template <typename WQSketch>
void SketchContainerImpl<WQSketch>::MakeCuts(MetaInfo const& info, HistogramCuts* cuts) {
void SketchContainerImpl<WQSketch>::MakeCuts(MetaInfo const &info, HistogramCuts *p_cuts) {
monitor_.Start(__func__);
std::vector<typename WQSketch::SummaryContainer> reduced;
std::vector<int32_t> num_cuts;
this->AllReduce(info, &reduced, &num_cuts);
cuts->min_vals_.HostVector().resize(sketches_.size(), 0.0f);
p_cuts->min_vals_.HostVector().resize(sketches_.size(), 0.0f);
std::vector<typename WQSketch::SummaryContainer> final_summaries(reduced.size());
ParallelFor(reduced.size(), n_threads_, Sched::Guided(), [&](size_t fidx) {
@@ -401,48 +401,48 @@ void SketchContainerImpl<WQSketch>::MakeCuts(MetaInfo const& info, HistogramCuts
a.SetPrune(reduced[fidx], max_num_bins + 1);
CHECK(a.data && reduced[fidx].data);
const bst_float mval = a.data[0].value;
cuts->min_vals_.HostVector()[fidx] = mval - fabs(mval) - 1e-5f;
p_cuts->min_vals_.HostVector()[fidx] = mval - fabs(mval) - 1e-5f;
} else {
// Empty column.
const float mval = 1e-5f;
cuts->min_vals_.HostVector()[fidx] = mval;
p_cuts->min_vals_.HostVector()[fidx] = mval;
}
});
float max_cat{-1.f};
for (size_t fid = 0; fid < reduced.size(); ++fid) {
size_t max_num_bins = std::min(num_cuts[fid], max_bins_);
typename WQSketch::SummaryContainer const& a = final_summaries[fid];
typename WQSketch::SummaryContainer const &a = final_summaries[fid];
if (IsCat(feature_types_, fid)) {
max_cat = std::max(max_cat, AddCategories(categories_.at(fid), cuts));
max_cat = std::max(max_cat, AddCategories(categories_.at(fid), p_cuts));
} else {
AddCutPoint<WQSketch>(a, max_num_bins, cuts);
AddCutPoint<WQSketch>(a, max_num_bins, p_cuts);
// push a value that is greater than anything
const bst_float cpt =
(a.size > 0) ? a.data[a.size - 1].value : cuts->min_vals_.HostVector()[fid];
(a.size > 0) ? a.data[a.size - 1].value : p_cuts->min_vals_.HostVector()[fid];
// this must be bigger than last value in a scale
const bst_float last = cpt + (fabs(cpt) + 1e-5f);
cuts->cut_values_.HostVector().push_back(last);
p_cuts->cut_values_.HostVector().push_back(last);
}
// Ensure that every feature gets at least one quantile point
CHECK_LE(cuts->cut_values_.HostVector().size(), std::numeric_limits<uint32_t>::max());
auto cut_size = static_cast<uint32_t>(cuts->cut_values_.HostVector().size());
CHECK_GT(cut_size, cuts->cut_ptrs_.HostVector().back());
cuts->cut_ptrs_.HostVector().push_back(cut_size);
CHECK_LE(p_cuts->cut_values_.HostVector().size(), std::numeric_limits<uint32_t>::max());
auto cut_size = static_cast<uint32_t>(p_cuts->cut_values_.HostVector().size());
CHECK_GT(cut_size, p_cuts->cut_ptrs_.HostVector().back());
p_cuts->cut_ptrs_.HostVector().push_back(cut_size);
}
cuts->SetCategorical(this->has_categorical_, max_cat);
p_cuts->SetCategorical(this->has_categorical_, max_cat);
monitor_.Stop(__func__);
}
template class SketchContainerImpl<WQuantileSketch<float, float>>;
template class SketchContainerImpl<WXQuantileSketch<float, float>>;
HostSketchContainer::HostSketchContainer(int32_t max_bins, common::Span<FeatureType const> ft,
std::vector<size_t> columns_size, bool use_group,
int32_t n_threads)
: SketchContainerImpl{columns_size, max_bins, ft, use_group, n_threads} {
HostSketchContainer::HostSketchContainer(Context const *ctx, bst_bin_t max_bins,
common::Span<FeatureType const> ft,
std::vector<size_t> columns_size, bool use_group)
: SketchContainerImpl{ctx, columns_size, max_bins, ft, use_group} {
monitor_.Init(__func__);
ParallelFor(sketches_.size(), n_threads_, Sched::Auto(), [&](auto i) {
auto n_bins = std::min(static_cast<size_t>(max_bins_), columns_size_[i]);

View File

@@ -800,9 +800,8 @@ class SketchContainerImpl {
* \param max_bins maximum number of bins for each feature.
* \param use_group whether is assigned to group to data instance.
*/
SketchContainerImpl(std::vector<bst_row_t> columns_size, int32_t max_bins,
common::Span<FeatureType const> feature_types, bool use_group,
int32_t n_threads);
SketchContainerImpl(Context const *ctx, std::vector<bst_row_t> columns_size, int32_t max_bins,
common::Span<FeatureType const> feature_types, bool use_group);
static bool UseGroup(MetaInfo const &info) {
size_t const num_groups =
@@ -894,8 +893,8 @@ class HostSketchContainer : public SketchContainerImpl<WQuantileSketch<float, fl
using WQSketch = WQuantileSketch<float, float>;
public:
HostSketchContainer(int32_t max_bins, common::Span<FeatureType const> ft,
std::vector<size_t> columns_size, bool use_group, int32_t n_threads);
HostSketchContainer(Context const *ctx, bst_bin_t max_bins, common::Span<FeatureType const> ft,
std::vector<size_t> columns_size, bool use_group);
template <typename Batch>
void PushAdapterBatch(Batch const &batch, size_t base_rowid, MetaInfo const &info, float missing);
@@ -990,10 +989,10 @@ class SortedSketchContainer : public SketchContainerImpl<WXQuantileSketch<float,
using Super = SketchContainerImpl<WXQuantileSketch<float, float>>;
public:
explicit SortedSketchContainer(int32_t max_bins, common::Span<FeatureType const> ft,
std::vector<size_t> columns_size, bool use_group,
int32_t n_threads)
: SketchContainerImpl{columns_size, max_bins, ft, use_group, n_threads} {
explicit SortedSketchContainer(Context const *ctx, int32_t max_bins,
common::Span<FeatureType const> ft,
std::vector<size_t> columns_size, bool use_group)
: SketchContainerImpl{ctx, columns_size, max_bins, ft, use_group} {
monitor_.Init(__func__);
sketches_.resize(columns_size.size());
size_t i = 0;

33
src/data/batch_utils.h Normal file
View File

@@ -0,0 +1,33 @@
/**
* Copyright 2023, XGBoost Contributors
*/
#ifndef XGBOOST_DATA_BATCH_UTILS_H_
#define XGBOOST_DATA_BATCH_UTILS_H_
#include "xgboost/data.h" // for BatchParam
namespace xgboost::data::detail {
// At least one batch parameter is initialized.
inline void CheckEmpty(BatchParam const& l, BatchParam const& r) {
if (!l.Initialized()) {
CHECK(r.Initialized()) << "Batch parameter is not initialized.";
}
}
/**
* \brief Should we regenerate the gradient index?
*
* \param old Parameter stored in DMatrix.
* \param p New parameter passed in by caller.
*/
inline bool RegenGHist(BatchParam old, BatchParam p) {
// Parameter is renewed or caller requests a regen
if (!p.Initialized()) {
// Empty parameter is passed in, don't regenerate so that we can use gindex in
// predictor, which doesn't have any training parameter.
return false;
}
return p.regen || old.ParamNotEqual(p);
}
} // namespace xgboost::data::detail
#endif // XGBOOST_DATA_BATCH_UTILS_H_

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2019 XGBoost contributors
/**
* Copyright 2019-2023, XGBoost contributors
*/
#ifndef XGBOOST_USE_CUDA
@@ -12,7 +12,7 @@ class EllpackPageImpl {};
EllpackPage::EllpackPage() = default;
EllpackPage::EllpackPage(DMatrix*, const BatchParam&) {
EllpackPage::EllpackPage(Context const*, DMatrix*, const BatchParam&) {
LOG(FATAL) << "Internal Error: XGBoost is not compiled with CUDA but "
"EllpackPage is required";
}

View File

@@ -17,8 +17,8 @@ namespace xgboost {
EllpackPage::EllpackPage() : impl_{new EllpackPageImpl()} {}
EllpackPage::EllpackPage(DMatrix* dmat, const BatchParam& param)
: impl_{new EllpackPageImpl(dmat, param)} {}
EllpackPage::EllpackPage(Context const* ctx, DMatrix* dmat, const BatchParam& param)
: impl_{new EllpackPageImpl{ctx, dmat, param}} {}
EllpackPage::~EllpackPage() = default;
@@ -105,29 +105,29 @@ EllpackPageImpl::EllpackPageImpl(int device, common::HistogramCuts cuts,
}
// Construct an ELLPACK matrix in memory.
EllpackPageImpl::EllpackPageImpl(DMatrix* dmat, const BatchParam& param)
EllpackPageImpl::EllpackPageImpl(Context const* ctx, DMatrix* dmat, const BatchParam& param)
: is_dense(dmat->IsDense()) {
monitor_.Init("ellpack_page");
dh::safe_cuda(cudaSetDevice(param.gpu_id));
dh::safe_cuda(cudaSetDevice(ctx->gpu_id));
n_rows = dmat->Info().num_row_;
monitor_.Start("Quantiles");
// Create the quantile sketches for the dmatrix and initialize HistogramCuts.
row_stride = GetRowStride(dmat);
cuts_ = common::DeviceSketch(param.gpu_id, dmat, param.max_bin);
cuts_ = common::DeviceSketch(ctx->gpu_id, dmat, param.max_bin);
monitor_.Stop("Quantiles");
monitor_.Start("InitCompressedData");
this->InitCompressedData(param.gpu_id);
this->InitCompressedData(ctx->gpu_id);
monitor_.Stop("InitCompressedData");
dmat->Info().feature_types.SetDevice(param.gpu_id);
dmat->Info().feature_types.SetDevice(ctx->gpu_id);
auto ft = dmat->Info().feature_types.ConstDeviceSpan();
monitor_.Start("BinningCompression");
CHECK(dmat->SingleColBlock());
for (const auto& batch : dmat->GetBatches<SparsePage>()) {
CreateHistIndices(param.gpu_id, batch, ft);
CreateHistIndices(ctx->gpu_id, batch, ft);
}
monitor_.Stop("BinningCompression");
}

View File

@@ -155,7 +155,7 @@ class EllpackPageImpl {
* This is used in the in-memory case. The ELLPACK page is constructed from an existing DMatrix
* in CSR format.
*/
explicit EllpackPageImpl(DMatrix* dmat, const BatchParam& parm);
explicit EllpackPageImpl(Context const* ctx, DMatrix* dmat, const BatchParam& parm);
template <typename AdapterBatch>
explicit EllpackPageImpl(AdapterBatch batch, float missing, int device, bool is_dense,

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2019-2022 XGBoost contributors
/**
* Copyright 2019-2023, XGBoost contributors
*/
#include <memory>
#include <utility>
@@ -10,7 +10,7 @@
namespace xgboost {
namespace data {
void EllpackPageSource::Fetch() {
dh::safe_cuda(cudaSetDevice(param_.gpu_id));
dh::safe_cuda(cudaSetDevice(device_));
if (!this->ReadCache()) {
if (count_ != 0 && !sync_) {
// source is initialized to be the 0th page during construction, so when count_ is 0
@@ -22,8 +22,7 @@ void EllpackPageSource::Fetch() {
auto const &csr = source_->Page();
this->page_.reset(new EllpackPage{});
auto *impl = this->page_->Impl();
*impl = EllpackPageImpl(param_.gpu_id, *cuts_, *csr, is_dense_, row_stride_,
feature_types_);
*impl = EllpackPageImpl(device_, *cuts_, *csr, is_dense_, row_stride_, feature_types_);
page_->SetBaseRowId(csr->base_rowid);
this->WriteCache();
}

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2019-2022 by XGBoost Contributors
/**
* Copyright 2019-2023, XGBoost Contributors
*/
#ifndef XGBOOST_DATA_ELLPACK_PAGE_SOURCE_H_
@@ -23,19 +23,21 @@ class EllpackPageSource : public PageSourceIncMixIn<EllpackPage> {
BatchParam param_;
common::Span<FeatureType const> feature_types_;
std::unique_ptr<common::HistogramCuts> cuts_;
std::int32_t device_;
public:
EllpackPageSource(float missing, int nthreads, bst_feature_t n_features, size_t n_batches,
std::shared_ptr<Cache> cache, BatchParam param,
std::unique_ptr<common::HistogramCuts> cuts, bool is_dense, size_t row_stride,
common::Span<FeatureType const> feature_types,
std::shared_ptr<SparsePageSource> source)
std::shared_ptr<SparsePageSource> source, std::int32_t device)
: PageSourceIncMixIn(missing, nthreads, n_features, n_batches, cache, false),
is_dense_{is_dense},
row_stride_{row_stride},
param_{std::move(param)},
feature_types_{feature_types},
cuts_{std::move(cuts)} {
cuts_{std::move(cuts)},
device_{device} {
this->source_ = source;
this->Fetch();
}

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2017-2022 by XGBoost Contributors
/**
* Copyright 2017-2023, XGBoost Contributors
* \brief Data type for fast histogram aggregation.
*/
#include "gradient_index.h"
@@ -19,18 +19,18 @@ namespace xgboost {
GHistIndexMatrix::GHistIndexMatrix() : columns_{std::make_unique<common::ColumnMatrix>()} {}
GHistIndexMatrix::GHistIndexMatrix(DMatrix *p_fmat, bst_bin_t max_bins_per_feat,
double sparse_thresh, bool sorted_sketch, int32_t n_threads,
GHistIndexMatrix::GHistIndexMatrix(Context const *ctx, DMatrix *p_fmat, bst_bin_t max_bins_per_feat,
double sparse_thresh, bool sorted_sketch,
common::Span<float> hess)
: max_numeric_bins_per_feat{max_bins_per_feat} {
CHECK(p_fmat->SingleColBlock());
// We use sorted sketching for approx tree method since it's more efficient in
// computation time (but higher memory usage).
cut = common::SketchOnDMatrix(p_fmat, max_bins_per_feat, n_threads, sorted_sketch, hess);
cut = common::SketchOnDMatrix(ctx, p_fmat, max_bins_per_feat, sorted_sketch, hess);
const uint32_t nbins = cut.Ptrs().back();
hit_count.resize(nbins, 0);
hit_count_tloc_.resize(n_threads * nbins, 0);
hit_count_tloc_.resize(ctx->Threads() * nbins, 0);
size_t new_size = 1;
for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
@@ -45,7 +45,7 @@ GHistIndexMatrix::GHistIndexMatrix(DMatrix *p_fmat, bst_bin_t max_bins_per_feat,
auto ft = p_fmat->Info().feature_types.ConstHostSpan();
for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
this->PushBatch(batch, ft, n_threads);
this->PushBatch(batch, ft, ctx->Threads());
}
this->columns_ = std::make_unique<common::ColumnMatrix>();
@@ -54,7 +54,7 @@ GHistIndexMatrix::GHistIndexMatrix(DMatrix *p_fmat, bst_bin_t max_bins_per_feat,
// hist
CHECK(!sorted_sketch);
for (auto const &page : p_fmat->GetBatches<SparsePage>()) {
this->columns_->InitFromSparse(page, *this, sparse_thresh, n_threads);
this->columns_->InitFromSparse(page, *this, sparse_thresh, ctx->Threads());
}
}
}

View File

@@ -19,7 +19,6 @@
#include "../common/threading_utils.h"
#include "../common/transform_iterator.h" // for MakeIndexTransformIter
#include "adapter.h"
#include "proxy_dmatrix.h"
#include "xgboost/base.h"
#include "xgboost/data.h"
@@ -155,8 +154,8 @@ class GHistIndexMatrix {
/**
* \brief Constrcutor for SimpleDMatrix.
*/
GHistIndexMatrix(DMatrix* x, bst_bin_t max_bins_per_feat, double sparse_thresh,
bool sorted_sketch, int32_t n_threads, common::Span<float> hess = {});
GHistIndexMatrix(Context const* ctx, DMatrix* x, bst_bin_t max_bins_per_feat,
double sparse_thresh, bool sorted_sketch, common::Span<float> hess = {});
/**
* \brief Constructor for Iterative DMatrix. Initialize basic information and prepare
* for push batch.
@@ -295,28 +294,5 @@ void AssignColumnBinIndex(GHistIndexMatrix const& page, Fn&& assign) {
}
});
}
/**
* \brief Should we regenerate the gradient index?
*
* \param old Parameter stored in DMatrix.
* \param p New parameter passed in by caller.
*/
inline bool RegenGHist(BatchParam old, BatchParam p) {
// parameter is renewed or caller requests a regen
if (p == BatchParam{}) {
// empty parameter is passed in, don't regenerate so that we can use gindex in
// predictor, which doesn't have any training parameter.
return false;
}
// Avoid comparing nan values.
bool l_nan = std::isnan(old.sparse_thresh);
bool r_nan = std::isnan(p.sparse_thresh);
// regenerate if parameter is changed.
bool st_chg = (l_nan != r_nan) || (!l_nan && !r_nan && (old.sparse_thresh != p.sparse_thresh));
bool param_chg = old.gpu_id != p.gpu_id || old.max_bin != p.max_bin;
return p.regen || param_chg || st_chg;
}
} // namespace xgboost
#endif // XGBOOST_DATA_GRADIENT_INDEX_H_

View File

@@ -1,25 +1,26 @@
/*!
* Copyright 2022 XGBoost contributors
/**
* Copyright 2022-2023, XGBoost contributors
*/
#include "iterative_dmatrix.h"
#include <algorithm> // std::copy
#include <cstddef> // std::size_t
#include <type_traits> // std::underlying_type_t
#include <vector> // std::vector
#include <algorithm> // for copy
#include <cstddef> // for size_t
#include <memory> // for shared_ptr
#include <type_traits> // for underlying_type_t
#include <vector> // for vector
#include "../collective/communicator-inl.h"
#include "../common/categorical.h" // common::IsCat
#include "../common/column_matrix.h"
#include "../tree/param.h" // FIXME(jiamingy): Find a better way to share this parameter.
#include "../tree/param.h" // FIXME(jiamingy): Find a better way to share this parameter.
#include "batch_utils.h" // for RegenGHist
#include "gradient_index.h"
#include "proxy_dmatrix.h"
#include "simple_batch_iterator.h"
#include "xgboost/data.h" // FeatureType
#include "xgboost/data.h" // for FeatureType, DMatrix
#include "xgboost/logging.h"
namespace xgboost {
namespace data {
namespace xgboost::data {
IterativeDMatrix::IterativeDMatrix(DataIterHandle iter_handle, DMatrixHandle proxy,
std::shared_ptr<DMatrix> ref, DataIterResetCallback* reset,
XGDMatrixCallbackNext* next, float missing, int nthread,
@@ -34,60 +35,61 @@ IterativeDMatrix::IterativeDMatrix(DataIterHandle iter_handle, DMatrixHandle pro
auto d = MakeProxy(proxy_)->DeviceIdx();
StringView msg{"All batch should be on the same device."};
if (batch_param_.gpu_id != Context::kCpuId) {
CHECK_EQ(d, batch_param_.gpu_id) << msg;
}
batch_param_ = BatchParam{d, max_bin};
Context ctx;
ctx.UpdateAllowUnknown(Args{{"nthread", std::to_string(nthread)}, {"gpu_id", std::to_string(d)}});
// hardcoded parameter.
batch_param_.sparse_thresh = tree::TrainParam::DftSparseThreshold();
BatchParam p{max_bin, tree::TrainParam::DftSparseThreshold()};
ctx_.UpdateAllowUnknown(
Args{{"nthread", std::to_string(nthread)}, {"gpu_id", std::to_string(d)}});
if (ctx_.IsCPU()) {
this->InitFromCPU(iter_handle, missing, ref);
if (ctx.IsCPU()) {
this->InitFromCPU(&ctx, p, iter_handle, missing, ref);
} else {
this->InitFromCUDA(iter_handle, missing, ref);
this->InitFromCUDA(&ctx, p, iter_handle, missing, ref);
}
this->fmat_ctx_ = ctx;
this->batch_ = p;
}
void GetCutsFromRef(std::shared_ptr<DMatrix> ref_, bst_feature_t n_features, BatchParam p,
common::HistogramCuts* p_cuts) {
CHECK(ref_);
void GetCutsFromRef(Context const* ctx, std::shared_ptr<DMatrix> ref, bst_feature_t n_features,
BatchParam p, common::HistogramCuts* p_cuts) {
CHECK(ref);
CHECK(p_cuts);
auto csr = [&]() {
for (auto const& page : ref_->GetBatches<GHistIndexMatrix>(p)) {
p.forbid_regen = true;
// Fetch cuts from GIDX
auto csr = [&] {
for (auto const& page : ref->GetBatches<GHistIndexMatrix>(ctx, p)) {
*p_cuts = page.cut;
break;
}
};
auto ellpack = [&]() {
// workaround ellpack being initialized from CPU.
if (p.gpu_id == Context::kCpuId) {
p.gpu_id = ref_->Ctx()->gpu_id;
}
if (p.gpu_id == Context::kCpuId) {
p.gpu_id = 0;
}
for (auto const& page : ref_->GetBatches<EllpackPage>(p)) {
// Fetch cuts from Ellpack.
auto ellpack = [&] {
for (auto const& page : ref->GetBatches<EllpackPage>(ctx, p)) {
GetCutsFromEllpack(page, p_cuts);
break;
}
};
if (ref_->PageExists<GHistIndexMatrix>()) {
if (ref->PageExists<GHistIndexMatrix>() && ref->PageExists<EllpackPage>()) {
// Both exists
if (ctx->IsCPU()) {
csr();
} else {
ellpack();
}
} else if (ref->PageExists<GHistIndexMatrix>()) {
csr();
} else if (ref_->PageExists<EllpackPage>()) {
} else if (ref->PageExists<EllpackPage>()) {
ellpack();
} else {
if (p.gpu_id == Context::kCpuId) {
// None exist
if (ctx->IsCPU()) {
csr();
} else {
ellpack();
}
}
CHECK_EQ(ref_->Info().num_col_, n_features)
CHECK_EQ(ref->Info().num_col_, n_features)
<< "Invalid ref DMatrix, different number of features.";
}
@@ -112,7 +114,8 @@ void SyncFeatureType(std::vector<FeatureType>* p_h_ft) {
}
} // anonymous namespace
void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
void IterativeDMatrix::InitFromCPU(Context const* ctx, BatchParam const& p,
DataIterHandle iter_handle, float missing,
std::shared_ptr<DMatrix> ref) {
DMatrixProxy* proxy = MakeProxy(proxy_);
CHECK(proxy);
@@ -133,7 +136,7 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
auto const is_valid = data::IsValidFunctor{missing};
auto nnz_cnt = [&]() {
return HostAdapterDispatch(proxy, [&](auto const& value) {
size_t n_threads = ctx_.Threads();
size_t n_threads = ctx->Threads();
size_t n_features = column_sizes.size();
linalg::Tensor<std::size_t, 2> column_sizes_tloc({n_threads, n_features}, Context::kCpuId);
column_sizes_tloc.Data()->Fill(0ul);
@@ -158,10 +161,10 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
});
};
size_t n_features = 0;
size_t n_batches = 0;
size_t accumulated_rows{0};
size_t nnz{0};
std::uint64_t n_features = 0;
std::size_t n_batches = 0;
std::uint64_t accumulated_rows{0};
std::uint64_t nnz{0};
/**
* CPU impl needs an additional loop for accumulating the column size.
@@ -203,7 +206,7 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
accumulated_rows = 0;
std::vector<FeatureType> h_ft;
if (ref) {
GetCutsFromRef(ref, Info().num_col_, batch_param_, &cuts);
GetCutsFromRef(ctx, ref, Info().num_col_, p, &cuts);
h_ft = ref->Info().feature_types.HostVector();
} else {
size_t i = 0;
@@ -211,9 +214,8 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
if (!p_sketch) {
h_ft = proxy->Info().feature_types.ConstHostVector();
SyncFeatureType(&h_ft);
p_sketch.reset(new common::HostSketchContainer{
batch_param_.max_bin, h_ft, column_sizes, !proxy->Info().group_ptr_.empty(),
ctx_.Threads()});
p_sketch.reset(new common::HostSketchContainer{ctx, p.max_bin, h_ft, column_sizes,
!proxy->Info().group_ptr_.empty()});
}
HostAdapterDispatch(proxy, [&](auto const& batch) {
proxy->Info().num_nonzero_ = batch_nnz[i];
@@ -237,15 +239,15 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
/**
* Generate gradient index.
*/
this->ghist_ = std::make_unique<GHistIndexMatrix>(Info(), std::move(cuts), batch_param_.max_bin);
this->ghist_ = std::make_unique<GHistIndexMatrix>(Info(), std::move(cuts), p.max_bin);
size_t rbegin = 0;
size_t prev_sum = 0;
size_t i = 0;
while (iter.Next()) {
HostAdapterDispatch(proxy, [&](auto const& batch) {
proxy->Info().num_nonzero_ = batch_nnz[i];
this->ghist_->PushAdapterBatch(&ctx_, rbegin, prev_sum, batch, missing, h_ft,
batch_param_.sparse_thresh, Info().num_row_);
this->ghist_->PushAdapterBatch(ctx, rbegin, prev_sum, batch, missing, h_ft, p.sparse_thresh,
Info().num_row_);
});
if (n_batches != 1) {
this->info_.Extend(std::move(proxy->Info()), false, true);
@@ -265,7 +267,7 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
accumulated_rows = 0;
while (iter.Next()) {
HostAdapterDispatch(proxy, [&](auto const& batch) {
this->ghist_->PushAdapterBatchColumns(&ctx_, batch, missing, accumulated_rows);
this->ghist_->PushAdapterBatchColumns(ctx, batch, missing, accumulated_rows);
});
accumulated_rows += num_rows();
}
@@ -282,11 +284,27 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
Info().feature_types.HostVector() = h_ft;
}
BatchSet<GHistIndexMatrix> IterativeDMatrix::GetGradientIndex(BatchParam const& param) {
CheckParam(param);
BatchSet<GHistIndexMatrix> IterativeDMatrix::GetGradientIndex(Context const* ctx,
BatchParam const& param) {
if (param.Initialized()) {
CheckParam(param);
CHECK(!detail::RegenGHist(param, batch_)) << error::InconsistentMaxBin();
}
if (!ellpack_ && !ghist_) {
LOG(FATAL) << "`QuantileDMatrix` not initialized.";
}
if (!ghist_) {
CHECK(ellpack_);
ghist_ = std::make_shared<GHistIndexMatrix>(&ctx_, Info(), *ellpack_, param);
if (ctx->IsCPU()) {
ghist_ = std::make_shared<GHistIndexMatrix>(ctx, Info(), *ellpack_, param);
} else if (fmat_ctx_.IsCPU()) {
ghist_ = std::make_shared<GHistIndexMatrix>(&fmat_ctx_, Info(), *ellpack_, param);
} else {
// Can happen when QDM is initialized on GPU, but a CPU version is queried by a different QDM
// for cut reference.
auto cpu_ctx = ctx->MakeCPU();
ghist_ = std::make_shared<GHistIndexMatrix>(&cpu_ctx, Info(), *ellpack_, param);
}
}
if (!std::isnan(param.sparse_thresh) &&
@@ -300,8 +318,9 @@ BatchSet<GHistIndexMatrix> IterativeDMatrix::GetGradientIndex(BatchParam const&
return BatchSet<GHistIndexMatrix>(begin_iter);
}
BatchSet<ExtSparsePage> IterativeDMatrix::GetExtBatches(BatchParam const& param) {
for (auto const& page : this->GetGradientIndex(param)) {
BatchSet<ExtSparsePage> IterativeDMatrix::GetExtBatches(Context const* ctx,
BatchParam const& param) {
for (auto const& page : this->GetGradientIndex(ctx, param)) {
auto p_out = std::make_shared<SparsePage>();
p_out->data.Resize(this->Info().num_nonzero_);
p_out->offset.Resize(this->Info().num_row_ + 1);
@@ -336,5 +355,26 @@ BatchSet<ExtSparsePage> IterativeDMatrix::GetExtBatches(BatchParam const& param)
BatchIterator<ExtSparsePage>(new SimpleBatchIteratorImpl<ExtSparsePage>(nullptr));
return BatchSet<ExtSparsePage>(begin_iter);
}
} // namespace data
} // namespace xgboost
#if !defined(XGBOOST_USE_CUDA)
inline void IterativeDMatrix::InitFromCUDA(Context const*, BatchParam const&, DataIterHandle, float,
std::shared_ptr<DMatrix>) {
// silent the warning about unused variables.
(void)(proxy_);
(void)(reset_);
(void)(next_);
common::AssertGPUSupport();
}
inline BatchSet<EllpackPage> IterativeDMatrix::GetEllpackBatches(Context const* ctx,
BatchParam const& param) {
common::AssertGPUSupport();
auto begin_iter = BatchIterator<EllpackPage>(new SimpleBatchIteratorImpl<EllpackPage>(ellpack_));
return BatchSet<EllpackPage>(BatchIterator<EllpackPage>(begin_iter));
}
inline void GetCutsFromEllpack(EllpackPage const&, common::HistogramCuts*) {
common::AssertGPUSupport();
}
#endif // !defined(XGBOOST_USE_CUDA)
} // namespace xgboost::data

View File

@@ -1,22 +1,24 @@
/*!
* Copyright 2020-2022 XGBoost contributors
/**
* Copyright 2020-2023, XGBoost contributors
*/
#include <algorithm>
#include <memory>
#include <type_traits>
#include "../common/hist_util.cuh"
#include "batch_utils.h" // for RegenGHist
#include "device_adapter.cuh"
#include "ellpack_page.cuh"
#include "gradient_index.h"
#include "iterative_dmatrix.h"
#include "proxy_dmatrix.cuh"
#include "proxy_dmatrix.h"
#include "simple_batch_iterator.h"
#include "sparse_page_source.h"
namespace xgboost {
namespace data {
void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing,
namespace xgboost::data {
void IterativeDMatrix::InitFromCUDA(Context const* ctx, BatchParam const& p,
DataIterHandle iter_handle, float missing,
std::shared_ptr<DMatrix> ref) {
// A handle passed to external iterator.
DMatrixProxy* proxy = MakeProxy(proxy_);
@@ -46,7 +48,7 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing,
int32_t current_device;
dh::safe_cuda(cudaGetDevice(&current_device));
auto get_device = [&]() -> int32_t {
int32_t d = (ctx_.gpu_id == Context::kCpuId) ? current_device : ctx_.gpu_id;
std::int32_t d = (ctx->gpu_id == Context::kCpuId) ? current_device : ctx->gpu_id;
CHECK_NE(d, Context::kCpuId);
return d;
};
@@ -57,8 +59,8 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing,
common::HistogramCuts cuts;
do {
// We use do while here as the first batch is fetched in ctor
ctx_.gpu_id = proxy->DeviceIdx();
CHECK_LT(ctx_.gpu_id, common::AllVisibleGPUs());
// ctx_.gpu_id = proxy->DeviceIdx();
CHECK_LT(ctx->gpu_id, common::AllVisibleGPUs());
dh::safe_cuda(cudaSetDevice(get_device()));
if (cols == 0) {
cols = num_cols();
@@ -68,12 +70,12 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing,
CHECK_EQ(cols, num_cols()) << "Inconsistent number of columns.";
}
if (!ref) {
sketch_containers.emplace_back(proxy->Info().feature_types, batch_param_.max_bin, cols,
num_rows(), get_device());
sketch_containers.emplace_back(proxy->Info().feature_types, p.max_bin, cols, num_rows(),
get_device());
auto* p_sketch = &sketch_containers.back();
proxy->Info().weights_.SetDevice(get_device());
Dispatch(proxy, [&](auto const& value) {
common::AdapterDeviceSketch(value, batch_param_.max_bin, proxy->Info(), missing, p_sketch);
common::AdapterDeviceSketch(value, p.max_bin, proxy->Info(), missing, p_sketch);
});
}
auto batch_rows = num_rows();
@@ -95,8 +97,8 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing,
if (!ref) {
HostDeviceVector<FeatureType> ft;
common::SketchContainer final_sketch(
sketch_containers.empty() ? ft : sketch_containers.front().FeatureTypes(),
batch_param_.max_bin, cols, accumulated_rows, get_device());
sketch_containers.empty() ? ft : sketch_containers.front().FeatureTypes(), p.max_bin, cols,
accumulated_rows, get_device());
for (auto const& sketch : sketch_containers) {
final_sketch.Merge(sketch.ColumnsPtr(), sketch.Data());
final_sketch.FixError();
@@ -106,7 +108,7 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing,
final_sketch.MakeCuts(&cuts);
} else {
GetCutsFromRef(ref, Info().num_col_, batch_param_, &cuts);
GetCutsFromRef(ctx, ref, Info().num_col_, p, &cuts);
}
this->info_.num_row_ = accumulated_rows;
@@ -169,24 +171,34 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing,
info_.SynchronizeNumberOfColumns();
}
BatchSet<EllpackPage> IterativeDMatrix::GetEllpackBatches(BatchParam const& param) {
CheckParam(param);
BatchSet<EllpackPage> IterativeDMatrix::GetEllpackBatches(Context const* ctx,
BatchParam const& param) {
if (param.Initialized()) {
CheckParam(param);
CHECK(!detail::RegenGHist(param, batch_)) << error::InconsistentMaxBin();
}
if (!ellpack_ && !ghist_) {
LOG(FATAL) << "`QuantileDMatrix` not initialized.";
}
if (!ellpack_ && ghist_) {
if (!ellpack_) {
ellpack_.reset(new EllpackPage());
// Evaluation QuantileDMatrix initialized from CPU data might not have the correct GPU
// ID.
if (this->ctx_.IsCPU()) {
this->ctx_.gpu_id = param.gpu_id;
if (ctx->IsCUDA()) {
this->Info().feature_types.SetDevice(ctx->gpu_id);
*ellpack_->Impl() =
EllpackPageImpl(ctx, *this->ghist_, this->Info().feature_types.ConstDeviceSpan());
} else if (fmat_ctx_.IsCUDA()) {
this->Info().feature_types.SetDevice(fmat_ctx_.gpu_id);
*ellpack_->Impl() =
EllpackPageImpl(&fmat_ctx_, *this->ghist_, this->Info().feature_types.ConstDeviceSpan());
} else {
// Can happen when QDM is initialized on CPU, but a GPU version is queried by a different QDM
// for cut reference.
auto cuda_ctx = ctx->MakeCUDA();
this->Info().feature_types.SetDevice(cuda_ctx.gpu_id);
*ellpack_->Impl() =
EllpackPageImpl(&cuda_ctx, *this->ghist_, this->Info().feature_types.ConstDeviceSpan());
}
if (this->ctx_.IsCPU()) {
this->ctx_.gpu_id = dh::CurrentDevice();
}
this->Info().feature_types.SetDevice(this->ctx_.gpu_id);
*ellpack_->Impl() =
EllpackPageImpl(&ctx_, *this->ghist_, this->Info().feature_types.ConstDeviceSpan());
}
CHECK(ellpack_);
auto begin_iter = BatchIterator<EllpackPage>(new SimpleBatchIteratorImpl<EllpackPage>(ellpack_));
@@ -196,5 +208,4 @@ BatchSet<EllpackPage> IterativeDMatrix::GetEllpackBatches(BatchParam const& para
void GetCutsFromEllpack(EllpackPage const& page, common::HistogramCuts* cuts) {
*cuts = page.Impl()->Cuts();
}
} // namespace data
} // namespace xgboost
} // namespace xgboost::data

View File

@@ -1,6 +1,8 @@
/*!
* Copyright 2020-2022 by Contributors
/**
* Copyright 2020-2023 by XGBoost Contributors
* \file iterative_dmatrix.h
*
* \brief Implementation of the higher-level `QuantileDMatrix`.
*/
#ifndef XGBOOST_DATA_ITERATIVE_DMATRIX_H_
#define XGBOOST_DATA_ITERATIVE_DMATRIX_H_
@@ -10,10 +12,12 @@
#include <utility>
#include <vector>
#include "../common/error_msg.h"
#include "proxy_dmatrix.h"
#include "simple_batch_iterator.h"
#include "xgboost/base.h"
#include "xgboost/c_api.h"
#include "xgboost/context.h" // for Context
#include "xgboost/data.h"
namespace xgboost {
@@ -43,21 +47,17 @@ namespace data {
*/
class IterativeDMatrix : public DMatrix {
MetaInfo info_;
Context ctx_;
BatchParam batch_param_;
std::shared_ptr<EllpackPage> ellpack_;
std::shared_ptr<GHistIndexMatrix> ghist_;
BatchParam batch_;
DMatrixHandle proxy_;
DataIterResetCallback *reset_;
XGDMatrixCallbackNext *next_;
Context fmat_ctx_;
void CheckParam(BatchParam const &param) {
// FIXME(Jiamingy): https://github.com/dmlc/xgboost/issues/7976
if (param.max_bin != batch_param_.max_bin && param.max_bin != 0) {
LOG(WARNING) << "Inconsistent max_bin between Quantile DMatrix and Booster:" << param.max_bin
<< " vs. " << batch_param_.max_bin;
}
CHECK_EQ(param.max_bin, batch_.max_bin) << error::InconsistentMaxBin();
CHECK(!param.regen && param.hess.empty())
<< "Only `hist` and `gpu_hist` tree method can use `QuantileDMatrix`.";
}
@@ -68,8 +68,10 @@ class IterativeDMatrix : public DMatrix {
return BatchSet<Page>(BatchIterator<Page>(nullptr));
}
void InitFromCUDA(DataIterHandle iter, float missing, std::shared_ptr<DMatrix> ref);
void InitFromCPU(DataIterHandle iter_handle, float missing, std::shared_ptr<DMatrix> ref);
void InitFromCUDA(Context const *ctx, BatchParam const &p, DataIterHandle iter_handle,
float missing, std::shared_ptr<DMatrix> ref);
void InitFromCPU(Context const *ctx, BatchParam const &p, DataIterHandle iter_handle,
float missing, std::shared_ptr<DMatrix> ref);
public:
explicit IterativeDMatrix(DataIterHandle iter_handle, DMatrixHandle proxy,
@@ -94,51 +96,40 @@ class IterativeDMatrix : public DMatrix {
LOG(FATAL) << "Not implemented.";
return BatchSet<SparsePage>(BatchIterator<SparsePage>(nullptr));
}
BatchSet<CSCPage> GetColumnBatches() override { return InvalidTreeMethod<CSCPage>(); }
BatchSet<SortedCSCPage> GetSortedColumnBatches() override {
BatchSet<CSCPage> GetColumnBatches(Context const *) override {
return InvalidTreeMethod<CSCPage>();
}
BatchSet<SortedCSCPage> GetSortedColumnBatches(Context const *) override {
return InvalidTreeMethod<SortedCSCPage>();
}
BatchSet<GHistIndexMatrix> GetGradientIndex(BatchParam const &param) override;
BatchSet<GHistIndexMatrix> GetGradientIndex(Context const *ctx, BatchParam const &param) override;
BatchSet<EllpackPage> GetEllpackBatches(const BatchParam &param) override;
BatchSet<ExtSparsePage> GetExtBatches(BatchParam const& param) override;
BatchSet<EllpackPage> GetEllpackBatches(Context const *ctx, const BatchParam &param) override;
BatchSet<ExtSparsePage> GetExtBatches(Context const *ctx, BatchParam const &param) override;
bool SingleColBlock() const override { return true; }
MetaInfo &Info() override { return info_; }
MetaInfo const &Info() const override { return info_; }
Context const *Ctx() const override { return &ctx_; }
Context const *Ctx() const override { return &fmat_ctx_; }
};
/**
* \brief Get quantile cuts from reference Quantile DMatrix.
* \brief Get quantile cuts from reference (Quantile)DMatrix.
*
* \param ctx The context of the new DMatrix.
* \param ref The reference DMatrix.
* \param n_features Number of features, used for validation only.
* \param p Batch parameter for the new DMatrix.
* \param p_cuts Output quantile cuts.
*/
void GetCutsFromRef(std::shared_ptr<DMatrix> ref_, bst_feature_t n_features, BatchParam p,
common::HistogramCuts *p_cuts);
void GetCutsFromRef(Context const *ctx, std::shared_ptr<DMatrix> ref, bst_feature_t n_features,
BatchParam p, common::HistogramCuts *p_cuts);
/**
* \brief Get quantile cuts from ellpack page.
*/
void GetCutsFromEllpack(EllpackPage const &page, common::HistogramCuts *cuts);
#if !defined(XGBOOST_USE_CUDA)
inline void IterativeDMatrix::InitFromCUDA(DataIterHandle, float, std::shared_ptr<DMatrix>) {
// silent the warning about unused variables.
(void)(proxy_);
(void)(reset_);
(void)(next_);
common::AssertGPUSupport();
}
inline BatchSet<EllpackPage> IterativeDMatrix::GetEllpackBatches(const BatchParam &) {
common::AssertGPUSupport();
auto begin_iter = BatchIterator<EllpackPage>(new SimpleBatchIteratorImpl<EllpackPage>(ellpack_));
return BatchSet<EllpackPage>(BatchIterator<EllpackPage>(begin_iter));
}
inline void GetCutsFromEllpack(EllpackPage const &, common::HistogramCuts *) {
common::AssertGPUSupport();
}
#endif // !defined(XGBOOST_USE_CUDA)
} // namespace data
} // namespace xgboost

View File

@@ -25,16 +25,11 @@ class DataIterProxy {
NextFn* next_;
public:
DataIterProxy(DataIterHandle iter, ResetFn* reset, NextFn* next) :
iter_{iter},
reset_{reset}, next_{next} {}
DataIterProxy(DataIterHandle iter, ResetFn* reset, NextFn* next)
: iter_{iter}, reset_{reset}, next_{next} {}
bool Next() {
return next_(iter_);
}
void Reset() {
reset_(iter_);
}
bool Next() { return next_(iter_); }
void Reset() { reset_(iter_); }
};
/*
@@ -68,9 +63,8 @@ class DMatrixProxy : public DMatrix {
}
void SetArrayData(char const* c_interface);
void SetCSRData(char const *c_indptr, char const *c_indices,
char const *c_values, bst_feature_t n_features,
bool on_host);
void SetCSRData(char const* c_indptr, char const* c_indices, char const* c_values,
bst_feature_t n_features, bool on_host);
MetaInfo& Info() override { return info_; }
MetaInfo const& Info() const override { return info_; }
@@ -81,6 +75,12 @@ class DMatrixProxy : public DMatrix {
bool GHistIndexExists() const override { return false; }
bool SparsePageExists() const override { return false; }
template <typename Page>
BatchSet<Page> NoBatch() {
LOG(FATAL) << "Proxy DMatrix cannot return data batch.";
return BatchSet<Page>(BatchIterator<Page>(nullptr));
}
DMatrix* Slice(common::Span<int32_t const> /*ridxs*/) override {
LOG(FATAL) << "Slicing DMatrix is not supported for Proxy DMatrix.";
return nullptr;
@@ -89,29 +89,19 @@ class DMatrixProxy : public DMatrix {
LOG(FATAL) << "Slicing DMatrix columns is not supported for Proxy DMatrix.";
return nullptr;
}
BatchSet<SparsePage> GetRowBatches() override {
LOG(FATAL) << "Not implemented.";
return BatchSet<SparsePage>(BatchIterator<SparsePage>(nullptr));
BatchSet<SparsePage> GetRowBatches() override { return NoBatch<SparsePage>(); }
BatchSet<CSCPage> GetColumnBatches(Context const*) override { return NoBatch<CSCPage>(); }
BatchSet<SortedCSCPage> GetSortedColumnBatches(Context const*) override {
return NoBatch<SortedCSCPage>();
}
BatchSet<CSCPage> GetColumnBatches() override {
LOG(FATAL) << "Not implemented.";
return BatchSet<CSCPage>(BatchIterator<CSCPage>(nullptr));
BatchSet<EllpackPage> GetEllpackBatches(Context const*, BatchParam const&) override {
return NoBatch<EllpackPage>();
}
BatchSet<SortedCSCPage> GetSortedColumnBatches() override {
LOG(FATAL) << "Not implemented.";
return BatchSet<SortedCSCPage>(BatchIterator<SortedCSCPage>(nullptr));
BatchSet<GHistIndexMatrix> GetGradientIndex(Context const*, BatchParam const&) override {
return NoBatch<GHistIndexMatrix>();
}
BatchSet<EllpackPage> GetEllpackBatches(const BatchParam&) override {
LOG(FATAL) << "Not implemented.";
return BatchSet<EllpackPage>(BatchIterator<EllpackPage>(nullptr));
}
BatchSet<GHistIndexMatrix> GetGradientIndex(const BatchParam&) override {
LOG(FATAL) << "Not implemented.";
return BatchSet<GHistIndexMatrix>(BatchIterator<GHistIndexMatrix>(nullptr));
}
BatchSet<ExtSparsePage> GetExtBatches(BatchParam const&) override {
LOG(FATAL) << "Not implemented.";
return BatchSet<ExtSparsePage>(BatchIterator<ExtSparsePage>(nullptr));
BatchSet<ExtSparsePage> GetExtBatches(Context const*, BatchParam const&) override {
return NoBatch<ExtSparsePage>();
}
std::any Adapter() const { return batch_; }
};
@@ -144,8 +134,7 @@ decltype(auto) HostAdapterDispatch(DMatrixProxy const* proxy, Fn fn, bool* type_
} else {
LOG(FATAL) << "Unknown type: " << proxy->Adapter().type().name();
}
return std::result_of_t<Fn(
decltype(std::declval<std::shared_ptr<ArrayAdapter>>()->Value()))>();
return std::result_of_t<Fn(decltype(std::declval<std::shared_ptr<ArrayAdapter>>()->Value()))>();
}
}
} // namespace xgboost::data

View File

@@ -11,10 +11,12 @@
#include <type_traits>
#include <vector>
#include "../common/error_msg.h" // for InconsistentMaxBin
#include "../common/random.h"
#include "../common/threading_utils.h"
#include "./simple_batch_iterator.h"
#include "adapter.h"
#include "batch_utils.h" // for CheckEmpty, RegenGHist
#include "gradient_index.h"
#include "xgboost/c_api.h"
#include "xgboost/data.h"
@@ -28,7 +30,7 @@ const MetaInfo& SimpleDMatrix::Info() const { return info_; }
DMatrix* SimpleDMatrix::Slice(common::Span<int32_t const> ridxs) {
auto out = new SimpleDMatrix;
SparsePage& out_page = *out->sparse_page_;
for (auto const &page : this->GetBatches<SparsePage>()) {
for (auto const& page : this->GetBatches<SparsePage>()) {
auto batch = page.GetView();
auto& h_data = out_page.data.HostVector();
auto& h_offset = out_page.offset.HostVector();
@@ -42,7 +44,7 @@ DMatrix* SimpleDMatrix::Slice(common::Span<int32_t const> ridxs) {
out->Info() = this->Info().Slice(ridxs);
out->Info().num_nonzero_ = h_offset.back();
}
out->ctx_ = this->ctx_;
out->fmat_ctx_ = this->fmat_ctx_;
return out;
}
@@ -52,7 +54,7 @@ DMatrix* SimpleDMatrix::SliceCol(int num_slices, int slice_id) {
auto const slice_size = info_.num_col_ / num_slices;
auto const slice_start = slice_size * slice_id;
auto const slice_end = (slice_id == num_slices - 1) ? info_.num_col_ : slice_start + slice_size;
for (auto const &page : this->GetBatches<SparsePage>()) {
for (auto const& page : this->GetBatches<SparsePage>()) {
auto batch = page.GetView();
auto& h_data = out_page.data.HostVector();
auto& h_offset = out_page.offset.HostVector();
@@ -60,9 +62,8 @@ DMatrix* SimpleDMatrix::SliceCol(int num_slices, int slice_id) {
for (bst_row_t i = 0; i < this->Info().num_row_; i++) {
auto inst = batch[i];
auto prev_size = h_data.size();
std::copy_if(inst.begin(), inst.end(), std::back_inserter(h_data), [&](Entry e) {
return e.index >= slice_start && e.index < slice_end;
});
std::copy_if(inst.begin(), inst.end(), std::back_inserter(h_data),
[&](Entry e) { return e.index >= slice_start && e.index < slice_end; });
rptr += h_data.size() - prev_size;
h_offset.emplace_back(rptr);
}
@@ -73,7 +74,7 @@ DMatrix* SimpleDMatrix::SliceCol(int num_slices, int slice_id) {
return out;
}
void SimpleDMatrix::ReindexFeatures() {
void SimpleDMatrix::ReindexFeatures(Context const* ctx) {
if (info_.IsVerticalFederated()) {
std::vector<uint64_t> buffer(collective::GetWorldSize());
buffer[collective::GetRank()] = info_.num_col_;
@@ -82,72 +83,115 @@ void SimpleDMatrix::ReindexFeatures() {
if (offset == 0) {
return;
}
sparse_page_->Reindex(offset, ctx_.Threads());
sparse_page_->Reindex(offset, ctx->Threads());
}
}
BatchSet<SparsePage> SimpleDMatrix::GetRowBatches() {
// since csr is the default data structure so `source_` is always available.
auto begin_iter = BatchIterator<SparsePage>(
new SimpleBatchIteratorImpl<SparsePage>(sparse_page_));
auto begin_iter =
BatchIterator<SparsePage>(new SimpleBatchIteratorImpl<SparsePage>(sparse_page_));
return BatchSet<SparsePage>(begin_iter);
}
BatchSet<CSCPage> SimpleDMatrix::GetColumnBatches() {
BatchSet<CSCPage> SimpleDMatrix::GetColumnBatches(Context const* ctx) {
// column page doesn't exist, generate it
if (!column_page_) {
column_page_.reset(new CSCPage(sparse_page_->GetTranspose(info_.num_col_, ctx_.Threads())));
column_page_.reset(new CSCPage(sparse_page_->GetTranspose(info_.num_col_, ctx->Threads())));
}
auto begin_iter =
BatchIterator<CSCPage>(new SimpleBatchIteratorImpl<CSCPage>(column_page_));
auto begin_iter = BatchIterator<CSCPage>(new SimpleBatchIteratorImpl<CSCPage>(column_page_));
return BatchSet<CSCPage>(begin_iter);
}
BatchSet<SortedCSCPage> SimpleDMatrix::GetSortedColumnBatches() {
BatchSet<SortedCSCPage> SimpleDMatrix::GetSortedColumnBatches(Context const* ctx) {
// Sorted column page doesn't exist, generate it
if (!sorted_column_page_) {
sorted_column_page_.reset(
new SortedCSCPage(sparse_page_->GetTranspose(info_.num_col_, ctx_.Threads())));
sorted_column_page_->SortRows(ctx_.Threads());
new SortedCSCPage(sparse_page_->GetTranspose(info_.num_col_, ctx->Threads())));
sorted_column_page_->SortRows(ctx->Threads());
}
auto begin_iter = BatchIterator<SortedCSCPage>(
new SimpleBatchIteratorImpl<SortedCSCPage>(sorted_column_page_));
auto begin_iter =
BatchIterator<SortedCSCPage>(new SimpleBatchIteratorImpl<SortedCSCPage>(sorted_column_page_));
return BatchSet<SortedCSCPage>(begin_iter);
}
namespace {
void CheckEmpty(BatchParam const& l, BatchParam const& r) {
if (l == BatchParam{}) {
CHECK(r != BatchParam{}) << "Batch parameter is not initialized.";
BatchSet<EllpackPage> SimpleDMatrix::GetEllpackBatches(Context const* ctx,
const BatchParam& param) {
detail::CheckEmpty(batch_param_, param);
if (ellpack_page_ && param.Initialized() && param.forbid_regen) {
if (detail::RegenGHist(batch_param_, param)) {
CHECK_EQ(batch_param_.max_bin, param.max_bin) << error::InconsistentMaxBin();
}
CHECK(!detail::RegenGHist(batch_param_, param));
}
}
} // anonymous namespace
BatchSet<EllpackPage> SimpleDMatrix::GetEllpackBatches(const BatchParam& param) {
// ELLPACK page doesn't exist, generate it
CheckEmpty(batch_param_, param);
if (!ellpack_page_ || RegenGHist(batch_param_, param)) {
CHECK_GE(param.gpu_id, 0);
if (!ellpack_page_ || detail::RegenGHist(batch_param_, param)) {
// ELLPACK page doesn't exist, generate it
LOG(INFO) << "Generating new Ellpack page.";
// These places can ask for a ellpack page:
// - GPU hist: the ctx must be on CUDA.
// - IterativeDMatrix::InitFromCUDA: The ctx must be on CUDA.
// - IterativeDMatrix::InitFromCPU: It asks for ellpack only if it exists. It should
// not regen, otherwise it indicates a mismatched parameter like max_bin.
CHECK_GE(param.max_bin, 2);
ellpack_page_.reset(new EllpackPage(this, param));
batch_param_ = param;
if (ctx->IsCUDA()) {
// The context passed in is on GPU, we pick it first since we prioritize the context
// in Booster.
ellpack_page_.reset(new EllpackPage(ctx, this, param));
} else if (fmat_ctx_.IsCUDA()) {
// DMatrix was initialized on GPU, we use the context from initialization.
ellpack_page_.reset(new EllpackPage(&fmat_ctx_, this, param));
} else {
// Mismatched parameter, user set a new max_bin during training.
auto cuda_ctx = ctx->MakeCUDA();
ellpack_page_.reset(new EllpackPage(&cuda_ctx, this, param));
}
batch_param_ = param.MakeCache();
}
auto begin_iter =
BatchIterator<EllpackPage>(new SimpleBatchIteratorImpl<EllpackPage>(ellpack_page_));
return BatchSet<EllpackPage>(begin_iter);
}
BatchSet<GHistIndexMatrix> SimpleDMatrix::GetGradientIndex(const BatchParam& param) {
CheckEmpty(batch_param_, param);
if (!gradient_index_ || RegenGHist(batch_param_, param)) {
BatchSet<GHistIndexMatrix> SimpleDMatrix::GetGradientIndex(Context const* ctx,
const BatchParam& param) {
detail::CheckEmpty(batch_param_, param);
// Check whether we can regenerate the gradient index. This is to keep the consistency
// between evaluation data and training data.
if (gradient_index_ && param.Initialized() && param.forbid_regen) {
if (detail::RegenGHist(batch_param_, param)) {
CHECK_EQ(batch_param_.max_bin, param.max_bin) << error::InconsistentMaxBin();
}
CHECK(!detail::RegenGHist(batch_param_, param)) << "Inconsistent sparse threshold.";
}
if (!gradient_index_ || detail::RegenGHist(batch_param_, param)) {
// GIDX page doesn't exist, generate it
LOG(INFO) << "Generating new Gradient Index.";
// These places can ask for a CSR gidx:
// - CPU Hist: the ctx must be on CPU.
// - IterativeDMatrix::InitFromCPU: The ctx must be on CPU.
// - IterativeDMatrix::InitFromCUDA: It asks for gidx only if it exists. It should not
// regen, otherwise it indicates a mismatched parameter like max_bin.
CHECK_GE(param.max_bin, 2);
CHECK_EQ(param.gpu_id, -1);
// Used only by approx.
auto sorted_sketch = param.regen;
gradient_index_.reset(new GHistIndexMatrix(this, param.max_bin, param.sparse_thresh,
sorted_sketch, this->ctx_.Threads(), param.hess));
batch_param_ = param;
if (ctx->IsCPU()) {
// The context passed in is on CPU, we pick it first since we prioritize the context
// in Booster.
gradient_index_.reset(new GHistIndexMatrix{ctx, this, param.max_bin, param.sparse_thresh,
sorted_sketch, param.hess});
} else if (fmat_ctx_.IsCPU()) {
// DMatrix was initialized on CPU, we use the context from initialization.
gradient_index_.reset(new GHistIndexMatrix{&fmat_ctx_, this, param.max_bin,
param.sparse_thresh, sorted_sketch, param.hess});
} else {
// Mismatched parameter, user set a new max_bin during training.
auto cpu_ctx = ctx->MakeCPU();
gradient_index_.reset(new GHistIndexMatrix{&cpu_ctx, this, param.max_bin, param.sparse_thresh,
sorted_sketch, param.hess});
}
batch_param_ = param.MakeCache();
CHECK_EQ(batch_param_.hess.data(), param.hess.data());
}
auto begin_iter = BatchIterator<GHistIndexMatrix>(
@@ -155,7 +199,7 @@ BatchSet<GHistIndexMatrix> SimpleDMatrix::GetGradientIndex(const BatchParam& par
return BatchSet<GHistIndexMatrix>(begin_iter);
}
BatchSet<ExtSparsePage> SimpleDMatrix::GetExtBatches(BatchParam const&) {
BatchSet<ExtSparsePage> SimpleDMatrix::GetExtBatches(Context const*, BatchParam const&) {
auto casted = std::make_shared<ExtSparsePage>(sparse_page_);
CHECK(casted);
auto begin_iter =
@@ -166,7 +210,8 @@ BatchSet<ExtSparsePage> SimpleDMatrix::GetExtBatches(BatchParam const&) {
template <typename AdapterT>
SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread,
DataSplitMode data_split_mode) {
this->ctx_.nthread = nthread;
Context ctx;
ctx.Init(Args{{"nthread", std::to_string(nthread)}});
std::vector<uint64_t> qids;
uint64_t default_max = std::numeric_limits<uint64_t>::max();
@@ -176,13 +221,13 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread,
auto& data_vec = sparse_page_->data.HostVector();
uint64_t inferred_num_columns = 0;
uint64_t total_batch_size = 0;
// batch_size is either number of rows or cols, depending on data layout
// batch_size is either number of rows or cols, depending on data layout
adapter->BeforeFirst();
// Iterate over batches of input data
while (adapter->Next()) {
auto& batch = adapter->Value();
auto batch_max_columns = sparse_page_->Push(batch, missing, ctx_.Threads());
auto batch_max_columns = sparse_page_->Push(batch, missing, ctx.Threads());
inferred_num_columns = std::max(batch_max_columns, inferred_num_columns);
total_batch_size += batch.Size();
// Append meta information if available
@@ -229,19 +274,18 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread,
info_.num_col_ = adapter->NumColumns();
}
// Synchronise worker columns
info_.data_split_mode = data_split_mode;
ReindexFeatures();
ReindexFeatures(&ctx);
info_.SynchronizeNumberOfColumns();
if (adapter->NumRows() == kAdapterUnknownSize) {
using IteratorAdapterT
= IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>;
using IteratorAdapterT =
IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>;
// If AdapterT is either IteratorAdapter or FileAdapter type, use the total batch size to
// determine the correct number of rows, as offset_vec may be too short
if (std::is_same<AdapterT, IteratorAdapterT>::value
|| std::is_same<AdapterT, FileAdapter>::value) {
if (std::is_same<AdapterT, IteratorAdapterT>::value ||
std::is_same<AdapterT, FileAdapter>::value) {
info_.num_row_ = total_batch_size;
// Ensure offset_vec.size() - 1 == [number of rows]
while (offset_vec.size() - 1 < total_batch_size) {
@@ -265,9 +309,11 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread,
info_.num_nonzero_ = data_vec.size();
// Sort the index for row partitioners used by variuos tree methods.
if (!sparse_page_->IsIndicesSorted(this->ctx_.Threads())) {
sparse_page_->SortIndices(this->ctx_.Threads());
if (!sparse_page_->IsIndicesSorted(ctx.Threads())) {
sparse_page_->SortIndices(ctx.Threads());
}
this->fmat_ctx_ = ctx;
}
SimpleDMatrix::SimpleDMatrix(dmlc::Stream* in_stream) {
@@ -280,12 +326,12 @@ SimpleDMatrix::SimpleDMatrix(dmlc::Stream* in_stream) {
}
void SimpleDMatrix::SaveToLocalFile(const std::string& fname) {
std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(fname.c_str(), "w"));
int tmagic = kMagic;
fo->Write(tmagic);
info_.SaveBinary(fo.get());
fo->Write(sparse_page_->offset.HostVector());
fo->Write(sparse_page_->data.HostVector());
std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(fname.c_str(), "w"));
int tmagic = kMagic;
fo->Write(tmagic);
info_.SaveBinary(fo.get());
fo->Write(sparse_page_->offset.HostVector());
fo->Write(sparse_page_->data.HostVector());
}
template SimpleDMatrix::SimpleDMatrix(DenseAdapter* adapter, float missing, int nthread,
@@ -305,14 +351,14 @@ template SimpleDMatrix::SimpleDMatrix(DataTableAdapter* adapter, float missing,
template SimpleDMatrix::SimpleDMatrix(FileAdapter* adapter, float missing, int nthread,
DataSplitMode data_split_mode);
template SimpleDMatrix::SimpleDMatrix(
IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>
*adapter,
IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>* adapter,
float missing, int nthread, DataSplitMode data_split_mode);
template <>
SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, int nthread,
DataSplitMode data_split_mode) {
ctx_.nthread = nthread;
Context ctx;
ctx.nthread = nthread;
auto& offset_vec = sparse_page_->offset.HostVector();
auto& data_vec = sparse_page_->data.HostVector();
@@ -326,7 +372,7 @@ SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, i
size_t num_elements = 0;
size_t num_rows = 0;
// Import Arrow RecordBatches
#pragma omp parallel for reduction(+ : num_elements, num_rows) num_threads(ctx_.Threads())
#pragma omp parallel for reduction(+ : num_elements, num_rows) num_threads(ctx.Threads())
for (int i = 0; i < static_cast<int>(batches.size()); ++i) { // NOLINT
num_elements += batches[i]->Import(missing);
num_rows += batches[i]->Size();
@@ -348,7 +394,7 @@ SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, i
data_vec.resize(total_elements);
offset_vec.resize(total_batch_size + 1);
// Copy data into DMatrix
#pragma omp parallel num_threads(ctx_.Threads())
#pragma omp parallel num_threads(ctx.Threads())
{
#pragma omp for nowait
for (int i = 0; i < static_cast<int>(batches.size()); ++i) { // NOLINT
@@ -372,12 +418,14 @@ SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, i
// Synchronise worker columns
info_.num_col_ = adapter->NumColumns();
info_.data_split_mode = data_split_mode;
ReindexFeatures();
ReindexFeatures(&ctx);
info_.SynchronizeNumberOfColumns();
info_.num_row_ = total_batch_size;
info_.num_nonzero_ = data_vec.size();
CHECK_EQ(offset_vec.back(), info_.num_nonzero_);
fmat_ctx_ = ctx;
}
} // namespace data
} // namespace xgboost

View File

@@ -1,12 +1,14 @@
/*!
* Copyright 2019-2021 by XGBoost Contributors
/**
* Copyright 2019-2023, XGBoost Contributors
* \file simple_dmatrix.cu
*/
#include <thrust/copy.h>
#include <xgboost/data.h>
#include "device_adapter.cuh" // for CurrentDevice
#include "simple_dmatrix.cuh"
#include "simple_dmatrix.h"
#include "device_adapter.cuh"
#include "xgboost/context.h" // for Context
#include "xgboost/data.h"
namespace xgboost {
namespace data {
@@ -15,7 +17,7 @@ namespace data {
// Current implementation assumes a single batch. More batches can
// be supported in future. Does not currently support inferring row/column size
template <typename AdapterT>
SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int32_t /*nthread*/,
SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, std::int32_t nthread,
DataSplitMode data_split_mode) {
CHECK(data_split_mode != DataSplitMode::kCol)
<< "Column-wise data split is currently not supported on the GPU.";
@@ -24,6 +26,9 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int32_t /*nthread
CHECK_GE(device, 0);
dh::safe_cuda(cudaSetDevice(device));
Context ctx;
ctx.Init(Args{{"nthread", std::to_string(nthread)}, {"gpu_id", std::to_string(device)}});
CHECK(adapter->NumRows() != kAdapterUnknownSize);
CHECK(adapter->NumColumns() != kAdapterUnknownSize);
@@ -33,13 +38,14 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int32_t /*nthread
// Enforce single batch
CHECK(!adapter->Next());
info_.num_nonzero_ =
CopyToSparsePage(adapter->Value(), device, missing, sparse_page_.get());
info_.num_nonzero_ = CopyToSparsePage(adapter->Value(), device, missing, sparse_page_.get());
info_.num_col_ = adapter->NumColumns();
info_.num_row_ = adapter->NumRows();
// Synchronise worker columns
info_.data_split_mode = data_split_mode;
info_.SynchronizeNumberOfColumns();
this->fmat_ctx_ = ctx;
}
template SimpleDMatrix::SimpleDMatrix(CudfAdapter* adapter, float missing,

View File

@@ -32,7 +32,7 @@ class SimpleDMatrix : public DMatrix {
MetaInfo& Info() override;
const MetaInfo& Info() const override;
Context const* Ctx() const override { return &ctx_; }
Context const* Ctx() const override { return &fmat_ctx_; }
bool SingleColBlock() const override { return true; }
DMatrix* Slice(common::Span<int32_t const> ridxs) override;
@@ -43,11 +43,11 @@ class SimpleDMatrix : public DMatrix {
protected:
BatchSet<SparsePage> GetRowBatches() override;
BatchSet<CSCPage> GetColumnBatches() override;
BatchSet<SortedCSCPage> GetSortedColumnBatches() override;
BatchSet<EllpackPage> GetEllpackBatches(const BatchParam& param) override;
BatchSet<GHistIndexMatrix> GetGradientIndex(const BatchParam& param) override;
BatchSet<ExtSparsePage> GetExtBatches(BatchParam const& param) override;
BatchSet<CSCPage> GetColumnBatches(Context const* ctx) override;
BatchSet<SortedCSCPage> GetSortedColumnBatches(Context const* ctx) override;
BatchSet<EllpackPage> GetEllpackBatches(Context const* ctx, const BatchParam& param) override;
BatchSet<GHistIndexMatrix> GetGradientIndex(Context const* ctx, const BatchParam& param) override;
BatchSet<ExtSparsePage> GetExtBatches(Context const* ctx, BatchParam const& param) override;
MetaInfo info_;
// Primary storage type
@@ -69,10 +69,11 @@ class SimpleDMatrix : public DMatrix {
* starting from 0. However, all the algorithms assume the features are globally indexed, so we
* reindex the features based on the offset needed to obtain the global view.
*/
void ReindexFeatures();
void ReindexFeatures(Context const* ctx);
private:
Context ctx_;
// Context used only for DMatrix initialization.
Context fmat_ctx_;
};
} // namespace data
} // namespace xgboost

View File

@@ -1,6 +1,7 @@
/*!
* Copyright 2014-2022 by Contributors
/**
* Copyright 2014-2023 by XGBoost Contributors
* \file sparse_page_dmatrix.cc
*
* \brief The external memory version of Page Iterator.
* \author Tianqi Chen
*/
@@ -8,11 +9,10 @@
#include "../collective/communicator-inl.h"
#include "./simple_batch_iterator.h"
#include "batch_utils.h" // for RegenGHist
#include "gradient_index.h"
namespace xgboost {
namespace data {
namespace xgboost::data {
MetaInfo &SparsePageDMatrix::Info() { return info_; }
const MetaInfo &SparsePageDMatrix::Info() const { return info_; }
@@ -46,7 +46,9 @@ SparsePageDMatrix::SparsePageDMatrix(DataIterHandle iter_handle, DMatrixHandle p
int32_t nthreads, std::string cache_prefix)
: proxy_{proxy_handle}, iter_{iter_handle}, reset_{reset}, next_{next}, missing_{missing},
cache_prefix_{std::move(cache_prefix)} {
ctx_.nthread = nthreads;
Context ctx;
ctx.nthread = nthreads;
cache_prefix_ = cache_prefix_.empty() ? "DMatrix" : cache_prefix_;
if (collective::IsDistributed()) {
cache_prefix_ += ("-r" + std::to_string(collective::GetRank()));
@@ -81,7 +83,7 @@ SparsePageDMatrix::SparsePageDMatrix(DataIterHandle iter_handle, DMatrixHandle p
// the proxy is iterated together with the sparse page source so we can obtain all
// information in 1 pass.
for (auto const &page : this->GetRowBatchesImpl()) {
for (auto const &page : this->GetRowBatchesImpl(&ctx)) {
this->info_.Extend(std::move(proxy->Info()), false, false);
n_features = std::max(n_features, num_cols());
n_samples += num_rows();
@@ -98,9 +100,11 @@ SparsePageDMatrix::SparsePageDMatrix(DataIterHandle iter_handle, DMatrixHandle p
info_.SynchronizeNumberOfColumns();
CHECK_NE(info_.num_col_, 0);
fmat_ctx_ = ctx;
}
void SparsePageDMatrix::InitializeSparsePage() {
void SparsePageDMatrix::InitializeSparsePage(Context const *ctx) {
auto id = MakeCache(this, ".row.page", cache_prefix_, &cache_info_);
// Don't use proxy DMatrix once this is already initialized, this allows users to
// release the iterator and data.
@@ -110,33 +114,33 @@ void SparsePageDMatrix::InitializeSparsePage() {
return;
}
auto iter = DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext>{
iter_, reset_, next_};
auto iter = DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext>{iter_, reset_, next_};
DMatrixProxy *proxy = MakeProxy(proxy_);
sparse_page_source_.reset(); // clear before creating new one to prevent conflicts.
sparse_page_source_ = std::make_shared<SparsePageSource>(
iter, proxy, this->missing_, this->ctx_.Threads(), this->info_.num_col_,
this->n_batches_, cache_info_.at(id));
sparse_page_source_ = std::make_shared<SparsePageSource>(iter, proxy, this->missing_,
ctx->Threads(), this->info_.num_col_,
this->n_batches_, cache_info_.at(id));
}
BatchSet<SparsePage> SparsePageDMatrix::GetRowBatchesImpl() {
this->InitializeSparsePage();
BatchSet<SparsePage> SparsePageDMatrix::GetRowBatchesImpl(Context const* ctx) {
this->InitializeSparsePage(ctx);
auto begin_iter = BatchIterator<SparsePage>(sparse_page_source_);
return BatchSet<SparsePage>(BatchIterator<SparsePage>(begin_iter));
}
BatchSet<SparsePage> SparsePageDMatrix::GetRowBatches() {
return this->GetRowBatchesImpl();
// Use context from initialization for the default row page.
return this->GetRowBatchesImpl(&fmat_ctx_);
}
BatchSet<CSCPage> SparsePageDMatrix::GetColumnBatches() {
BatchSet<CSCPage> SparsePageDMatrix::GetColumnBatches(Context const *ctx) {
auto id = MakeCache(this, ".col.page", cache_prefix_, &cache_info_);
CHECK_NE(this->Info().num_col_, 0);
this->InitializeSparsePage();
this->InitializeSparsePage(ctx);
if (!column_source_) {
column_source_ = std::make_shared<CSCPageSource>(
this->missing_, this->ctx_.Threads(), this->Info().num_col_,
this->n_batches_, cache_info_.at(id), sparse_page_source_);
column_source_ =
std::make_shared<CSCPageSource>(this->missing_, ctx->Threads(), this->Info().num_col_,
this->n_batches_, cache_info_.at(id), sparse_page_source_);
} else {
column_source_->Reset();
}
@@ -144,14 +148,14 @@ BatchSet<CSCPage> SparsePageDMatrix::GetColumnBatches() {
return BatchSet<CSCPage>(BatchIterator<CSCPage>(begin_iter));
}
BatchSet<SortedCSCPage> SparsePageDMatrix::GetSortedColumnBatches() {
BatchSet<SortedCSCPage> SparsePageDMatrix::GetSortedColumnBatches(Context const *ctx) {
auto id = MakeCache(this, ".sorted.col.page", cache_prefix_, &cache_info_);
CHECK_NE(this->Info().num_col_, 0);
this->InitializeSparsePage();
this->InitializeSparsePage(ctx);
if (!sorted_column_source_) {
sorted_column_source_ = std::make_shared<SortedCSCPageSource>(
this->missing_, this->ctx_.Threads(), this->Info().num_col_,
this->n_batches_, cache_info_.at(id), sparse_page_source_);
this->missing_, ctx->Threads(), this->Info().num_col_, this->n_batches_, cache_info_.at(id),
sparse_page_source_);
} else {
sorted_column_source_->Reset();
}
@@ -159,27 +163,27 @@ BatchSet<SortedCSCPage> SparsePageDMatrix::GetSortedColumnBatches() {
return BatchSet<SortedCSCPage>(BatchIterator<SortedCSCPage>(begin_iter));
}
BatchSet<GHistIndexMatrix> SparsePageDMatrix::GetGradientIndex(const BatchParam &param) {
BatchSet<GHistIndexMatrix> SparsePageDMatrix::GetGradientIndex(Context const *ctx,
const BatchParam &param) {
CHECK_GE(param.max_bin, 2);
auto id = MakeCache(this, ".gradient_index.page", cache_prefix_, &cache_info_);
this->InitializeSparsePage();
if (!cache_info_.at(id)->written || RegenGHist(batch_param_, param)) {
this->InitializeSparsePage(ctx);
if (!cache_info_.at(id)->written || detail::RegenGHist(batch_param_, param)) {
cache_info_.erase(id);
MakeCache(this, ".gradient_index.page", cache_prefix_, &cache_info_);
LOG(INFO) << "Generating new Gradient Index.";
// Use sorted sketch for approx.
auto sorted_sketch = param.regen;
auto cuts =
common::SketchOnDMatrix(this, param.max_bin, ctx_.Threads(), sorted_sketch, param.hess);
this->InitializeSparsePage(); // reset after use.
auto cuts = common::SketchOnDMatrix(ctx, this, param.max_bin, sorted_sketch, param.hess);
this->InitializeSparsePage(ctx); // reset after use.
batch_param_ = param;
ghist_index_source_.reset();
CHECK_NE(cuts.Values().size(), 0);
auto ft = this->info_.feature_types.ConstHostSpan();
ghist_index_source_.reset(new GradientIndexPageSource(
this->missing_, this->ctx_.Threads(), this->Info().num_col_, this->n_batches_,
cache_info_.at(id), param, std::move(cuts), this->IsDense(), ft, sparse_page_source_));
this->missing_, ctx->Threads(), this->Info().num_col_, this->n_batches_, cache_info_.at(id),
param, std::move(cuts), this->IsDense(), ft, sparse_page_source_));
} else {
CHECK(ghist_index_source_);
ghist_index_source_->Reset();
@@ -189,11 +193,10 @@ BatchSet<GHistIndexMatrix> SparsePageDMatrix::GetGradientIndex(const BatchParam
}
#if !defined(XGBOOST_USE_CUDA)
BatchSet<EllpackPage> SparsePageDMatrix::GetEllpackBatches(const BatchParam &) {
BatchSet<EllpackPage> SparsePageDMatrix::GetEllpackBatches(Context const *, const BatchParam &) {
common::AssertGPUSupport();
auto begin_iter = BatchIterator<EllpackPage>(ellpack_page_source_);
return BatchSet<EllpackPage>(BatchIterator<EllpackPage>(begin_iter));
}
#endif // !defined(XGBOOST_USE_CUDA)
} // namespace data
} // namespace xgboost
} // namespace xgboost::data

View File

@@ -1,42 +1,40 @@
/*!
* Copyright 2021 XGBoost contributors
/**
* Copyright 2021-2023 by XGBoost contributors
*/
#include "sparse_page_source.h"
#include "../common/hist_util.cuh"
#include "batch_utils.h" // for CheckEmpty, RegenGHist
#include "ellpack_page.cuh"
#include "sparse_page_dmatrix.h"
#include "sparse_page_source.h"
namespace xgboost {
namespace data {
BatchSet<EllpackPage> SparsePageDMatrix::GetEllpackBatches(const BatchParam& param) {
CHECK_GE(param.gpu_id, 0);
namespace xgboost::data {
BatchSet<EllpackPage> SparsePageDMatrix::GetEllpackBatches(Context const* ctx,
const BatchParam& param) {
CHECK(ctx->IsCUDA());
CHECK_GE(param.max_bin, 2);
if (!(batch_param_ != BatchParam{})) {
CHECK(param != BatchParam{}) << "Batch parameter is not initialized.";
}
detail::CheckEmpty(batch_param_, param);
auto id = MakeCache(this, ".ellpack.page", cache_prefix_, &cache_info_);
size_t row_stride = 0;
this->InitializeSparsePage();
if (!cache_info_.at(id)->written || RegenGHist(batch_param_, param)) {
this->InitializeSparsePage(ctx);
if (!cache_info_.at(id)->written || detail::RegenGHist(batch_param_, param)) {
// reinitialize the cache
cache_info_.erase(id);
MakeCache(this, ".ellpack.page", cache_prefix_, &cache_info_);
std::unique_ptr<common::HistogramCuts> cuts;
cuts.reset(new common::HistogramCuts{
common::DeviceSketch(param.gpu_id, this, param.max_bin, 0)});
this->InitializeSparsePage(); // reset after use.
cuts.reset(
new common::HistogramCuts{common::DeviceSketch(ctx->gpu_id, this, param.max_bin, 0)});
this->InitializeSparsePage(ctx); // reset after use.
row_stride = GetRowStride(this);
this->InitializeSparsePage(); // reset after use.
this->InitializeSparsePage(ctx); // reset after use.
CHECK_NE(row_stride, 0);
batch_param_ = param;
auto ft = this->info_.feature_types.ConstDeviceSpan();
ellpack_page_source_.reset(); // release resources.
ellpack_page_source_.reset(new EllpackPageSource(
this->missing_, this->ctx_.Threads(), this->Info().num_col_,
this->n_batches_, cache_info_.at(id), param, std::move(cuts),
this->IsDense(), row_stride, ft, sparse_page_source_));
this->missing_, ctx->Threads(), this->Info().num_col_, this->n_batches_, cache_info_.at(id),
param, std::move(cuts), this->IsDense(), row_stride, ft, sparse_page_source_, ctx->gpu_id));
} else {
CHECK(sparse_page_source_);
ellpack_page_source_->Reset();
@@ -45,5 +43,4 @@ BatchSet<EllpackPage> SparsePageDMatrix::GetEllpackBatches(const BatchParam& par
auto begin_iter = BatchIterator<EllpackPage>(ellpack_page_source_);
return BatchSet<EllpackPage>(BatchIterator<EllpackPage>(begin_iter));
}
} // namespace data
} // namespace xgboost
} // namespace xgboost::data

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2015-2021 by Contributors
/**
* Copyright 2015-2023, XGBoost Contributors
* \file sparse_page_dmatrix.h
* \brief External-memory version of DMatrix.
* \author Tianqi Chen
@@ -9,12 +9,13 @@
#include <xgboost/data.h>
#include <xgboost/logging.h>
#include <algorithm>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include <map>
#include "ellpack_page_source.h"
#include "gradient_index_page_source.h"
@@ -69,19 +70,18 @@ class SparsePageDMatrix : public DMatrix {
XGDMatrixCallbackNext *next_;
float missing_;
Context ctx_;
Context fmat_ctx_;
std::string cache_prefix_;
uint32_t n_batches_ {0};
uint32_t n_batches_{0};
// sparse page is the source to other page types, we make a special member function.
void InitializeSparsePage();
void InitializeSparsePage(Context const *ctx);
// Non-virtual version that can be used in constructor
BatchSet<SparsePage> GetRowBatchesImpl();
BatchSet<SparsePage> GetRowBatchesImpl(Context const *ctx);
public:
explicit SparsePageDMatrix(DataIterHandle iter, DMatrixHandle proxy,
DataIterResetCallback *reset,
XGDMatrixCallbackNext *next, float missing,
int32_t nthreads, std::string cache_prefix);
explicit SparsePageDMatrix(DataIterHandle iter, DMatrixHandle proxy, DataIterResetCallback *reset,
XGDMatrixCallbackNext *next, float missing, int32_t nthreads,
std::string cache_prefix);
~SparsePageDMatrix() override {
// Clear out all resources before deleting the cache file.
@@ -98,9 +98,9 @@ class SparsePageDMatrix : public DMatrix {
}
}
MetaInfo& Info() override;
const MetaInfo& Info() const override;
Context const* Ctx() const override { return &ctx_; }
MetaInfo &Info() override;
const MetaInfo &Info() const override;
Context const *Ctx() const override { return &fmat_ctx_; }
bool SingleColBlock() const override { return false; }
DMatrix *Slice(common::Span<int32_t const>) override {
@@ -114,11 +114,11 @@ class SparsePageDMatrix : public DMatrix {
private:
BatchSet<SparsePage> GetRowBatches() override;
BatchSet<CSCPage> GetColumnBatches() override;
BatchSet<SortedCSCPage> GetSortedColumnBatches() override;
BatchSet<EllpackPage> GetEllpackBatches(const BatchParam& param) override;
BatchSet<GHistIndexMatrix> GetGradientIndex(const BatchParam&) override;
BatchSet<ExtSparsePage> GetExtBatches(BatchParam const &) override {
BatchSet<CSCPage> GetColumnBatches(Context const *ctx) override;
BatchSet<SortedCSCPage> GetSortedColumnBatches(Context const *ctx) override;
BatchSet<EllpackPage> GetEllpackBatches(Context const *ctx, const BatchParam &param) override;
BatchSet<GHistIndexMatrix> GetGradientIndex(Context const *ctx, const BatchParam &) override;
BatchSet<ExtSparsePage> GetExtBatches(Context const *, BatchParam const &) override {
LOG(FATAL) << "Can not obtain a single CSR page for external memory DMatrix";
return BatchSet<ExtSparsePage>(BatchIterator<ExtSparsePage>(nullptr));
}
@@ -141,9 +141,8 @@ inline std::string MakeId(std::string prefix, SparsePageDMatrix *ptr) {
return prefix + "-" + ss.str();
}
inline std::string
MakeCache(SparsePageDMatrix *ptr, std::string format, std::string prefix,
std::map<std::string, std::shared_ptr<Cache>> *out) {
inline std::string MakeCache(SparsePageDMatrix *ptr, std::string format, std::string prefix,
std::map<std::string, std::shared_ptr<Cache>> *out) {
auto &cache_info = *out;
auto name = MakeId(prefix, ptr);
auto id = name + format;

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2018 by Contributors
/**
* Copyright 2018-2023 by XGBoost Contributors
* \author Rory Mitchell
*/
#pragma once
@@ -78,11 +78,12 @@ inline double CoordinateDeltaBias(double sum_grad, double sum_hess) {
*
* \return The gradient and diagonal Hessian entry for a given feature.
*/
inline std::pair<double, double> GetGradient(int group_idx, int num_group, int fidx,
const std::vector<GradientPair> &gpair,
inline std::pair<double, double> GetGradient(Context const *ctx, int group_idx, int num_group,
bst_feature_t fidx,
std::vector<GradientPair> const &gpair,
DMatrix *p_fmat) {
double sum_grad = 0.0, sum_hess = 0.0;
for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
for (const auto &batch : p_fmat->GetBatches<CSCPage>(ctx)) {
auto page = batch.GetView();
auto col = page[fidx];
const auto ndata = static_cast<bst_omp_uint>(col.size());
@@ -115,7 +116,7 @@ inline std::pair<double, double> GetGradientParallel(Context const *ctx, int gro
std::vector<double> sum_grad_tloc(ctx->Threads(), 0.0);
std::vector<double> sum_hess_tloc(ctx->Threads(), 0.0);
for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
for (const auto &batch : p_fmat->GetBatches<CSCPage>(ctx)) {
auto page = batch.GetView();
auto col = page[fidx];
const auto ndata = static_cast<bst_omp_uint>(col.size());
@@ -177,16 +178,16 @@ inline std::pair<double, double> GetBiasGradientParallel(int group_idx, int num_
* \param in_gpair The gradient vector to be updated.
* \param p_fmat The input feature matrix.
*/
inline void UpdateResidualParallel(int fidx, int group_idx, int num_group,
float dw, std::vector<GradientPair> *in_gpair,
DMatrix *p_fmat, int32_t n_threads) {
inline void UpdateResidualParallel(Context const *ctx, bst_feature_t fidx, int group_idx,
int num_group, float dw, std::vector<GradientPair> *in_gpair,
DMatrix *p_fmat) {
if (dw == 0.0f) return;
for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
for (const auto &batch : p_fmat->GetBatches<CSCPage>(ctx)) {
auto page = batch.GetView();
auto col = page[fidx];
// update grad value
const auto num_row = static_cast<bst_omp_uint>(col.size());
common::ParallelFor(num_row, n_threads, [&](auto j) {
common::ParallelFor(num_row, ctx->Threads(), [&](auto j) {
GradientPair &p = (*in_gpair)[col[j].index * num_group + group_idx];
if (p.GetHess() < 0.0f) return;
p += GradientPair(p.GetHess() * col[j].fvalue * dw, 0);
@@ -203,12 +204,12 @@ inline void UpdateResidualParallel(int fidx, int group_idx, int num_group,
* \param in_gpair The gradient vector to be updated.
* \param p_fmat The input feature matrix.
*/
inline void UpdateBiasResidualParallel(int group_idx, int num_group, float dbias,
std::vector<GradientPair> *in_gpair, DMatrix *p_fmat,
int32_t n_threads) {
inline void UpdateBiasResidualParallel(Context const *ctx, int group_idx, int num_group,
float dbias, std::vector<GradientPair> *in_gpair,
DMatrix *p_fmat) {
if (dbias == 0.0f) return;
const auto ndata = static_cast<bst_omp_uint>(p_fmat->Info().num_row_);
common::ParallelFor(ndata, n_threads, [&](auto i) {
common::ParallelFor(ndata, ctx->Threads(), [&](auto i) {
GradientPair &g = (*in_gpair)[i * num_group + group_idx];
if (g.GetHess() < 0.0f) return;
g += GradientPair(g.GetHess() * dbias, 0);
@@ -220,18 +221,16 @@ inline void UpdateBiasResidualParallel(int group_idx, int num_group, float dbias
* in coordinate descent algorithms.
*/
class FeatureSelector {
protected:
int32_t n_threads_{-1};
public:
explicit FeatureSelector(int32_t n_threads) : n_threads_{n_threads} {}
FeatureSelector() = default;
/*! \brief factory method */
static FeatureSelector *Create(int choice, int32_t n_threads);
static FeatureSelector *Create(int choice);
/*! \brief virtual destructor */
virtual ~FeatureSelector() = default;
/**
* \brief Setting up the selector state prior to looping through features.
*
* \param ctx The booster context.
* \param model The model.
* \param gpair The gpair.
* \param p_fmat The feature matrix.
@@ -239,13 +238,12 @@ class FeatureSelector {
* \param lambda Regularisation lambda.
* \param param A parameter with algorithm-dependent use.
*/
virtual void Setup(const gbm::GBLinearModel &,
const std::vector<GradientPair> &,
DMatrix *,
float , float , int ) {}
virtual void Setup(Context const *, const gbm::GBLinearModel &,
const std::vector<GradientPair> &, DMatrix *, float, float, int) {}
/**
* \brief Select next coordinate to update.
*
* \param ctx Booster context
* \param iteration The iteration in a loop through features
* \param model The model.
* \param group_idx Zero-based index of the group.
@@ -256,11 +254,9 @@ class FeatureSelector {
*
* \return The index of the selected feature. -1 indicates none selected.
*/
virtual int NextFeature(int iteration,
const gbm::GBLinearModel &model,
int group_idx,
const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, float alpha, float lambda) = 0;
virtual int NextFeature(Context const *ctx, int iteration, const gbm::GBLinearModel &model,
int group_idx, const std::vector<GradientPair> &gpair, DMatrix *p_fmat,
float alpha, float lambda) = 0;
};
/**
@@ -269,9 +265,8 @@ class FeatureSelector {
class CyclicFeatureSelector : public FeatureSelector {
public:
using FeatureSelector::FeatureSelector;
int NextFeature(int iteration, const gbm::GBLinearModel &model,
int , const std::vector<GradientPair> &,
DMatrix *, float, float) override {
int NextFeature(Context const *, int iteration, const gbm::GBLinearModel &model, int,
const std::vector<GradientPair> &, DMatrix *, float, float) override {
return iteration % model.learner_model_param->num_feature;
}
};
@@ -283,8 +278,7 @@ class CyclicFeatureSelector : public FeatureSelector {
class ShuffleFeatureSelector : public FeatureSelector {
public:
using FeatureSelector::FeatureSelector;
void Setup(const gbm::GBLinearModel &model,
const std::vector<GradientPair>&,
void Setup(Context const *, const gbm::GBLinearModel &model, const std::vector<GradientPair> &,
DMatrix *, float, float, int) override {
if (feat_index_.size() == 0) {
feat_index_.resize(model.learner_model_param->num_feature);
@@ -293,9 +287,8 @@ class ShuffleFeatureSelector : public FeatureSelector {
std::shuffle(feat_index_.begin(), feat_index_.end(), common::GlobalRandom());
}
int NextFeature(int iteration, const gbm::GBLinearModel &model,
int, const std::vector<GradientPair> &,
DMatrix *, float, float) override {
int NextFeature(Context const *, int iteration, const gbm::GBLinearModel &model, int,
const std::vector<GradientPair> &, DMatrix *, float, float) override {
return feat_index_[iteration % model.learner_model_param->num_feature];
}
@@ -310,9 +303,8 @@ class ShuffleFeatureSelector : public FeatureSelector {
class RandomFeatureSelector : public FeatureSelector {
public:
using FeatureSelector::FeatureSelector;
int NextFeature(int, const gbm::GBLinearModel &model,
int, const std::vector<GradientPair> &,
DMatrix *, float, float) override {
int NextFeature(Context const *, int, const gbm::GBLinearModel &model, int,
const std::vector<GradientPair> &, DMatrix *, float, float) override {
return common::GlobalRandom()() % model.learner_model_param->num_feature;
}
};
@@ -329,8 +321,7 @@ class RandomFeatureSelector : public FeatureSelector {
class GreedyFeatureSelector : public FeatureSelector {
public:
using FeatureSelector::FeatureSelector;
void Setup(const gbm::GBLinearModel &model,
const std::vector<GradientPair> &,
void Setup(Context const *, const gbm::GBLinearModel &model, const std::vector<GradientPair> &,
DMatrix *, float, float, int param) override {
top_k_ = static_cast<bst_uint>(param);
const bst_uint ngroup = model.learner_model_param->num_output_group;
@@ -344,7 +335,7 @@ class GreedyFeatureSelector : public FeatureSelector {
}
}
int NextFeature(int, const gbm::GBLinearModel &model,
int NextFeature(Context const* ctx, int, const gbm::GBLinearModel &model,
int group_idx, const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, float alpha, float lambda) override {
// k-th selected feature for a group
@@ -356,9 +347,9 @@ class GreedyFeatureSelector : public FeatureSelector {
const bst_omp_uint nfeat = model.learner_model_param->num_feature;
// Calculate univariate gradient sums
std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.));
for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
for (const auto &batch : p_fmat->GetBatches<CSCPage>(ctx)) {
auto page = batch.GetView();
common::ParallelFor(nfeat, this->n_threads_, [&](bst_omp_uint i) {
common::ParallelFor(nfeat, ctx->Threads(), [&](bst_omp_uint i) {
const auto col = page[i];
const bst_uint ndata = col.size();
auto &sums = gpair_sums_[group_idx * nfeat + i];
@@ -406,9 +397,10 @@ class GreedyFeatureSelector : public FeatureSelector {
class ThriftyFeatureSelector : public FeatureSelector {
public:
using FeatureSelector::FeatureSelector;
void Setup(const gbm::GBLinearModel &model,
const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, float alpha, float lambda, int param) override {
void Setup(Context const *ctx, const gbm::GBLinearModel &model,
const std::vector<GradientPair> &gpair, DMatrix *p_fmat, float alpha, float lambda,
int param) override {
top_k_ = static_cast<bst_uint>(param);
if (param <= 0) top_k_ = std::numeric_limits<bst_uint>::max();
const bst_uint ngroup = model.learner_model_param->num_output_group;
@@ -422,10 +414,10 @@ class ThriftyFeatureSelector : public FeatureSelector {
}
// Calculate univariate gradient sums
std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.));
for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
for (const auto &batch : p_fmat->GetBatches<CSCPage>(ctx)) {
auto page = batch.GetView();
// column-parallel is usually fastaer than row-parallel
common::ParallelFor(nfeat, this->n_threads_, [&](auto i) {
common::ParallelFor(nfeat, ctx->Threads(), [&](auto i) {
const auto col = page[i];
const bst_uint ndata = col.size();
for (bst_uint gid = 0u; gid < ngroup; ++gid) {
@@ -462,9 +454,8 @@ class ThriftyFeatureSelector : public FeatureSelector {
}
}
int NextFeature(int, const gbm::GBLinearModel &model,
int group_idx, const std::vector<GradientPair> &,
DMatrix *, float, float) override {
int NextFeature(Context const *, int, const gbm::GBLinearModel &model, int group_idx,
const std::vector<GradientPair> &, DMatrix *, float, float) override {
// k-th selected feature for a group
auto k = counter_[group_idx]++;
// stop after either reaching top-N or going through all the features in a group
@@ -482,18 +473,18 @@ class ThriftyFeatureSelector : public FeatureSelector {
std::vector<std::pair<double, double>> gpair_sums_;
};
inline FeatureSelector *FeatureSelector::Create(int choice, int32_t n_threads) {
inline FeatureSelector *FeatureSelector::Create(int choice) {
switch (choice) {
case kCyclic:
return new CyclicFeatureSelector(n_threads);
return new CyclicFeatureSelector;
case kShuffle:
return new ShuffleFeatureSelector(n_threads);
return new ShuffleFeatureSelector;
case kThrifty:
return new ThriftyFeatureSelector(n_threads);
return new ThriftyFeatureSelector;
case kGreedy:
return new GreedyFeatureSelector(n_threads);
return new GreedyFeatureSelector;
case kRandom:
return new RandomFeatureSelector(n_threads);
return new RandomFeatureSelector;
default:
LOG(FATAL) << "unknown coordinate selector: " << choice;
}

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2018 by Contributors
/**
* Copyright 2018-2023 by XGBoost Contributors
* \author Rory Mitchell
*/
@@ -30,7 +30,7 @@ class CoordinateUpdater : public LinearUpdater {
tparam_.UpdateAllowUnknown(args)
};
cparam_.UpdateAllowUnknown(rest);
selector_.reset(FeatureSelector::Create(tparam_.feature_selector, ctx_->Threads()));
selector_.reset(FeatureSelector::Create(tparam_.feature_selector));
monitor_.Init("CoordinateUpdater");
}
@@ -56,19 +56,17 @@ class CoordinateUpdater : public LinearUpdater {
auto dbias = static_cast<float>(tparam_.learning_rate *
CoordinateDeltaBias(grad.first, grad.second));
model->Bias()[group_idx] += dbias;
UpdateBiasResidualParallel(group_idx, ngroup, dbias, &in_gpair->HostVector(), p_fmat,
ctx_->Threads());
UpdateBiasResidualParallel(ctx_, group_idx, ngroup, dbias, &in_gpair->HostVector(), p_fmat);
}
// prepare for updating the weights
selector_->Setup(*model, in_gpair->ConstHostVector(), p_fmat,
tparam_.reg_alpha_denorm,
tparam_.reg_lambda_denorm, cparam_.top_k);
selector_->Setup(ctx_, *model, in_gpair->ConstHostVector(), p_fmat, tparam_.reg_alpha_denorm,
tparam_.reg_lambda_denorm, cparam_.top_k);
// update weights
for (int group_idx = 0; group_idx < ngroup; ++group_idx) {
for (unsigned i = 0U; i < model->learner_model_param->num_feature; i++) {
int fidx = selector_->NextFeature
(i, *model, group_idx, in_gpair->ConstHostVector(), p_fmat,
tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm);
int fidx =
selector_->NextFeature(ctx_, i, *model, group_idx, in_gpair->ConstHostVector(), p_fmat,
tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm);
if (fidx < 0) break;
this->UpdateFeature(fidx, group_idx, &in_gpair->HostVector(), p_fmat, model);
}
@@ -76,8 +74,8 @@ class CoordinateUpdater : public LinearUpdater {
monitor_.Stop("UpdateFeature");
}
inline void UpdateFeature(int fidx, int group_idx, std::vector<GradientPair> *in_gpair,
DMatrix *p_fmat, gbm::GBLinearModel *model) {
void UpdateFeature(int fidx, int group_idx, std::vector<GradientPair> *in_gpair, DMatrix *p_fmat,
gbm::GBLinearModel *model) {
const int ngroup = model->learner_model_param->num_output_group;
bst_float &w = (*model)[fidx][group_idx];
auto gradient = GetGradientParallel(ctx_, group_idx, ngroup, fidx,
@@ -87,8 +85,7 @@ class CoordinateUpdater : public LinearUpdater {
CoordinateDelta(gradient.first, gradient.second, w, tparam_.reg_alpha_denorm,
tparam_.reg_lambda_denorm));
w += dw;
UpdateResidualParallel(fidx, group_idx, ngroup, dw, in_gpair, p_fmat,
ctx_->Threads());
UpdateResidualParallel(ctx_, fidx, group_idx, ngroup, dw, in_gpair, p_fmat);
}
private:

View File

@@ -32,7 +32,7 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
void Configure(Args const& args) override {
tparam_.UpdateAllowUnknown(args);
coord_param_.UpdateAllowUnknown(args);
selector_.reset(FeatureSelector::Create(tparam_.feature_selector, ctx_->Threads()));
selector_.reset(FeatureSelector::Create(tparam_.feature_selector));
monitor_.Init("GPUCoordinateUpdater");
}
@@ -53,7 +53,7 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
num_row_ = static_cast<size_t>(p_fmat->Info().num_row_);
CHECK(p_fmat->SingleColBlock());
SparsePage const& batch = *(p_fmat->GetBatches<CSCPage>().begin());
SparsePage const &batch = *(p_fmat->GetBatches<CSCPage>(ctx_).begin());
auto page = batch.GetView();
if (IsEmpty()) {
@@ -112,16 +112,15 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
this->UpdateBias(model);
monitor_.Stop("UpdateBias");
// prepare for updating the weights
selector_->Setup(*model, in_gpair->ConstHostVector(), p_fmat,
tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm,
coord_param_.top_k);
selector_->Setup(ctx_, *model, in_gpair->ConstHostVector(), p_fmat, tparam_.reg_alpha_denorm,
tparam_.reg_lambda_denorm, coord_param_.top_k);
monitor_.Start("UpdateFeature");
for (uint32_t group_idx = 0; group_idx < model->learner_model_param->num_output_group;
++group_idx) {
for (auto i = 0U; i < model->learner_model_param->num_feature; i++) {
auto fidx = selector_->NextFeature(
i, *model, group_idx, in_gpair->ConstHostVector(), p_fmat,
tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm);
auto fidx =
selector_->NextFeature(ctx_, i, *model, group_idx, in_gpair->ConstHostVector(), p_fmat,
tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm);
if (fidx < 0) break;
this->UpdateFeature(fidx, group_idx, model);
}

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2018 by Contributors
/**
* Copyright 2018-2023 by XGBoost Contributors
* \author Tianqi Chen, Rory Mitchell
*/
@@ -21,7 +21,7 @@ class ShotgunUpdater : public LinearUpdater {
LOG(FATAL) << "Unsupported feature selector for shotgun updater.\n"
<< "Supported options are: {cyclic, shuffle}";
}
selector_.reset(FeatureSelector::Create(param_.feature_selector, ctx_->Threads()));
selector_.reset(FeatureSelector::Create(param_.feature_selector));
}
void LoadConfig(Json const& in) override {
auto const& config = get<Object const>(in);
@@ -45,18 +45,17 @@ class ShotgunUpdater : public LinearUpdater {
auto dbias = static_cast<bst_float>(param_.learning_rate *
CoordinateDeltaBias(grad.first, grad.second));
model->Bias()[gid] += dbias;
UpdateBiasResidualParallel(gid, ngroup, dbias, &in_gpair->HostVector(), p_fmat,
ctx_->Threads());
UpdateBiasResidualParallel(ctx_, gid, ngroup, dbias, &in_gpair->HostVector(), p_fmat);
}
// lock-free parallel updates of weights
selector_->Setup(*model, in_gpair->ConstHostVector(), p_fmat,
param_.reg_alpha_denorm, param_.reg_lambda_denorm, 0);
for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
selector_->Setup(ctx_, *model, in_gpair->ConstHostVector(), p_fmat, param_.reg_alpha_denorm,
param_.reg_lambda_denorm, 0);
for (const auto &batch : p_fmat->GetBatches<CSCPage>(ctx_)) {
auto page = batch.GetView();
const auto nfeat = static_cast<bst_omp_uint>(batch.Size());
common::ParallelFor(nfeat, ctx_->Threads(), [&](auto i) {
int ii = selector_->NextFeature(i, *model, 0, in_gpair->ConstHostVector(), p_fmat,
int ii = selector_->NextFeature(ctx_, i, *model, 0, in_gpair->ConstHostVector(), p_fmat,
param_.reg_alpha_denorm, param_.reg_lambda_denorm);
if (ii < 0) return;
const bst_uint fid = ii;

View File

@@ -634,7 +634,7 @@ class CPUPredictor : public Predictor {
if (!p_fmat->PageExists<SparsePage>()) {
std::vector<Entry> workspace(p_fmat->Info().num_col_ * kUnroll * n_threads);
auto ft = p_fmat->Info().feature_types.ConstHostVector();
for (auto const &batch : p_fmat->GetBatches<GHistIndexMatrix>({})) {
for (auto const &batch : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, {})) {
if (blocked) {
PredictBatchByBlockOfRowsKernel<GHistIndexMatrixView, kBlockOfRowsSize>(
GHistIndexMatrixView{batch, p_fmat->Info().num_col_, ft, workspace, n_threads}, model,

View File

@@ -706,7 +706,7 @@ class GPUPredictor : public xgboost::Predictor {
}
} else {
size_t batch_offset = 0;
for (auto const& page : dmat->GetBatches<EllpackPage>(BatchParam{})) {
for (auto const& page : dmat->GetBatches<EllpackPage>(ctx_, BatchParam{})) {
dmat->Info().feature_types.SetDevice(ctx_->gpu_id);
auto feature_types = dmat->Info().feature_types.ConstDeviceSpan();
this->PredictInternal(
@@ -983,7 +983,7 @@ class GPUPredictor : public xgboost::Predictor {
batch_offset += batch.Size();
}
} else {
for (auto const& batch : p_fmat->GetBatches<EllpackPage>(BatchParam{})) {
for (auto const& batch : p_fmat->GetBatches<EllpackPage>(ctx_, BatchParam{})) {
bst_row_t batch_offset = 0;
EllpackDeviceAccessor data{batch.Impl()->GetDeviceAccessor(ctx_->gpu_id)};
size_t num_rows = batch.Size();

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2019-2021 by XGBoost Contributors
/**
* Copyright 2019-2023 by XGBoost Contributors
*/
#include <thrust/functional.h>
#include <thrust/random.h>
@@ -12,6 +12,7 @@
#include <utility>
#include "../../common/compressed_iterator.h"
#include "../../common/cuda_context.cuh" // for CUDAContext
#include "../../common/random.h"
#include "../param.h"
#include "gradient_based_sampler.cuh"
@@ -147,25 +148,26 @@ class PoissonSampling : public thrust::binary_function<GradientPair, size_t, Gra
NoSampling::NoSampling(EllpackPageImpl const* page) : page_(page) {}
GradientBasedSample NoSampling::Sample(common::Span<GradientPair> gpair, DMatrix* dmat) {
GradientBasedSample NoSampling::Sample(Context const*, common::Span<GradientPair> gpair,
DMatrix* dmat) {
return {dmat->Info().num_row_, page_, gpair};
}
ExternalMemoryNoSampling::ExternalMemoryNoSampling(EllpackPageImpl const* page,
size_t n_rows,
const BatchParam& batch_param)
: batch_param_(batch_param),
page_(new EllpackPageImpl(batch_param.gpu_id, page->Cuts(), page->is_dense,
page->row_stride, n_rows)) {}
ExternalMemoryNoSampling::ExternalMemoryNoSampling(Context const* ctx, EllpackPageImpl const* page,
size_t n_rows, BatchParam batch_param)
: batch_param_{std::move(batch_param)},
page_(new EllpackPageImpl(ctx->gpu_id, page->Cuts(), page->is_dense, page->row_stride,
n_rows)) {}
GradientBasedSample ExternalMemoryNoSampling::Sample(common::Span<GradientPair> gpair,
GradientBasedSample ExternalMemoryNoSampling::Sample(Context const* ctx,
common::Span<GradientPair> gpair,
DMatrix* dmat) {
if (!page_concatenated_) {
// Concatenate all the external memory ELLPACK pages into a single in-memory page.
size_t offset = 0;
for (auto& batch : dmat->GetBatches<EllpackPage>(batch_param_)) {
for (auto& batch : dmat->GetBatches<EllpackPage>(ctx, batch_param_)) {
auto page = batch.Impl();
size_t num_elements = page_->Copy(batch_param_.gpu_id, page, offset);
size_t num_elements = page_->Copy(ctx->gpu_id, page, offset);
offset += num_elements;
}
page_concatenated_ = true;
@@ -176,12 +178,13 @@ GradientBasedSample ExternalMemoryNoSampling::Sample(common::Span<GradientPair>
UniformSampling::UniformSampling(EllpackPageImpl const* page, float subsample)
: page_(page), subsample_(subsample) {}
GradientBasedSample UniformSampling::Sample(common::Span<GradientPair> gpair, DMatrix* dmat) {
GradientBasedSample UniformSampling::Sample(Context const* ctx, common::Span<GradientPair> gpair,
DMatrix* dmat) {
// Set gradient pair to 0 with p = 1 - subsample
thrust::replace_if(dh::tbegin(gpair), dh::tend(gpair),
thrust::counting_iterator<size_t>(0),
BernoulliTrial(common::GlobalRandom()(), subsample_),
GradientPair());
auto cuctx = ctx->CUDACtx();
thrust::replace_if(cuctx->CTP(), dh::tbegin(gpair), dh::tend(gpair),
thrust::counting_iterator<std::size_t>(0),
BernoulliTrial(common::GlobalRandom()(), subsample_), GradientPair());
return {dmat->Info().num_row_, page_, gpair};
}
@@ -192,7 +195,8 @@ ExternalMemoryUniformSampling::ExternalMemoryUniformSampling(size_t n_rows,
subsample_(subsample),
sample_row_index_(n_rows) {}
GradientBasedSample ExternalMemoryUniformSampling::Sample(common::Span<GradientPair> gpair,
GradientBasedSample ExternalMemoryUniformSampling::Sample(Context const* ctx,
common::Span<GradientPair> gpair,
DMatrix* dmat) {
// Set gradient pair to 0 with p = 1 - subsample
thrust::replace_if(dh::tbegin(gpair), dh::tend(gpair),
@@ -216,18 +220,17 @@ GradientBasedSample ExternalMemoryUniformSampling::Sample(common::Span<GradientP
sample_row_index_.begin(),
ClearEmptyRows());
auto batch_iterator = dmat->GetBatches<EllpackPage>(batch_param_);
auto batch_iterator = dmat->GetBatches<EllpackPage>(ctx, batch_param_);
auto first_page = (*batch_iterator.begin()).Impl();
// Create a new ELLPACK page with empty rows.
page_.reset(); // Release the device memory first before reallocating
page_.reset(new EllpackPageImpl(
batch_param_.gpu_id, first_page->Cuts(), first_page->is_dense,
first_page->row_stride, sample_rows));
page_.reset(new EllpackPageImpl(ctx->gpu_id, first_page->Cuts(), first_page->is_dense,
first_page->row_stride, sample_rows));
// Compact the ELLPACK pages into the single sample page.
thrust::fill(dh::tbegin(page_->gidx_buffer), dh::tend(page_->gidx_buffer), 0);
for (auto& batch : batch_iterator) {
page_->Compact(batch_param_.gpu_id, batch.Impl(), dh::ToSpan(sample_row_index_));
page_->Compact(ctx->gpu_id, batch.Impl(), dh::ToSpan(sample_row_index_));
}
return {sample_rows, page_.get(), dh::ToSpan(gpair_)};
@@ -242,18 +245,17 @@ GradientBasedSampling::GradientBasedSampling(EllpackPageImpl const* page,
threshold_(n_rows + 1, 0.0f),
grad_sum_(n_rows, 0.0f) {}
GradientBasedSample GradientBasedSampling::Sample(common::Span<GradientPair> gpair,
DMatrix* dmat) {
GradientBasedSample GradientBasedSampling::Sample(Context const* ctx,
common::Span<GradientPair> gpair, DMatrix* dmat) {
auto cuctx = ctx->CUDACtx();
size_t n_rows = dmat->Info().num_row_;
size_t threshold_index = GradientBasedSampler::CalculateThresholdIndex(
gpair, dh::ToSpan(threshold_), dh::ToSpan(grad_sum_), n_rows * subsample_);
// Perform Poisson sampling in place.
thrust::transform(dh::tbegin(gpair), dh::tend(gpair),
thrust::counting_iterator<size_t>(0),
dh::tbegin(gpair),
PoissonSampling(dh::ToSpan(threshold_),
threshold_index,
thrust::transform(cuctx->CTP(), dh::tbegin(gpair), dh::tend(gpair),
thrust::counting_iterator<size_t>(0), dh::tbegin(gpair),
PoissonSampling(dh::ToSpan(threshold_), threshold_index,
RandomWeight(common::GlobalRandom()())));
return {n_rows, page_, gpair};
}
@@ -268,7 +270,8 @@ ExternalMemoryGradientBasedSampling::ExternalMemoryGradientBasedSampling(
grad_sum_(n_rows, 0.0f),
sample_row_index_(n_rows) {}
GradientBasedSample ExternalMemoryGradientBasedSampling::Sample(common::Span<GradientPair> gpair,
GradientBasedSample ExternalMemoryGradientBasedSampling::Sample(Context const* ctx,
common::Span<GradientPair> gpair,
DMatrix* dmat) {
size_t n_rows = dmat->Info().num_row_;
size_t threshold_index = GradientBasedSampler::CalculateThresholdIndex(
@@ -298,28 +301,25 @@ GradientBasedSample ExternalMemoryGradientBasedSampling::Sample(common::Span<Gra
sample_row_index_.begin(),
ClearEmptyRows());
auto batch_iterator = dmat->GetBatches<EllpackPage>(batch_param_);
auto batch_iterator = dmat->GetBatches<EllpackPage>(ctx, batch_param_);
auto first_page = (*batch_iterator.begin()).Impl();
// Create a new ELLPACK page with empty rows.
page_.reset(); // Release the device memory first before reallocating
page_.reset(new EllpackPageImpl(batch_param_.gpu_id, first_page->Cuts(),
first_page->is_dense,
page_.reset(new EllpackPageImpl(ctx->gpu_id, first_page->Cuts(), first_page->is_dense,
first_page->row_stride, sample_rows));
// Compact the ELLPACK pages into the single sample page.
thrust::fill(dh::tbegin(page_->gidx_buffer), dh::tend(page_->gidx_buffer), 0);
for (auto& batch : batch_iterator) {
page_->Compact(batch_param_.gpu_id, batch.Impl(), dh::ToSpan(sample_row_index_));
page_->Compact(ctx->gpu_id, batch.Impl(), dh::ToSpan(sample_row_index_));
}
return {sample_rows, page_.get(), dh::ToSpan(gpair_)};
}
GradientBasedSampler::GradientBasedSampler(EllpackPageImpl const* page,
size_t n_rows,
const BatchParam& batch_param,
float subsample,
int sampling_method) {
GradientBasedSampler::GradientBasedSampler(Context const* ctx, EllpackPageImpl const* page,
size_t n_rows, const BatchParam& batch_param,
float subsample, int sampling_method) {
monitor_.Init("gradient_based_sampler");
bool is_sampling = subsample < 1.0;
@@ -346,7 +346,7 @@ GradientBasedSampler::GradientBasedSampler(EllpackPageImpl const* page,
}
} else {
if (is_external_memory) {
strategy_.reset(new ExternalMemoryNoSampling(page, n_rows, batch_param));
strategy_.reset(new ExternalMemoryNoSampling(ctx, page, n_rows, batch_param));
} else {
strategy_.reset(new NoSampling(page));
}
@@ -354,10 +354,10 @@ GradientBasedSampler::GradientBasedSampler(EllpackPageImpl const* page,
}
// Sample a DMatrix based on the given gradient pairs.
GradientBasedSample GradientBasedSampler::Sample(common::Span<GradientPair> gpair,
DMatrix* dmat) {
GradientBasedSample GradientBasedSampler::Sample(Context const* ctx,
common::Span<GradientPair> gpair, DMatrix* dmat) {
monitor_.Start("Sample");
GradientBasedSample sample = strategy_->Sample(gpair, dmat);
GradientBasedSample sample = strategy_->Sample(ctx, gpair, dmat);
monitor_.Stop("Sample");
return sample;
}

View File

@@ -24,7 +24,8 @@ struct GradientBasedSample {
class SamplingStrategy {
public:
/*! \brief Sample from a DMatrix based on the given gradient pairs. */
virtual GradientBasedSample Sample(common::Span<GradientPair> gpair, DMatrix* dmat) = 0;
virtual GradientBasedSample Sample(Context const* ctx, common::Span<GradientPair> gpair,
DMatrix* dmat) = 0;
virtual ~SamplingStrategy() = default;
};
@@ -32,7 +33,8 @@ class SamplingStrategy {
class NoSampling : public SamplingStrategy {
public:
explicit NoSampling(EllpackPageImpl const* page);
GradientBasedSample Sample(common::Span<GradientPair> gpair, DMatrix* dmat) override;
GradientBasedSample Sample(Context const* ctx, common::Span<GradientPair> gpair,
DMatrix* dmat) override;
private:
EllpackPageImpl const* page_;
@@ -41,10 +43,10 @@ class NoSampling : public SamplingStrategy {
/*! \brief No sampling in external memory mode. */
class ExternalMemoryNoSampling : public SamplingStrategy {
public:
ExternalMemoryNoSampling(EllpackPageImpl const* page,
size_t n_rows,
const BatchParam& batch_param);
GradientBasedSample Sample(common::Span<GradientPair> gpair, DMatrix* dmat) override;
ExternalMemoryNoSampling(Context const* ctx, EllpackPageImpl const* page, size_t n_rows,
BatchParam batch_param);
GradientBasedSample Sample(Context const* ctx, common::Span<GradientPair> gpair,
DMatrix* dmat) override;
private:
BatchParam batch_param_;
@@ -56,7 +58,8 @@ class ExternalMemoryNoSampling : public SamplingStrategy {
class UniformSampling : public SamplingStrategy {
public:
UniformSampling(EllpackPageImpl const* page, float subsample);
GradientBasedSample Sample(common::Span<GradientPair> gpair, DMatrix* dmat) override;
GradientBasedSample Sample(Context const* ctx, common::Span<GradientPair> gpair,
DMatrix* dmat) override;
private:
EllpackPageImpl const* page_;
@@ -66,10 +69,9 @@ class UniformSampling : public SamplingStrategy {
/*! \brief No sampling in external memory mode. */
class ExternalMemoryUniformSampling : public SamplingStrategy {
public:
ExternalMemoryUniformSampling(size_t n_rows,
BatchParam batch_param,
float subsample);
GradientBasedSample Sample(common::Span<GradientPair> gpair, DMatrix* dmat) override;
ExternalMemoryUniformSampling(size_t n_rows, BatchParam batch_param, float subsample);
GradientBasedSample Sample(Context const* ctx, common::Span<GradientPair> gpair,
DMatrix* dmat) override;
private:
BatchParam batch_param_;
@@ -82,11 +84,10 @@ class ExternalMemoryUniformSampling : public SamplingStrategy {
/*! \brief Gradient-based sampling in in-memory mode.. */
class GradientBasedSampling : public SamplingStrategy {
public:
GradientBasedSampling(EllpackPageImpl const* page,
size_t n_rows,
const BatchParam& batch_param,
GradientBasedSampling(EllpackPageImpl const* page, size_t n_rows, const BatchParam& batch_param,
float subsample);
GradientBasedSample Sample(common::Span<GradientPair> gpair, DMatrix* dmat) override;
GradientBasedSample Sample(Context const* ctx, common::Span<GradientPair> gpair,
DMatrix* dmat) override;
private:
EllpackPageImpl const* page_;
@@ -98,10 +99,9 @@ class GradientBasedSampling : public SamplingStrategy {
/*! \brief Gradient-based sampling in external memory mode.. */
class ExternalMemoryGradientBasedSampling : public SamplingStrategy {
public:
ExternalMemoryGradientBasedSampling(size_t n_rows,
BatchParam batch_param,
float subsample);
GradientBasedSample Sample(common::Span<GradientPair> gpair, DMatrix* dmat) override;
ExternalMemoryGradientBasedSampling(size_t n_rows, BatchParam batch_param, float subsample);
GradientBasedSample Sample(Context const* ctx, common::Span<GradientPair> gpair,
DMatrix* dmat) override;
private:
BatchParam batch_param_;
@@ -124,14 +124,11 @@ class ExternalMemoryGradientBasedSampling : public SamplingStrategy {
*/
class GradientBasedSampler {
public:
GradientBasedSampler(EllpackPageImpl const* page,
size_t n_rows,
const BatchParam& batch_param,
float subsample,
int sampling_method);
GradientBasedSampler(Context const* ctx, EllpackPageImpl const* page, size_t n_rows,
const BatchParam& batch_param, float subsample, int sampling_method);
/*! \brief Sample from a DMatrix based on the given gradient pairs. */
GradientBasedSample Sample(common::Span<GradientPair> gpair, DMatrix* dmat);
GradientBasedSample Sample(Context const* ctx, common::Span<GradientPair> gpair, DMatrix* dmat);
/*! \brief Calculate the threshold used to normalize sampling probabilities. */
static size_t CalculateThresholdIndex(common::Span<GradientPair> gpair,

View File

@@ -66,7 +66,7 @@ class GloablApproxBuilder {
partitioner_.clear();
// Generating the GHistIndexMatrix is quite slow, is there a way to speed it up?
for (auto const &page :
p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(*param_, hess, *task_))) {
p_fmat->GetBatches<GHistIndexMatrix>(ctx_, BatchSpec(*param_, hess, *task_))) {
if (n_total_bins == 0) {
n_total_bins = page.cut.TotalBins();
feature_values_ = page.cut;
@@ -97,7 +97,7 @@ class GloablApproxBuilder {
std::vector<CPUExpandEntry> nodes{best};
size_t i = 0;
auto space = ConstructHistSpace(partitioner_, nodes);
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(*param_, hess))) {
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, BatchSpec(*param_, hess))) {
histogram_builder_.BuildHist(i, space, page, p_tree, partitioner_.at(i).Partitions(), nodes,
{}, gpair);
i++;
@@ -148,7 +148,7 @@ class GloablApproxBuilder {
size_t i = 0;
auto space = ConstructHistSpace(partitioner_, nodes_to_build);
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(*param_, hess))) {
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, BatchSpec(*param_, hess))) {
histogram_builder_.BuildHist(i, space, page, p_tree, partitioner_.at(i).Partitions(),
nodes_to_build, nodes_to_sub, gpair);
i++;
@@ -214,7 +214,8 @@ class GloablApproxBuilder {
monitor_->Start("UpdatePosition");
size_t page_id = 0;
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(*param_, hess))) {
for (auto const &page :
p_fmat->GetBatches<GHistIndexMatrix>(ctx_, BatchSpec(*param_, hess))) {
partitioner_.at(page_id).UpdatePosition(ctx_, page, applied, p_tree);
page_id++;
}

View File

@@ -76,7 +76,7 @@ class ColMaker: public TreeUpdater {
// Finds densities if we don't already have them
if (column_densities_.empty()) {
std::vector<size_t> column_size(dmat->Info().num_col_);
for (const auto &batch : dmat->GetBatches<SortedCSCPage>()) {
for (const auto &batch : dmat->GetBatches<SortedCSCPage>(ctx_)) {
auto page = batch.GetView();
for (auto i = 0u; i < batch.Size(); i++) {
column_size[i] += page[i].size();
@@ -467,7 +467,7 @@ class ColMaker: public TreeUpdater {
auto evaluator = tree_evaluator_.GetEvaluator();
auto feat_set = column_sampler_.GetFeatureSet(depth);
for (const auto &batch : p_fmat->GetBatches<SortedCSCPage>()) {
for (const auto &batch : p_fmat->GetBatches<SortedCSCPage>(ctx_)) {
this->UpdateSolution(batch, feat_set->HostVector(), gpair, p_fmat);
}
// after this each thread's stemp will get the best candidates, aggregate results
@@ -546,7 +546,7 @@ class ColMaker: public TreeUpdater {
}
std::sort(fsplits.begin(), fsplits.end());
fsplits.resize(std::unique(fsplits.begin(), fsplits.end()) - fsplits.begin());
for (const auto &batch : p_fmat->GetBatches<SortedCSCPage>()) {
for (const auto &batch : p_fmat->GetBatches<SortedCSCPage>(ctx_)) {
auto page = batch.GetView();
for (auto fid : fsplits) {
auto col = page[fid];

View File

@@ -218,7 +218,7 @@ struct GPUHistMakerDevice {
column_sampler(column_sampler_seed),
interaction_constraints(param, n_features),
batch_param(std::move(_batch_param)) {
sampler.reset(new GradientBasedSampler(page, _n_rows, batch_param, param.subsample,
sampler.reset(new GradientBasedSampler(ctx, page, _n_rows, batch_param, param.subsample,
param.sampling_method));
if (!param.monotone_constraints.empty()) {
// Copy assigning an empty vector causes an exception in MSVC debug builds
@@ -258,7 +258,7 @@ struct GPUHistMakerDevice {
dh::safe_cuda(cudaMemcpyAsync(
d_gpair.data().get(), dh_gpair->ConstDevicePointer(),
dh_gpair->Size() * sizeof(GradientPair), cudaMemcpyDeviceToDevice));
auto sample = sampler->Sample(dh::ToSpan(d_gpair), dmat);
auto sample = sampler->Sample(ctx_, dh::ToSpan(d_gpair), dmat);
page = sample.page;
gpair = sample.gpair;
@@ -808,11 +808,8 @@ class GPUHistMaker : public TreeUpdater {
uint32_t column_sampling_seed = common::GlobalRandom()();
collective::Broadcast(&column_sampling_seed, sizeof(column_sampling_seed), 0);
BatchParam batch_param{
ctx_->gpu_id,
param->max_bin,
};
auto page = (*dmat->GetBatches<EllpackPage>(batch_param).begin()).Impl();
auto batch_param = BatchParam{param->max_bin, TrainParam::DftSparseThreshold()};
auto page = (*dmat->GetBatches<EllpackPage>(ctx_, batch_param).begin()).Impl();
dh::safe_cuda(cudaSetDevice(ctx_->gpu_id));
info_->feature_types.SetDevice(ctx_->gpu_id);
maker.reset(new GPUHistMakerDevice<GradientSumT>(

View File

@@ -134,7 +134,7 @@ class MultiTargetHistBuilder {
std::vector<MultiExpandEntry> const &applied) {
monitor_->Start(__func__);
std::size_t page_id{0};
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(this->param_))) {
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(this->param_))) {
this->partitioner_.at(page_id).UpdatePosition(this->ctx_, page, applied, p_tree);
page_id++;
}
@@ -152,7 +152,7 @@ class MultiTargetHistBuilder {
std::size_t page_id = 0;
bst_bin_t n_total_bins = 0;
partitioner_.clear();
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {
if (n_total_bins == 0) {
n_total_bins = page.cut.TotalBins();
} else {
@@ -206,7 +206,7 @@ class MultiTargetHistBuilder {
std::vector<MultiExpandEntry> nodes{best};
std::size_t i = 0;
auto space = ConstructHistSpace(partitioner_, nodes);
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {
for (bst_target_t t{0}; t < n_targets; ++t) {
auto t_gpair = gpair.Slice(linalg::All(), t);
histogram_builder_[t].BuildHist(i, space, page, p_tree, partitioner_.at(i).Partitions(),
@@ -225,7 +225,7 @@ class MultiTargetHistBuilder {
for (bst_target_t t{0}; t < p_tree->NumTargets(); ++t) {
hists.push_back(&histogram_builder_[t].Histogram());
}
for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {
evaluator_->EvaluateSplits(*p_tree, hists, gmat.cut, &nodes);
break;
}
@@ -263,7 +263,7 @@ class MultiTargetHistBuilder {
std::size_t i = 0;
auto space = ConstructHistSpace(partitioner_, nodes_to_build);
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {
for (std::size_t t = 0; t < p_tree->NumTargets(); ++t) {
auto t_gpair = gpair.Slice(linalg::All(), t);
// Make sure the gradient matrix is f-order.
@@ -283,7 +283,7 @@ class MultiTargetHistBuilder {
for (bst_target_t t{0}; t < p_tree->NumTargets(); ++t) {
hists.push_back(&histogram_builder_[t].Histogram());
}
for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {
evaluator_->EvaluateSplits(*p_tree, hists, gmat.cut, best_splits);
break;
}
@@ -383,7 +383,7 @@ class HistBuilder {
std::size_t page_id{0};
bst_bin_t n_total_bins{0};
partitioner_.clear();
for (auto const &page : fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
for (auto const &page : fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {
if (n_total_bins == 0) {
n_total_bins = page.cut.TotalBins();
} else {
@@ -406,7 +406,7 @@ class HistBuilder {
monitor_->Start(__func__);
auto const &histograms = histogram_builder_->Histogram();
auto ft = p_fmat->Info().feature_types.ConstHostSpan();
for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {
evaluator_->EvaluateSplits(histograms, gmat.cut, ft, *p_tree, best_splits);
break;
}
@@ -423,7 +423,7 @@ class HistBuilder {
std::size_t page_id = 0;
auto space = ConstructHistSpace(partitioner_, {node});
for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {
std::vector<CPUExpandEntry> nodes_to_build{node};
std::vector<CPUExpandEntry> nodes_to_sub;
this->histogram_builder_->BuildHist(page_id, space, gidx, p_tree,
@@ -439,7 +439,7 @@ class HistBuilder {
* Specialized code for dense data: For dense data (with no missing value), the sum
* of gradient histogram is equal to snode[nid]
*/
auto const &gmat = *(p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_)).begin());
auto const &gmat = *(p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_)).begin());
std::vector<std::uint32_t> const &row_ptr = gmat.cut.Ptrs();
CHECK_GE(row_ptr.size(), 2);
std::uint32_t const ibegin = row_ptr[0];
@@ -467,7 +467,7 @@ class HistBuilder {
std::vector<CPUExpandEntry> entries{node};
monitor_->Start("EvaluateSplits");
auto ft = p_fmat->Info().feature_types.ConstHostSpan();
for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {
evaluator_->EvaluateSplits(histogram_builder_->Histogram(), gmat.cut, ft, *p_tree,
&entries);
break;
@@ -503,7 +503,7 @@ class HistBuilder {
std::size_t page_id{0};
auto space = ConstructHistSpace(partitioner_, nodes_to_build);
for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {
histogram_builder_->BuildHist(page_id, space, gidx, p_tree,
partitioner_.at(page_id).Partitions(), nodes_to_build,
nodes_to_sub, gpair.Values());
@@ -515,7 +515,7 @@ class HistBuilder {
std::vector<CPUExpandEntry> const &applied) {
monitor_->Start(__func__);
std::size_t page_id{0};
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(this->param_))) {
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {
this->partitioner_.at(page_id).UpdatePosition(this->ctx_, page, applied, p_tree);
page_id++;
}