/** * Copyright 2019-2024, XGBoost Contributors */ #ifndef XGBOOST_DATA_ELLPACK_PAGE_SOURCE_H_ #define XGBOOST_DATA_ELLPACK_PAGE_SOURCE_H_ #include // for int32_t #include // for shared_ptr #include // for move #include "../common/hist_util.h" // for HistogramCuts #include "ellpack_page.h" // for EllpackPage #include "ellpack_page_raw_format.h" // for EllpackPageRawFormat #include "sparse_page_source.h" // for PageSourceIncMixIn #include "xgboost/base.h" // for bst_idx_t #include "xgboost/context.h" // for DeviceOrd #include "xgboost/data.h" // for BatchParam #include "xgboost/span.h" // for Span namespace xgboost::data { // We need to decouple the storage and the view of the storage so that we can implement // concurrent read. // Dummy type to hide CUDA calls from the host compiler. struct EllpackHostCache; // Pimpl to hide CUDA calls from the host compiler. class EllpackHostCacheStreamImpl; // A view onto the actual cache implemented by `EllpackHostCache`. class EllpackHostCacheStream { std::unique_ptr p_impl_; public: explicit EllpackHostCacheStream(std::shared_ptr cache); ~EllpackHostCacheStream(); [[nodiscard]] bst_idx_t Write(void const* ptr, bst_idx_t n_bytes); template [[nodiscard]] std::enable_if_t, bst_idx_t> Write(T const& v) { return this->Write(&v, sizeof(T)); } [[nodiscard]] bool Read(void* ptr, bst_idx_t n_bytes); template [[nodiscard]] auto Read(T* ptr) -> std::enable_if_t, bool> { return this->Read(ptr, sizeof(T)); } [[nodiscard]] bst_idx_t Tell() const; void Seek(bst_idx_t offset_bytes); // Limit the size of read. offset_bytes is the maximum offset that this stream can read // to. An error is raised if the limited is exceeded. void Bound(bst_idx_t offset_bytes); }; template class EllpackFormatPolicy { std::shared_ptr cuts_{nullptr}; DeviceOrd device_; public: using FormatT = EllpackPageRawFormat; public: [[nodiscard]] auto CreatePageFormat() const { CHECK_EQ(cuts_->cut_values_.Device(), device_); std::unique_ptr fmt{new EllpackPageRawFormat{cuts_, device_}}; return fmt; } void SetCuts(std::shared_ptr cuts, DeviceOrd device) { std::swap(cuts_, cuts); device_ = device; CHECK(this->device_.IsCUDA()); } [[nodiscard]] auto GetCuts() { CHECK(cuts_); return cuts_; } [[nodiscard]] auto Device() const { return device_; } }; template typename F> class EllpackFormatStreamPolicy : public F { std::shared_ptr p_cache_; public: using WriterT = EllpackHostCacheStream; using ReaderT = EllpackHostCacheStream; public: EllpackFormatStreamPolicy(); [[nodiscard]] std::unique_ptr CreateWriter(StringView name, std::uint32_t iter); [[nodiscard]] std::unique_ptr CreateReader(StringView name, bst_idx_t offset, bst_idx_t length) const; }; template class EllpackPageSourceImpl : public PageSourceIncMixIn { using Super = PageSourceIncMixIn; bool is_dense_; bst_idx_t row_stride_; BatchParam param_; common::Span feature_types_; public: EllpackPageSourceImpl(float missing, std::int32_t nthreads, bst_feature_t n_features, std::size_t n_batches, std::shared_ptr cache, BatchParam param, std::shared_ptr cuts, bool is_dense, bst_idx_t row_stride, common::Span feature_types, std::shared_ptr source, DeviceOrd device) : Super{missing, nthreads, n_features, n_batches, cache, false}, is_dense_{is_dense}, row_stride_{row_stride}, param_{std::move(param)}, feature_types_{feature_types} { this->source_ = source; cuts->SetDevice(device); this->SetCuts(std::move(cuts), device); this->Fetch(); } void Fetch() final; }; // Cache to host using EllpackPageHostSource = EllpackPageSourceImpl>; // Cache to disk using EllpackPageSource = EllpackPageSourceImpl>; #if !defined(XGBOOST_USE_CUDA) template inline void EllpackPageSourceImpl::Fetch() { // silent the warning about unused variables. (void)(row_stride_); (void)(is_dense_); common::AssertGPUSupport(); } #endif // !defined(XGBOOST_USE_CUDA) } // namespace xgboost::data #endif // XGBOOST_DATA_ELLPACK_PAGE_SOURCE_H_