/**
 * Copyright 2019-2024, XGBoost Contributors
 */
#ifndef XGBOOST_DATA_ELLPACK_PAGE_CUH_
#define XGBOOST_DATA_ELLPACK_PAGE_CUH_

#include <thrust/binary_search.h>

#include <limits>  // for numeric_limits

#include "../common/categorical.h"
#include "../common/compressed_iterator.h"
#include "../common/device_helpers.cuh"
#include "../common/hist_util.h"
#include "../common/ref_resource_view.h"  // for RefResourceView
#include "ellpack_page.h"
#include "xgboost/data.h"

namespace xgboost {
/**
 * @brief Struct for accessing and manipulating an ELLPACK matrix on the device.
 *
 * Does not own underlying memory and may be trivially copied into kernels.
 */
struct EllpackDeviceAccessor {
  /** @brief Whether the matrix is dense. */
  bool is_dense;
  /** @brief Row length for ELLPACK, equal to the number of features when the data is dense. */
  bst_idx_t row_stride;
  /** @brief Starting index of the rows. Used for external memory. */
  bst_idx_t base_rowid;
  /** @brief Number of rows in this batch. */
  bst_idx_t n_rows;
  /** @brief Accessor for the gradient index. */
  common::CompressedIterator<uint32_t> gidx_iter;
  /** @brief Minimum value for each feature. Size equals the number of features. */
  common::Span<const float> min_fvalue;
  /** @brief Histogram cut pointers. Size equals (number of features + 1). */
  common::Span<const uint32_t> feature_segments;
  /** @brief Histogram cut values. Size equals (bins per feature * number of features). */
  common::Span<const float> gidx_fvalue_map;
  /** @brief Type of each feature, categorical or numerical. */
  common::Span<FeatureType const> feature_types;

  EllpackDeviceAccessor() = delete;
  EllpackDeviceAccessor(DeviceOrd device, std::shared_ptr<const common::HistogramCuts> cuts,
                        bool is_dense, size_t row_stride, size_t base_rowid, size_t n_rows,
                        common::CompressedIterator<uint32_t> gidx_iter,
                        common::Span<FeatureType const> feature_types)
      : is_dense(is_dense),
        row_stride(row_stride),
        base_rowid(base_rowid),
        n_rows(n_rows),
        gidx_iter(gidx_iter),
        feature_types{feature_types} {
    if (device.IsCUDA()) {
      cuts->cut_values_.SetDevice(device);
      cuts->cut_ptrs_.SetDevice(device);
      cuts->min_vals_.SetDevice(device);
      gidx_fvalue_map = cuts->cut_values_.ConstDeviceSpan();
      feature_segments = cuts->cut_ptrs_.ConstDeviceSpan();
      min_fvalue = cuts->min_vals_.ConstDeviceSpan();
    } else {
      gidx_fvalue_map = cuts->cut_values_.ConstHostSpan();
      feature_segments = cuts->cut_ptrs_.ConstHostSpan();
      min_fvalue = cuts->min_vals_.ConstHostSpan();
    }
  }
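  /**
   * Usage sketch (hypothetical, not part of this header): a kernel that counts the
   * missing entries in the rows of one batch. It assumes the accessor was obtained
   * from EllpackPageImpl::GetDeviceAccessor and that one thread handles one row; the
   * kernel name and launch configuration are illustrative only.
   *
   * @code
   * __global__ void CountMissing(EllpackDeviceAccessor acc, unsigned long long* n_missing) {
   *   auto ridx = acc.base_rowid + blockIdx.x * blockDim.x + threadIdx.x;
   *   if (!acc.IsInRange(ridx)) {
   *     return;
   *   }
   *   for (size_t fidx = 0; fidx < acc.NumFeatures(); ++fidx) {
   *     // GetFvalue returns NaN when the entry is missing from a sparse page.
   *     if (isnan(acc.GetFvalue(ridx, fidx))) {
   *       atomicAdd(n_missing, 1ull);
   *     }
   *   }
   * }
   * @endcode
   */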
  /**
   * @brief Given a row index and a feature index, returns the corresponding bin index.
   *
   * Uses binary search for lookup in the sparse case. Returns -1 if the entry is
   * missing.
   *
   * @tparam global_ridx Whether the row index is global to all ELLPACK batches or
   *                     local to the current batch.
   */
  template <bool global_ridx = true>
  [[nodiscard]] __device__ bst_bin_t GetBinIndex(size_t ridx, size_t fidx) const {
    if (global_ridx) {
      ridx -= base_rowid;
    }
    auto row_begin = row_stride * ridx;
    auto row_end = row_begin + row_stride;
    bst_bin_t gidx = -1;
    if (is_dense) {
      gidx = gidx_iter[row_begin + fidx];
    } else {
      gidx = common::BinarySearchBin(row_begin, row_end, gidx_iter, feature_segments[fidx],
                                     feature_segments[fidx + 1]);
    }
    return gidx;
  }

  template <bool is_cat>
  [[nodiscard]] __device__ uint32_t SearchBin(float value, size_t column_id) const {
    auto beg = feature_segments[column_id];
    auto end = feature_segments[column_id + 1];
    uint32_t idx = 0;
    if (is_cat) {
      auto it = dh::MakeTransformIterator<bst_cat_t>(gidx_fvalue_map.cbegin(),
                                                     [](float v) { return common::AsCat(v); });
      idx = thrust::lower_bound(thrust::seq, it + beg, it + end, common::AsCat(value)) - it;
    } else {
      auto it = thrust::upper_bound(thrust::seq, gidx_fvalue_map.cbegin() + beg,
                                    gidx_fvalue_map.cbegin() + end, value);
      idx = it - gidx_fvalue_map.cbegin();
    }
    if (idx == end) {
      idx -= 1;
    }
    return idx;
  }

  [[nodiscard]] __device__ float GetFvalue(size_t ridx, size_t fidx) const {
    auto gidx = GetBinIndex(ridx, fidx);
    if (gidx == -1) {
      return std::numeric_limits<float>::quiet_NaN();
    }
    return gidx_fvalue_map[gidx];
  }

  // Check if the row id is within range of the current batch.
  [[nodiscard]] __device__ bool IsInRange(size_t row_id) const {
    return row_id >= base_rowid && row_id < base_rowid + n_rows;
  }
  /**
   * @brief Return the total number of symbols (total number of bins plus 1 for not found).
   */
  [[nodiscard]] XGBOOST_DEVICE size_t NumSymbols() const { return gidx_fvalue_map.size() + 1; }

  [[nodiscard]] XGBOOST_DEVICE size_t NullValue() const { return this->NumBins(); }

  [[nodiscard]] XGBOOST_DEVICE size_t NumBins() const { return gidx_fvalue_map.size(); }

  [[nodiscard]] XGBOOST_DEVICE size_t NumFeatures() const { return min_fvalue.size(); }
};

class GHistIndexMatrix;

class EllpackPageImpl {
 public:
  /**
   * @brief Default constructor.
   *
   * This is used in the external memory case. An empty ELLPACK page is constructed with
   * its content set later by the reader.
   */
  EllpackPageImpl() = default;

  /**
   * @brief Constructor from an existing EllpackInfo.
   *
   * This is used in the sampling case. The ELLPACK page is constructed from an existing
   * ELLPACK page and the given number of rows.
   */
  EllpackPageImpl(Context const* ctx, std::shared_ptr<common::HistogramCuts const> cuts,
                  bool is_dense, bst_idx_t row_stride, bst_idx_t n_rows);
  /**
   * @brief Constructor used for external memory.
   */
  EllpackPageImpl(Context const* ctx, std::shared_ptr<common::HistogramCuts const> cuts,
                  const SparsePage& page, bool is_dense, size_t row_stride,
                  common::Span<FeatureType const> feature_types);
  /**
   * @brief Constructor from an existing DMatrix.
   *
   * This is used in the in-memory case. The ELLPACK page is constructed from an existing
   * DMatrix in CSR format.
   */
  explicit EllpackPageImpl(Context const* ctx, DMatrix* dmat, const BatchParam& parm);

  template <typename AdapterBatch>
  explicit EllpackPageImpl(Context const* ctx, AdapterBatch batch, float missing, bool is_dense,
                           common::Span<size_t> row_counts_span,
                           common::Span<FeatureType const> feature_types, size_t row_stride,
                           size_t n_rows, std::shared_ptr<common::HistogramCuts const> cuts);
  /**
   * @brief Constructor from an existing CPU gradient index.
   */
  explicit EllpackPageImpl(Context const* ctx, GHistIndexMatrix const& page,
                           common::Span<FeatureType const> ft);
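  /**
   * Construction sketch (illustrative, not part of this header): building a page from
   * an in-memory DMatrix and obtaining a kernel-ready accessor. `ctx` and `p_fmat` are
   * assumed to be provided by the caller, and the BatchParam values are placeholders.
   *
   * @code
   * Context const* ctx = ...;    // CUDA-enabled context set up elsewhere
   * DMatrix* p_fmat = ...;       // in-memory DMatrix in CSR format
   * BatchParam param{256, 0.2};  // max_bin and sparse threshold (placeholder values)
   * EllpackPageImpl page{ctx, p_fmat, param};
   * auto acc = page.GetDeviceAccessor(ctx->Device());
   * // `acc` does not own memory and may be passed by value into CUDA kernels.
   * @endcode
   */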
  /**
   * @brief Copy the elements of the given ELLPACK page into this page.
   *
   * @param ctx The GPU context.
   * @param page The ELLPACK page to copy from.
   * @param offset The number of elements to skip before copying.
   * @returns The number of elements copied.
   */
  bst_idx_t Copy(Context const* ctx, EllpackPageImpl const* page, bst_idx_t offset);
  /**
   * @brief Compact the given ELLPACK page into the current page.
   *
   * @param ctx The GPU context.
   * @param page The ELLPACK page to compact from.
   * @param row_indexes Row indexes for the compacted page.
   */
  void Compact(Context const* ctx, EllpackPageImpl const* page, common::Span<size_t> row_indexes);

  /** @return Number of instances in the page. */
  [[nodiscard]] bst_idx_t Size() const;

  /** @brief Set the base row id for this page. */
  void SetBaseRowId(std::size_t row_id) { base_rowid = row_id; }

  [[nodiscard]] common::HistogramCuts const& Cuts() const { return *cuts_; }
  [[nodiscard]] std::shared_ptr<common::HistogramCuts const> CutsShared() const { return cuts_; }
  void SetCuts(std::shared_ptr<common::HistogramCuts const> cuts) { cuts_ = cuts; }

  [[nodiscard]] bool IsDense() const { return is_dense; }

  /** @return Estimation of memory cost of this page. */
  static size_t MemCostBytes(size_t num_rows, size_t row_stride,
                             const common::HistogramCuts& cuts);

  /**
   * @brief Return the total number of symbols (total number of bins plus 1 for not found).
   */
  [[nodiscard]] std::size_t NumSymbols() const { return cuts_->TotalBins() + 1; }
  /**
   * @brief Get an accessor that can be passed into CUDA kernels.
   */
  [[nodiscard]] EllpackDeviceAccessor GetDeviceAccessor(
      DeviceOrd device, common::Span<FeatureType const> feature_types = {}) const;
  /**
   * @brief Get an accessor for host code.
   */
  [[nodiscard]] EllpackDeviceAccessor GetHostAccessor(
      Context const* ctx, std::vector<common::CompressedByteT>* h_gidx_buffer,
      common::Span<FeatureType const> feature_types = {}) const;

 private:
  /**
   * @brief Compress a single page of CSR data into ELLPACK.
   *
   * @param device The GPU device to use.
   * @param row_batch The CSR page.
   */
  void CreateHistIndices(DeviceOrd device, const SparsePage& row_batch,
                         common::Span<FeatureType const> feature_types);
  /**
   * @brief Initialize the buffer to store compressed features.
   */
  void InitCompressedData(Context const* ctx);

 public:
  /** @brief Whether the matrix is dense. */
  bool is_dense;
  /** @brief Row length for ELLPACK. */
  bst_idx_t row_stride;
  bst_idx_t base_rowid{0};
  bst_idx_t n_rows{0};
  /**
   * @brief Index of the gradient histogram, which is stored in ELLPACK format.
   *
   * This can be backed by various storage types.
   */
  common::RefResourceView<common::CompressedByteT> gidx_buffer;

 private:
  std::shared_ptr<common::HistogramCuts const> cuts_;
  common::Monitor monitor_;
};

inline size_t GetRowStride(DMatrix* dmat) {
  if (dmat->IsDense()) return dmat->Info().num_col_;

  size_t row_stride = 0;
  for (const auto& batch : dmat->GetBatches<SparsePage>()) {
    const auto& row_offset = batch.offset.ConstHostVector();
    for (auto i = 1ull; i < row_offset.size(); i++) {
      row_stride = std::max(row_stride, static_cast<size_t>(row_offset[i] - row_offset[i - 1]));
    }
  }
  return row_stride;
}
}  // namespace xgboost

#endif  // XGBOOST_DATA_ELLPACK_PAGE_CUH_
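// Usage sketch for GetRowStride (illustrative only): for a sparse DMatrix the helper
// scans every SparsePage and returns the widest row, which becomes the ELLPACK
// row_stride; `p_fmat` is assumed to be a valid DMatrix supplied by the caller.
//
//   size_t stride = xgboost::GetRowStride(p_fmat);
//   // Dense input: stride == number of columns. Sparse input: each ELLPACK row is
//   // padded with the null bin (NullValue()) up to `stride` entries.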