Prepare gradient index for Quantile DMatrix. (#8103)

* Prepare gradient index for Quantile DMatrix.

- Implement push batch with adapter batch.
- Implement `GetFvalue` for prediction.
This commit is contained in:
Jiaming Yuan
2022-07-22 17:26:33 +08:00
committed by GitHub
parent 1be09848a7
commit 4a4e5c7c18
7 changed files with 254 additions and 70 deletions

View File

@@ -4,13 +4,17 @@
*/
#ifndef XGBOOST_DATA_GRADIENT_INDEX_H_
#define XGBOOST_DATA_GRADIENT_INDEX_H_
#include <algorithm> // std::min
#include <memory>
#include <vector>
#include "../common/categorical.h"
#include "../common/hist_util.h"
#include "../common/numeric.h"
#include "../common/threading_utils.h"
#include "adapter.h"
#include "proxy_dmatrix.h"
#include "xgboost/base.h"
#include "xgboost/data.h"
@@ -18,7 +22,6 @@ namespace xgboost {
namespace common {
class ColumnMatrix;
} // namespace common
/*!
* \brief preprocessed global index matrix, in CSR format
*
@@ -26,24 +29,39 @@ class ColumnMatrix;
* index for CPU histogram. On GPU ellpack page is used.
*/
class GHistIndexMatrix {
/**
 * \brief Count, for every row of an adapter batch, the entries accepted by
 *        `data::IsValidFunctor{missing}`.
 *
 * \param batch     Adapter batch; must provide `Size()` and `GetLine(i)`.
 * \param missing   Value handed to `data::IsValidFunctor` to decide validity.
 * \param n_threads Number of threads passed to `common::ParallelFor`.
 *
 * \return A vector with one count per row of the batch.
 */
template <typename AdapterBatchT>
auto GetRowCounts(AdapterBatchT const& batch, float missing, int32_t n_threads) {
  auto const n_rows = batch.Size();
  std::vector<size_t> valid_counts(n_rows, 0);
  common::ParallelFor(n_rows, n_threads, [&](size_t ridx) {
    auto row = batch.GetLine(ridx);
    auto const n_entries = row.Size();
    // Each iteration owns exactly one slot of the output, so the count is accumulated
    // locally and stored once.
    size_t n_valid = 0;
    for (size_t eidx = 0; eidx < n_entries; ++eidx) {
      data::COOTuple entry = row.GetElement(eidx);
      if (data::IsValidFunctor{missing}(entry)) {
        ++n_valid;
      }
    }
    valid_counts[ridx] = n_valid;
  });
  return valid_counts;
}
/**
* \brief Push a page into index matrix, the function is only necessary because hist has
* partial support for external memory.
*/
void PushBatch(SparsePage const& batch, common::Span<FeatureType const> ft,
bst_bin_t n_total_bins, int32_t n_threads);
void PushBatch(SparsePage const& batch, common::Span<FeatureType const> ft, int32_t n_threads);
template <typename Batch, typename BinIdxType, typename GetOffset, typename IsValid>
void SetIndexData(common::Span<BinIdxType> index_data_span, common::Span<FeatureType const> ft,
size_t batch_threads, Batch const& batch, IsValid&& is_valid, size_t nbins,
GetOffset&& get_offset) {
void SetIndexData(common::Span<BinIdxType> index_data_span, size_t rbegin,
common::Span<FeatureType const> ft, size_t batch_threads, Batch const& batch,
IsValid&& is_valid, size_t nbins, GetOffset&& get_offset) {
auto batch_size = batch.Size();
BinIdxType* index_data = index_data_span.data();
auto const& ptrs = cut.Ptrs();
auto const& values = cut.Values();
common::ParallelFor(batch_size, batch_threads, [&](size_t i) {
auto line = batch.GetLine(i);
size_t ibegin = row_ptr[i]; // index of first entry for current block
size_t ibegin = row_ptr[rbegin + i]; // index of first entry for current block
size_t k = 0;
auto tid = omp_get_thread_num();
for (size_t j = 0; j < line.Size(); ++j) {
@@ -63,6 +81,49 @@ class GHistIndexMatrix {
});
}
/**
 * \brief Write the bin indices of one batch into `index` and merge the per-thread hit
 *        counters into `hit_count`.
 *
 * \param n_threads Number of threads available to the caller.
 * \param batch     Batch of rows being pushed; forwarded to `SetIndexData`.
 * \param rbegin    Position in `row_ptr` that corresponds to the first row of `batch`.
 * \param is_valid  Predicate forwarded to `SetIndexData`.
 * \param ft        Feature types forwarded to `SetIndexData`.
 */
template <typename Batch, typename IsValid>
void PushBatchImpl(int32_t n_threads, Batch const& batch, size_t rbegin, IsValid&& is_valid,
                   common::Span<FeatureType const> ft) {
  // The thread count used for filling the index is tied to the batch size: at least one
  // thread, at most one per row. If the OMP block is ever parallelized over something
  // other than the batch size, this must be recomputed.
  size_t const requested_threads = static_cast<size_t>(n_threads);
  size_t batch_threads =
      std::max(static_cast<size_t>(1), std::min(batch.Size(), requested_threads));

  auto n_bins_total = cut.TotalBins();
  // row_ptr value just past the last row of this batch: number of entries in this page
  const size_t n_index = row_ptr[rbegin + batch.Size()];

  ResizeIndex(n_index, isDense_);
  if (isDense_) {
    index.SetBinOffset(cut.Ptrs());
  }
  uint32_t const* offsets = index.Offset();

  if (!isDense_) {
    /* For sparse DMatrix we have to store index of feature for each bin
       in index field to chose right offset. So offset is nullptr and index is
       not reduced */
    auto keep_global_idx = [](auto idx, auto) { return idx; };
    common::Span<uint32_t> index_data_span = {index.data<uint32_t>(), n_index};
    SetIndexData(index_data_span, rbegin, ft, batch_threads, batch, is_valid, n_bins_total,
                 keep_global_idx);
  } else {
    // bin_idx is the index of a cut value across all features. Subtracting the starting
    // pointer of the owning feature yields a smaller value that fits a narrower type.
    common::DispatchBinType(index.GetBinTypeSize(), [&](auto dtype) {
      using T = decltype(dtype);
      auto to_local_idx = [offsets](auto bin_idx, auto fidx) {
        return static_cast<T>(bin_idx - offsets[fidx]);
      };
      common::Span<T> index_data_span = {index.data<T>(), index.Size()};
      SetIndexData(index_data_span, rbegin, ft, batch_threads, batch, is_valid, n_bins_total,
                   to_local_idx);
    });
  }

  // Fold every thread-local counter block into the shared histogram of bin hits.
  common::ParallelFor(n_bins_total, n_threads, [&](bst_omp_uint idx) {
    for (int32_t tid = 0; tid < n_threads; ++tid) {
      auto& local_hits = hit_count_tloc_[tid * n_bins_total + idx];
      hit_count[idx] += local_hits;
      local_hits = 0;  // reset for next batch
    }
  });
}
public:
/*! \brief row pointer to rows by element position */
std::vector<size_t> row_ptr;
@@ -77,15 +138,53 @@ class GHistIndexMatrix {
/*! \brief base row index for current page (used by external memory) */
size_t base_rowid{0};
GHistIndexMatrix();
~GHistIndexMatrix();
/**
* \brief Constructor for SimpleDMatrix.
*/
GHistIndexMatrix(DMatrix* x, bst_bin_t max_bins_per_feat, double sparse_thresh,
bool sorted_sketch, int32_t n_threads, common::Span<float> hess = {});
~GHistIndexMatrix();
/**
* \brief Constructor for Iterative DMatrix. Initialize basic information and prepare
* for push batch.
*/
GHistIndexMatrix(MetaInfo const& info, common::HistogramCuts&& cuts, bst_bin_t max_bin_per_feat);
/**
* \brief Constructor for external memory.
*/
GHistIndexMatrix(SparsePage const& page, common::Span<FeatureType const> ft,
common::HistogramCuts const& cuts, int32_t max_bins_per_feat, bool is_dense,
double sparse_thresh, int32_t n_threads);
GHistIndexMatrix(); // also for ext mem, empty ctor so that we can read the cache back.
// Create a global histogram matrix, given cut. Used by external memory
void Init(SparsePage const& page, common::Span<FeatureType const> ft,
common::HistogramCuts const& cuts, int32_t max_bins_per_feat, bool is_dense,
double sparse_thresh, int32_t n_threads);
/**
 * \brief Push one adapter batch into the gradient index.
 *
 * Counts the valid entries per row, fills `row_ptr` starting at `rbegin` through
 * `common::PartialSum`, writes the bin indices via `PushBatchImpl`, and builds the
 * column matrix once the last row has been pushed.
 *
 * \param ctx             Context supplying the number of threads.
 * \param rbegin          Position in `row_ptr` of the first row of this batch.
 * \param prev_sum        Initial value handed to `common::PartialSum`
 *                        (presumably the number of entries pushed so far -- confirm with caller).
 * \param batch           Adapter batch to push.
 * \param missing         Value used to construct `data::IsValidFunctor`.
 * \param ft              Feature types forwarded to `PushBatchImpl`.
 * \param sparse_thresh   Threshold forwarded to the `common::ColumnMatrix` constructor;
 *                        must not be NaN when the final batch is pushed.
 * \param n_samples_total Total number of rows; reaching it triggers the column matrix build.
 */
template <typename Batch>
void PushAdapterBatch(Context const* ctx, size_t rbegin, size_t prev_sum, Batch const& batch,
                      float missing, common::Span<FeatureType const> ft, double sparse_thresh,
                      size_t n_samples_total) {
  auto n_threads = ctx->Threads();
  auto n_bins_total = cut.TotalBins();

  // One zero-initialised block of `n_bins_total` counters for every thread.
  hit_count_tloc_.clear();
  hit_count_tloc_.resize(n_threads * n_bins_total, 0);

  // Number of valid entries in each row, accumulated into `row_ptr` from `rbegin` on.
  auto valid_counts = GetRowCounts(batch, missing, n_threads);
  auto count_iter =
      common::MakeIndexTransformIter([&](size_t ridx) { return valid_counts[ridx]; });
  common::PartialSum(n_threads, count_iter, count_iter + batch.Size(), prev_sum,
                     row_ptr.begin() + rbegin);

  auto is_valid = data::IsValidFunctor{missing};
  PushBatchImpl(n_threads, batch, rbegin, is_valid, ft);

  bool const is_last_batch = (rbegin + batch.Size() == n_samples_total);
  if (!is_last_batch) {
    return;
  }
  // finished
  CHECK(!std::isnan(sparse_thresh));
  this->columns_ = std::make_unique<common::ColumnMatrix>(*this, sparse_thresh);
}
// Call ColumnMatrix::PushBatch
template <typename Batch>
void PushAdapterBatchColumns(Context const* ctx, Batch const& batch, float missing,
size_t rbegin);
void ResizeIndex(const size_t n_index, const bool isDense);
@@ -117,6 +216,8 @@ class GHistIndexMatrix {
common::ColumnMatrix const& Transpose() const;
float GetFvalue(size_t ridx, size_t fidx, bool is_cat) const;
private:
std::unique_ptr<common::ColumnMatrix> columns_;
std::vector<size_t> hit_count_tloc_;