Define bin type. (#7850)

This commit is contained in:
Jiaming Yuan 2022-04-29 19:41:39 +08:00 committed by GitHub
parent f7db16add1
commit 288c52596c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 25 additions and 23 deletions

View File

@ -121,6 +121,8 @@ using bst_float = float; // NOLINT
using bst_cat_t = int32_t; // NOLINT
/*! \brief Type for data column (feature) index. */
using bst_feature_t = uint32_t; // NOLINT
/*! \brief Type for histogram bin index. */
using bst_bin_t = int32_t; // NOLINT
/*! \brief Type for data row index.
*
* Be careful `std::size_t' is implementation-defined. Meaning that the binary

View File

@ -34,8 +34,8 @@ class Column {
public:
static constexpr int32_t kMissingId = -1;
Column(ColumnType type, common::Span<const BinIdxType> index, const uint32_t index_base)
: type_(type), index_(index), index_base_(index_base) {}
Column(ColumnType type, common::Span<const BinIdxType> index, const bst_bin_t index_base)
: type_(type), index_(index), index_base_{index_base} {}
virtual ~Column() = default;
@ -60,19 +60,19 @@ class Column {
/* bin indexes in range [0, max_bins - 1] */
common::Span<const BinIdxType> index_;
/* bin index offset for specific feature */
const uint32_t index_base_;
bst_bin_t const index_base_;
};
template <typename BinIdxType>
class SparseColumn : public Column<BinIdxType> {
public:
SparseColumn(ColumnType type, common::Span<const BinIdxType> index, uint32_t index_base,
SparseColumn(ColumnType type, common::Span<const BinIdxType> index, bst_bin_t index_base,
common::Span<const size_t> row_ind)
: Column<BinIdxType>(type, index, index_base), row_ind_(row_ind) {}
const size_t* GetRowData() const { return row_ind_.data(); }
int32_t GetBinIdx(size_t rid, size_t* state) const {
bst_bin_t GetBinIdx(size_t rid, size_t* state) const {
const size_t column_size = this->Size();
if (!((*state) < column_size)) {
return this->kMissingId;

View File

@ -40,8 +40,6 @@ class HistogramCuts {
float max_cat_{-1.0f};
protected:
using BinIdx = uint32_t;
void Swap(HistogramCuts&& that) noexcept(true) {
std::swap(cut_values_, that.cut_values_);
std::swap(cut_ptrs_, that.cut_ptrs_);
@ -110,31 +108,31 @@ class HistogramCuts {
// Return the index of a cut point that is strictly greater than the input
// value, or the last available index if none exists
BinIdx SearchBin(float value, bst_feature_t column_id, std::vector<uint32_t> const& ptrs,
bst_bin_t SearchBin(float value, bst_feature_t column_id, std::vector<uint32_t> const& ptrs,
std::vector<float> const& values) const {
auto end = ptrs[column_id + 1];
auto beg = ptrs[column_id];
auto it = std::upper_bound(values.cbegin() + beg, values.cbegin() + end, value);
BinIdx idx = it - values.cbegin();
bst_bin_t idx = it - values.cbegin();
idx -= !!(idx == end);
return idx;
}
BinIdx SearchBin(float value, bst_feature_t column_id) const {
bst_bin_t SearchBin(float value, bst_feature_t column_id) const {
return this->SearchBin(value, column_id, Ptrs(), Values());
}
/**
* \brief Search the bin index for numerical feature.
*/
BinIdx SearchBin(Entry const& e) const {
bst_bin_t SearchBin(Entry const& e) const {
return SearchBin(e.fvalue, e.index);
}
/**
* \brief Search the bin index for categorical feature.
*/
BinIdx SearchCatBin(Entry const &e) const {
bst_bin_t SearchCatBin(Entry const &e) const {
auto const &ptrs = this->Ptrs();
auto const &vals = this->Values();
auto end = ptrs.at(e.index + 1) + vals.cbegin();
@ -296,7 +294,7 @@ struct Index {
};
template <typename GradientIndex>
int32_t XGBOOST_HOST_DEV_INLINE BinarySearchBin(size_t begin, size_t end,
bst_bin_t XGBOOST_HOST_DEV_INLINE BinarySearchBin(size_t begin, size_t end,
GradientIndex const& data,
uint32_t const fidx_begin,
uint32_t const fidx_end) {
@ -324,8 +322,6 @@ int32_t XGBOOST_HOST_DEV_INLINE BinarySearchBin(size_t begin, size_t end,
return -1;
}
class ColumnMatrix;
template<typename GradientSumT>
using GHistRow = Span<xgboost::detail::GradientPairInternal<GradientSumT> >;

View File

@ -14,6 +14,10 @@
#include "xgboost/data.h"
namespace xgboost {
namespace common {
class ColumnMatrix;
} // namespace common
/*!
* \brief preprocessed global index matrix, in CSR format
*
@ -80,13 +84,13 @@ class GHistIndexMatrix {
for (bst_uint j = 0; j < inst.size(); ++j) {
auto e = inst[j];
if (common::IsCat(ft, e.index)) {
auto bin_idx = cut.SearchCatBin(e);
bst_bin_t bin_idx = cut.SearchCatBin(e);
index_data[ibegin + j] = get_offset(bin_idx, j);
++hit_count_tloc_[tid * nbins + bin_idx];
} else {
uint32_t idx = cut.SearchBin(e.fvalue, e.index, ptrs, values);
index_data[ibegin + j] = get_offset(idx, j);
++hit_count_tloc_[tid * nbins + idx];
bst_bin_t bin_idx = cut.SearchBin(e.fvalue, e.index, ptrs, values);
index_data[ibegin + j] = get_offset(bin_idx, j);
++hit_count_tloc_[tid * nbins + bin_idx];
}
}
});