Remove hist builder class. (#9400)
* Remove hist build class. * Cleanup this stateless class. * Add comment to thread block.
This commit is contained in:
@@ -8,8 +8,8 @@
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "../common/common.h"
|
||||
#include "column_matrix.h"
|
||||
#include "../data/adapter.h" // for SparsePageAdapterBatch
|
||||
#include "../data/gradient_index.h" // for GHistIndexMatrix
|
||||
#include "quantile.h"
|
||||
#include "xgboost/base.h"
|
||||
#include "xgboost/context.h" // Context
|
||||
@@ -24,9 +24,7 @@
|
||||
#define PREFETCH_READ_T0(addr) do {} while (0)
|
||||
#endif // defined(XGBOOST_MM_PREFETCH_PRESENT)
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
|
||||
namespace xgboost::common {
|
||||
HistogramCuts::HistogramCuts() {
|
||||
cut_ptrs_.HostVector().emplace_back(0);
|
||||
}
|
||||
@@ -350,9 +348,8 @@ void BuildHistDispatch(Span<GradientPair const> gpair, const RowSetCollection::E
|
||||
}
|
||||
|
||||
template <bool any_missing>
|
||||
void GHistBuilder::BuildHist(Span<GradientPair const> gpair,
|
||||
const RowSetCollection::Elem row_indices, const GHistIndexMatrix &gmat,
|
||||
GHistRow hist, bool force_read_by_column) const {
|
||||
void BuildHist(Span<GradientPair const> gpair, const RowSetCollection::Elem row_indices,
|
||||
const GHistIndexMatrix &gmat, GHistRow hist, bool force_read_by_column) {
|
||||
/* force_read_by_column is used for testing the columnwise building of histograms.
|
||||
* default force_read_by_column = false
|
||||
*/
|
||||
@@ -369,14 +366,13 @@ void GHistBuilder::BuildHist(Span<GradientPair const> gpair,
|
||||
});
|
||||
}
|
||||
|
||||
template void GHistBuilder::BuildHist<true>(Span<GradientPair const> gpair,
|
||||
const RowSetCollection::Elem row_indices,
|
||||
const GHistIndexMatrix &gmat, GHistRow hist,
|
||||
bool force_read_by_column) const;
|
||||
template void BuildHist<true>(Span<GradientPair const> gpair,
|
||||
const RowSetCollection::Elem row_indices,
|
||||
const GHistIndexMatrix &gmat, GHistRow hist,
|
||||
bool force_read_by_column);
|
||||
|
||||
template void GHistBuilder::BuildHist<false>(Span<GradientPair const> gpair,
|
||||
const RowSetCollection::Elem row_indices,
|
||||
const GHistIndexMatrix &gmat, GHistRow hist,
|
||||
bool force_read_by_column) const;
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
template void BuildHist<false>(Span<GradientPair const> gpair,
|
||||
const RowSetCollection::Elem row_indices,
|
||||
const GHistIndexMatrix &gmat, GHistRow hist,
|
||||
bool force_read_by_column);
|
||||
} // namespace xgboost::common
|
||||
|
||||
@@ -16,11 +16,9 @@
|
||||
#include <vector>
|
||||
|
||||
#include "categorical.h"
|
||||
#include "common.h"
|
||||
#include "quantile.h"
|
||||
#include "row_set.h"
|
||||
#include "threading_utils.h"
|
||||
#include "timer.h"
|
||||
#include "xgboost/base.h" // for bst_feature_t, bst_bin_t
|
||||
#include "xgboost/data.h"
|
||||
|
||||
@@ -598,6 +596,8 @@ class ParallelGHistBuilder {
|
||||
}
|
||||
}
|
||||
|
||||
[[nodiscard]] bst_bin_t TotalBins() const { return nbins_; }
|
||||
|
||||
private:
|
||||
void MatchNodeNidPairToHist() {
|
||||
size_t hist_allocated_additionally = 0;
|
||||
@@ -643,27 +643,10 @@ class ParallelGHistBuilder {
|
||||
std::map<std::pair<size_t, size_t>, int> tid_nid_to_hist_;
|
||||
};
|
||||
|
||||
/*!
|
||||
* \brief builder for histograms of gradient statistics
|
||||
*/
|
||||
class GHistBuilder {
|
||||
public:
|
||||
GHistBuilder() = default;
|
||||
explicit GHistBuilder(uint32_t nbins): nbins_{nbins} {}
|
||||
|
||||
// construct a histogram via histogram aggregation
|
||||
template <bool any_missing>
|
||||
void BuildHist(Span<GradientPair const> gpair, const RowSetCollection::Elem row_indices,
|
||||
const GHistIndexMatrix& gmat, GHistRow hist,
|
||||
bool force_read_by_column = false) const;
|
||||
uint32_t GetNumBins() const {
|
||||
return nbins_;
|
||||
}
|
||||
|
||||
private:
|
||||
/*! \brief number of all bins over all features */
|
||||
uint32_t nbins_ { 0 };
|
||||
};
|
||||
// construct a histogram via histogram aggregation
|
||||
template <bool any_missing>
|
||||
void BuildHist(Span<GradientPair const> gpair, const RowSetCollection::Elem row_indices,
|
||||
const GHistIndexMatrix& gmat, GHistRow hist, bool force_read_by_column = false);
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
#endif // XGBOOST_COMMON_HIST_UTIL_H_
|
||||
|
||||
@@ -30,9 +30,7 @@ inline int32_t omp_get_thread_limit() { return std::numeric_limits<int32_t>::max
|
||||
}
|
||||
#endif // defined(_MSC_VER)
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
|
||||
namespace xgboost::common {
|
||||
// Represent simple range of indexes [begin, end)
|
||||
// Inspired by tbb::blocked_range
|
||||
class Range1d {
|
||||
@@ -69,7 +67,7 @@ class Range1d {
|
||||
// [1,2], [3,4], [5,6], [7,8], [9]
|
||||
// The class helps to process data in several tree nodes (non-balanced usually) in parallel
|
||||
// Using nested parallelism (by nodes and by data in each node)
|
||||
// it helps to improve CPU resources utilization
|
||||
// it helps to improve CPU resources utilization
|
||||
class BlockedSpace2d {
|
||||
public:
|
||||
// Example of space:
|
||||
@@ -86,39 +84,47 @@ class BlockedSpace2d {
|
||||
// dim1 - size of the first dimension in the space
|
||||
// getter_size_dim2 - functor to get the second dimensions for each 'row' by row-index
|
||||
// grain_size - max size of produced blocks
|
||||
template<typename Func>
|
||||
BlockedSpace2d(size_t dim1, Func getter_size_dim2, size_t grain_size) {
|
||||
for (size_t i = 0; i < dim1; ++i) {
|
||||
const size_t size = getter_size_dim2(i);
|
||||
const size_t n_blocks = size/grain_size + !!(size % grain_size);
|
||||
for (size_t iblock = 0; iblock < n_blocks; ++iblock) {
|
||||
const size_t begin = iblock * grain_size;
|
||||
const size_t end = std::min(begin + grain_size, size);
|
||||
template <typename Func>
|
||||
BlockedSpace2d(std::size_t dim1, Func getter_size_dim2, std::size_t grain_size) {
|
||||
for (std::size_t i = 0; i < dim1; ++i) {
|
||||
std::size_t size = getter_size_dim2(i);
|
||||
// Each row (second dim) is divided into n_blocks
|
||||
std::size_t n_blocks = size / grain_size + !!(size % grain_size);
|
||||
for (std::size_t iblock = 0; iblock < n_blocks; ++iblock) {
|
||||
std::size_t begin = iblock * grain_size;
|
||||
std::size_t end = std::min(begin + grain_size, size);
|
||||
AddBlock(i, begin, end);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Amount of blocks(tasks) in a space
|
||||
size_t Size() const {
|
||||
[[nodiscard]] std::size_t Size() const {
|
||||
return ranges_.size();
|
||||
}
|
||||
|
||||
// get index of the first dimension of i-th block(task)
|
||||
size_t GetFirstDimension(size_t i) const {
|
||||
[[nodiscard]] std::size_t GetFirstDimension(size_t i) const {
|
||||
CHECK_LT(i, first_dimension_.size());
|
||||
return first_dimension_[i];
|
||||
}
|
||||
|
||||
// get a range of indexes for the second dimension of i-th block(task)
|
||||
Range1d GetRange(size_t i) const {
|
||||
[[nodiscard]] Range1d GetRange(size_t i) const {
|
||||
CHECK_LT(i, ranges_.size());
|
||||
return ranges_[i];
|
||||
}
|
||||
|
||||
private:
|
||||
void AddBlock(size_t first_dimension, size_t begin, size_t end) {
|
||||
first_dimension_.push_back(first_dimension);
|
||||
/**
|
||||
* @brief Add a parallel block.
|
||||
*
|
||||
* @param first_dim The row index.
|
||||
* @param begin The begin of the second dimension.
|
||||
* @param end The end of the second dimension.
|
||||
*/
|
||||
void AddBlock(std::size_t first_dim, std::size_t begin, std::size_t end) {
|
||||
first_dimension_.push_back(first_dim);
|
||||
ranges_.emplace_back(begin, end);
|
||||
}
|
||||
|
||||
@@ -303,7 +309,6 @@ class MemStackAllocator {
|
||||
* \brief Constant that can be used for initializing static thread local memory.
|
||||
*/
|
||||
std::int32_t constexpr DefaultMaxThreads() { return 128; }
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::common
|
||||
|
||||
#endif // XGBOOST_COMMON_THREADING_UTILS_H_
|
||||
|
||||
Reference in New Issue
Block a user