Remove hist builder class. (#9400)
* Remove hist builder class. * Clean up this stateless class. * Add comment to thread block.
This commit is contained in:
parent
0de7c47495
commit
22b0a55a04
@ -8,8 +8,8 @@
|
|||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "../common/common.h"
|
#include "../data/adapter.h" // for SparsePageAdapterBatch
|
||||||
#include "column_matrix.h"
|
#include "../data/gradient_index.h" // for GHistIndexMatrix
|
||||||
#include "quantile.h"
|
#include "quantile.h"
|
||||||
#include "xgboost/base.h"
|
#include "xgboost/base.h"
|
||||||
#include "xgboost/context.h" // Context
|
#include "xgboost/context.h" // Context
|
||||||
@ -24,9 +24,7 @@
|
|||||||
#define PREFETCH_READ_T0(addr) do {} while (0)
|
#define PREFETCH_READ_T0(addr) do {} while (0)
|
||||||
#endif // defined(XGBOOST_MM_PREFETCH_PRESENT)
|
#endif // defined(XGBOOST_MM_PREFETCH_PRESENT)
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost::common {
|
||||||
namespace common {
|
|
||||||
|
|
||||||
HistogramCuts::HistogramCuts() {
|
HistogramCuts::HistogramCuts() {
|
||||||
cut_ptrs_.HostVector().emplace_back(0);
|
cut_ptrs_.HostVector().emplace_back(0);
|
||||||
}
|
}
|
||||||
@ -350,9 +348,8 @@ void BuildHistDispatch(Span<GradientPair const> gpair, const RowSetCollection::E
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <bool any_missing>
|
template <bool any_missing>
|
||||||
void GHistBuilder::BuildHist(Span<GradientPair const> gpair,
|
void BuildHist(Span<GradientPair const> gpair, const RowSetCollection::Elem row_indices,
|
||||||
const RowSetCollection::Elem row_indices, const GHistIndexMatrix &gmat,
|
const GHistIndexMatrix &gmat, GHistRow hist, bool force_read_by_column) {
|
||||||
GHistRow hist, bool force_read_by_column) const {
|
|
||||||
/* force_read_by_column is used for testing the columnwise building of histograms.
|
/* force_read_by_column is used for testing the columnwise building of histograms.
|
||||||
* default force_read_by_column = false
|
* default force_read_by_column = false
|
||||||
*/
|
*/
|
||||||
@ -369,14 +366,13 @@ void GHistBuilder::BuildHist(Span<GradientPair const> gpair,
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
template void GHistBuilder::BuildHist<true>(Span<GradientPair const> gpair,
|
template void BuildHist<true>(Span<GradientPair const> gpair,
|
||||||
const RowSetCollection::Elem row_indices,
|
const RowSetCollection::Elem row_indices,
|
||||||
const GHistIndexMatrix &gmat, GHistRow hist,
|
const GHistIndexMatrix &gmat, GHistRow hist,
|
||||||
bool force_read_by_column) const;
|
bool force_read_by_column);
|
||||||
|
|
||||||
template void GHistBuilder::BuildHist<false>(Span<GradientPair const> gpair,
|
template void BuildHist<false>(Span<GradientPair const> gpair,
|
||||||
const RowSetCollection::Elem row_indices,
|
const RowSetCollection::Elem row_indices,
|
||||||
const GHistIndexMatrix &gmat, GHistRow hist,
|
const GHistIndexMatrix &gmat, GHistRow hist,
|
||||||
bool force_read_by_column) const;
|
bool force_read_by_column);
|
||||||
} // namespace common
|
} // namespace xgboost::common
|
||||||
} // namespace xgboost
|
|
||||||
|
|||||||
@ -16,11 +16,9 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "categorical.h"
|
#include "categorical.h"
|
||||||
#include "common.h"
|
|
||||||
#include "quantile.h"
|
#include "quantile.h"
|
||||||
#include "row_set.h"
|
#include "row_set.h"
|
||||||
#include "threading_utils.h"
|
#include "threading_utils.h"
|
||||||
#include "timer.h"
|
|
||||||
#include "xgboost/base.h" // for bst_feature_t, bst_bin_t
|
#include "xgboost/base.h" // for bst_feature_t, bst_bin_t
|
||||||
#include "xgboost/data.h"
|
#include "xgboost/data.h"
|
||||||
|
|
||||||
@ -598,6 +596,8 @@ class ParallelGHistBuilder {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] bst_bin_t TotalBins() const { return nbins_; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void MatchNodeNidPairToHist() {
|
void MatchNodeNidPairToHist() {
|
||||||
size_t hist_allocated_additionally = 0;
|
size_t hist_allocated_additionally = 0;
|
||||||
@ -643,27 +643,10 @@ class ParallelGHistBuilder {
|
|||||||
std::map<std::pair<size_t, size_t>, int> tid_nid_to_hist_;
|
std::map<std::pair<size_t, size_t>, int> tid_nid_to_hist_;
|
||||||
};
|
};
|
||||||
|
|
||||||
/*!
|
// construct a histogram via histogram aggregation
|
||||||
* \brief builder for histograms of gradient statistics
|
template <bool any_missing>
|
||||||
*/
|
void BuildHist(Span<GradientPair const> gpair, const RowSetCollection::Elem row_indices,
|
||||||
class GHistBuilder {
|
const GHistIndexMatrix& gmat, GHistRow hist, bool force_read_by_column = false);
|
||||||
public:
|
|
||||||
GHistBuilder() = default;
|
|
||||||
explicit GHistBuilder(uint32_t nbins): nbins_{nbins} {}
|
|
||||||
|
|
||||||
// construct a histogram via histogram aggregation
|
|
||||||
template <bool any_missing>
|
|
||||||
void BuildHist(Span<GradientPair const> gpair, const RowSetCollection::Elem row_indices,
|
|
||||||
const GHistIndexMatrix& gmat, GHistRow hist,
|
|
||||||
bool force_read_by_column = false) const;
|
|
||||||
uint32_t GetNumBins() const {
|
|
||||||
return nbins_;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
/*! \brief number of all bins over all features */
|
|
||||||
uint32_t nbins_ { 0 };
|
|
||||||
};
|
|
||||||
} // namespace common
|
} // namespace common
|
||||||
} // namespace xgboost
|
} // namespace xgboost
|
||||||
#endif // XGBOOST_COMMON_HIST_UTIL_H_
|
#endif // XGBOOST_COMMON_HIST_UTIL_H_
|
||||||
|
|||||||
@ -30,9 +30,7 @@ inline int32_t omp_get_thread_limit() { return std::numeric_limits<int32_t>::max
|
|||||||
}
|
}
|
||||||
#endif // defined(_MSC_VER)
|
#endif // defined(_MSC_VER)
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost::common {
|
||||||
namespace common {
|
|
||||||
|
|
||||||
// Represent simple range of indexes [begin, end)
|
// Represent simple range of indexes [begin, end)
|
||||||
// Inspired by tbb::blocked_range
|
// Inspired by tbb::blocked_range
|
||||||
class Range1d {
|
class Range1d {
|
||||||
@ -69,7 +67,7 @@ class Range1d {
|
|||||||
// [1,2], [3,4], [5,6], [7,8], [9]
|
// [1,2], [3,4], [5,6], [7,8], [9]
|
||||||
// The class helps to process data in several tree nodes (usually unbalanced) in parallel,
|
// The class helps to process data in several tree nodes (usually unbalanced) in parallel,
|
||||||
// using nested parallelism (by nodes and by data in each node).
|
// using nested parallelism (by nodes and by data in each node).
|
||||||
// This helps to improve CPU resource utilization.
|
// This helps to improve CPU resource utilization.
|
||||||
class BlockedSpace2d {
|
class BlockedSpace2d {
|
||||||
public:
|
public:
|
||||||
// Example of space:
|
// Example of space:
|
||||||
@ -86,39 +84,47 @@ class BlockedSpace2d {
|
|||||||
// dim1 - size of the first dimension in the space
|
// dim1 - size of the first dimension in the space
|
||||||
// getter_size_dim2 - functor to get the second dimensions for each 'row' by row-index
|
// getter_size_dim2 - functor to get the second dimensions for each 'row' by row-index
|
||||||
// grain_size - max size of produced blocks
|
// grain_size - max size of produced blocks
|
||||||
template<typename Func>
|
template <typename Func>
|
||||||
BlockedSpace2d(size_t dim1, Func getter_size_dim2, size_t grain_size) {
|
BlockedSpace2d(std::size_t dim1, Func getter_size_dim2, std::size_t grain_size) {
|
||||||
for (size_t i = 0; i < dim1; ++i) {
|
for (std::size_t i = 0; i < dim1; ++i) {
|
||||||
const size_t size = getter_size_dim2(i);
|
std::size_t size = getter_size_dim2(i);
|
||||||
const size_t n_blocks = size/grain_size + !!(size % grain_size);
|
// Each row (second dim) is divided into n_blocks
|
||||||
for (size_t iblock = 0; iblock < n_blocks; ++iblock) {
|
std::size_t n_blocks = size / grain_size + !!(size % grain_size);
|
||||||
const size_t begin = iblock * grain_size;
|
for (std::size_t iblock = 0; iblock < n_blocks; ++iblock) {
|
||||||
const size_t end = std::min(begin + grain_size, size);
|
std::size_t begin = iblock * grain_size;
|
||||||
|
std::size_t end = std::min(begin + grain_size, size);
|
||||||
AddBlock(i, begin, end);
|
AddBlock(i, begin, end);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Number of blocks (tasks) in the space
|
// Number of blocks (tasks) in the space
|
||||||
size_t Size() const {
|
[[nodiscard]] std::size_t Size() const {
|
||||||
return ranges_.size();
|
return ranges_.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
// get index of the first dimension of i-th block(task)
|
// get index of the first dimension of i-th block(task)
|
||||||
size_t GetFirstDimension(size_t i) const {
|
[[nodiscard]] std::size_t GetFirstDimension(size_t i) const {
|
||||||
CHECK_LT(i, first_dimension_.size());
|
CHECK_LT(i, first_dimension_.size());
|
||||||
return first_dimension_[i];
|
return first_dimension_[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
// get a range of indexes for the second dimension of i-th block(task)
|
// get a range of indexes for the second dimension of i-th block(task)
|
||||||
Range1d GetRange(size_t i) const {
|
[[nodiscard]] Range1d GetRange(size_t i) const {
|
||||||
CHECK_LT(i, ranges_.size());
|
CHECK_LT(i, ranges_.size());
|
||||||
return ranges_[i];
|
return ranges_[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void AddBlock(size_t first_dimension, size_t begin, size_t end) {
|
/**
|
||||||
first_dimension_.push_back(first_dimension);
|
* @brief Add a parallel block.
|
||||||
|
*
|
||||||
|
* @param first_dim The row index.
|
||||||
|
* @param begin The begin of the second dimension.
|
||||||
|
* @param end The end of the second dimension.
|
||||||
|
*/
|
||||||
|
void AddBlock(std::size_t first_dim, std::size_t begin, std::size_t end) {
|
||||||
|
first_dimension_.push_back(first_dim);
|
||||||
ranges_.emplace_back(begin, end);
|
ranges_.emplace_back(begin, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -303,7 +309,6 @@ class MemStackAllocator {
|
|||||||
* \brief Constant that can be used for initializing static thread local memory.
|
* \brief Constant that can be used for initializing static thread local memory.
|
||||||
*/
|
*/
|
||||||
std::int32_t constexpr DefaultMaxThreads() { return 128; }
|
std::int32_t constexpr DefaultMaxThreads() { return 128; }
|
||||||
} // namespace common
|
} // namespace xgboost::common
|
||||||
} // namespace xgboost
|
|
||||||
|
|
||||||
#endif // XGBOOST_COMMON_THREADING_UTILS_H_
|
#endif // XGBOOST_COMMON_THREADING_UTILS_H_
|
||||||
|
|||||||
@ -22,7 +22,6 @@ class HistogramBuilder {
|
|||||||
common::HistCollection hist_;
|
common::HistCollection hist_;
|
||||||
/*! \brief cumulative local parent histogram of gradients. */
|
/*! \brief cumulative local parent histogram of gradients. */
|
||||||
common::HistCollection hist_local_worker_;
|
common::HistCollection hist_local_worker_;
|
||||||
common::GHistBuilder builder_;
|
|
||||||
common::ParallelGHistBuilder buffer_;
|
common::ParallelGHistBuilder buffer_;
|
||||||
BatchParam param_;
|
BatchParam param_;
|
||||||
int32_t n_threads_{-1};
|
int32_t n_threads_{-1};
|
||||||
@ -49,7 +48,6 @@ class HistogramBuilder {
|
|||||||
hist_.Init(total_bins);
|
hist_.Init(total_bins);
|
||||||
hist_local_worker_.Init(total_bins);
|
hist_local_worker_.Init(total_bins);
|
||||||
buffer_.Init(total_bins);
|
buffer_.Init(total_bins);
|
||||||
builder_ = common::GHistBuilder(total_bins);
|
|
||||||
is_distributed_ = is_distributed;
|
is_distributed_ = is_distributed;
|
||||||
is_col_split_ = is_col_split;
|
is_col_split_ = is_col_split;
|
||||||
// Workaround s390x gcc 7.5.0
|
// Workaround s390x gcc 7.5.0
|
||||||
@ -88,8 +86,7 @@ class HistogramBuilder {
|
|||||||
elem.begin + end_of_row_set, nid);
|
elem.begin + end_of_row_set, nid);
|
||||||
auto hist = buffer_.GetInitializedHist(tid, nid_in_set);
|
auto hist = buffer_.GetInitializedHist(tid, nid_in_set);
|
||||||
if (rid_set.Size() != 0) {
|
if (rid_set.Size() != 0) {
|
||||||
builder_.template BuildHist<any_missing>(gpair_h, rid_set, gidx, hist,
|
common::BuildHist<any_missing>(gpair_h, rid_set, gidx, hist, force_read_by_column);
|
||||||
force_read_by_column);
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -163,9 +160,9 @@ class HistogramBuilder {
|
|||||||
std::vector<ExpandEntry> const &nodes_for_explicit_hist_build,
|
std::vector<ExpandEntry> const &nodes_for_explicit_hist_build,
|
||||||
std::vector<ExpandEntry> const &nodes_for_subtraction_trick,
|
std::vector<ExpandEntry> const &nodes_for_subtraction_trick,
|
||||||
int starting_index, int sync_count) {
|
int starting_index, int sync_count) {
|
||||||
const size_t nbins = builder_.GetNumBins();
|
auto n_bins = buffer_.TotalBins();
|
||||||
common::BlockedSpace2d space(
|
common::BlockedSpace2d space(
|
||||||
nodes_for_explicit_hist_build.size(), [&](size_t) { return nbins; }, 1024);
|
nodes_for_explicit_hist_build.size(), [&](size_t) { return n_bins; }, 1024);
|
||||||
common::ParallelFor2d(space, n_threads_, [&](size_t node, common::Range1d r) {
|
common::ParallelFor2d(space, n_threads_, [&](size_t node, common::Range1d r) {
|
||||||
const auto &entry = nodes_for_explicit_hist_build[node];
|
const auto &entry = nodes_for_explicit_hist_build[node];
|
||||||
auto this_hist = this->hist_[entry.nid];
|
auto this_hist = this->hist_[entry.nid];
|
||||||
@ -188,14 +185,13 @@ class HistogramBuilder {
|
|||||||
});
|
});
|
||||||
|
|
||||||
collective::Allreduce<collective::Operation::kSum>(
|
collective::Allreduce<collective::Operation::kSum>(
|
||||||
reinterpret_cast<double *>(this->hist_[starting_index].data()),
|
reinterpret_cast<double *>(this->hist_[starting_index].data()), n_bins * sync_count * 2);
|
||||||
builder_.GetNumBins() * sync_count * 2);
|
|
||||||
|
|
||||||
ParallelSubtractionHist(space, nodes_for_explicit_hist_build, nodes_for_subtraction_trick,
|
ParallelSubtractionHist(space, nodes_for_explicit_hist_build, nodes_for_subtraction_trick,
|
||||||
p_tree);
|
p_tree);
|
||||||
|
|
||||||
common::BlockedSpace2d space2(
|
common::BlockedSpace2d space2(
|
||||||
nodes_for_subtraction_trick.size(), [&](size_t) { return nbins; }, 1024);
|
nodes_for_subtraction_trick.size(), [&](size_t) { return n_bins; }, 1024);
|
||||||
ParallelSubtractionHist(space2, nodes_for_subtraction_trick, nodes_for_explicit_hist_build,
|
ParallelSubtractionHist(space2, nodes_for_subtraction_trick, nodes_for_explicit_hist_build,
|
||||||
p_tree);
|
p_tree);
|
||||||
}
|
}
|
||||||
@ -203,7 +199,7 @@ class HistogramBuilder {
|
|||||||
void SyncHistogramLocal(RegTree const *p_tree,
|
void SyncHistogramLocal(RegTree const *p_tree,
|
||||||
std::vector<ExpandEntry> const &nodes_for_explicit_hist_build,
|
std::vector<ExpandEntry> const &nodes_for_explicit_hist_build,
|
||||||
std::vector<ExpandEntry> const &nodes_for_subtraction_trick) {
|
std::vector<ExpandEntry> const &nodes_for_subtraction_trick) {
|
||||||
const size_t nbins = this->builder_.GetNumBins();
|
const size_t nbins = this->buffer_.TotalBins();
|
||||||
common::BlockedSpace2d space(
|
common::BlockedSpace2d space(
|
||||||
nodes_for_explicit_hist_build.size(), [&](size_t) { return nbins; }, 1024);
|
nodes_for_explicit_hist_build.size(), [&](size_t) { return nbins; }, 1024);
|
||||||
|
|
||||||
|
|||||||
@ -48,12 +48,10 @@ void TestEvaluateSplits(bool force_read_by_column) {
|
|||||||
std::iota(row_indices.begin(), row_indices.end(), 0);
|
std::iota(row_indices.begin(), row_indices.end(), 0);
|
||||||
row_set_collection.Init();
|
row_set_collection.Init();
|
||||||
|
|
||||||
auto hist_builder = common::GHistBuilder(gmat.cut.Ptrs().back());
|
|
||||||
hist.Init(gmat.cut.Ptrs().back());
|
hist.Init(gmat.cut.Ptrs().back());
|
||||||
hist.AddHistRow(0);
|
hist.AddHistRow(0);
|
||||||
hist.AllocateAllData();
|
hist.AllocateAllData();
|
||||||
hist_builder.template BuildHist<false>(row_gpairs, row_set_collection[0],
|
common::BuildHist<false>(row_gpairs, row_set_collection[0], gmat, hist[0], force_read_by_column);
|
||||||
gmat, hist[0], force_read_by_column);
|
|
||||||
|
|
||||||
// Compute total gradient for all data points
|
// Compute total gradient for all data points
|
||||||
GradientPairPrecise total_gpair;
|
GradientPairPrecise total_gpair;
|
||||||
|
|||||||
@ -13,8 +13,7 @@
|
|||||||
#include "../../categorical_helpers.h"
|
#include "../../categorical_helpers.h"
|
||||||
#include "../../helpers.h"
|
#include "../../helpers.h"
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost::tree {
|
||||||
namespace tree {
|
|
||||||
namespace {
|
namespace {
|
||||||
void InitRowPartitionForTest(common::RowSetCollection *row_set, size_t n_samples, size_t base_rowid = 0) {
|
void InitRowPartitionForTest(common::RowSetCollection *row_set, size_t n_samples, size_t base_rowid = 0) {
|
||||||
auto &row_indices = *row_set->Data();
|
auto &row_indices = *row_set->Data();
|
||||||
@ -487,5 +486,5 @@ TEST(CPUHistogram, ExternalMemory) {
|
|||||||
TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, false);
|
TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, false);
|
||||||
TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, true);
|
TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, true);
|
||||||
}
|
}
|
||||||
} // namespace tree
|
} // namespace xgboost::tree
|
||||||
} // namespace xgboost
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user