[breaking] Drop single precision histogram (#7892)

Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>
Jiaming Yuan 2022-05-13 19:54:55 +08:00 committed by GitHub
parent c8f9d4b6e6
commit 1b6538b4e5
18 changed files with 171 additions and 407 deletions


@@ -1,5 +1,5 @@
 /*!
- * Copyright 2015-2019 by Contributors.
+ * Copyright 2015-2022 by Contributors.
  * \brief XGBoost Amalgamation.
  * This offers an alternative way to compile the entire library from this single file.
  *
@@ -50,7 +50,6 @@
 // trees
 #include "../src/tree/constraints.cc"
-#include "../src/tree/hist/param.cc"
 #include "../src/tree/param.cc"
 #include "../src/tree/tree_model.cc"
 #include "../src/tree/tree_updater.cc"


@@ -34,34 +34,6 @@ Supported parameters
 .. |tick| unicode:: U+2714
 .. |cross| unicode:: U+2718

-+--------------------------------+--------------+
-| parameter                      | ``gpu_hist`` |
-+================================+==============+
-| ``subsample``                  | |tick|       |
-+--------------------------------+--------------+
-| ``sampling_method``            | |tick|       |
-+--------------------------------+--------------+
-| ``colsample_bytree``           | |tick|       |
-+--------------------------------+--------------+
-| ``colsample_bylevel``          | |tick|       |
-+--------------------------------+--------------+
-| ``max_bin``                    | |tick|       |
-+--------------------------------+--------------+
-| ``gamma``                      | |tick|       |
-+--------------------------------+--------------+
-| ``gpu_id``                     | |tick|       |
-+--------------------------------+--------------+
-| ``predictor``                  | |tick|       |
-+--------------------------------+--------------+
-| ``grow_policy``                | |tick|       |
-+--------------------------------+--------------+
-| ``monotone_constraints``       | |tick|       |
-+--------------------------------+--------------+
-| ``interaction_constraints``    | |tick|       |
-+--------------------------------+--------------+
-| ``single_precision_histogram`` | |cross|      |
-+--------------------------------+--------------+

 GPU accelerated prediction is enabled by default for the above mentioned ``tree_method`` parameters but can be switched to CPU prediction by setting ``predictor`` to ``cpu_predictor``. This could be useful if you want to conserve GPU memory. Likewise when using CPU algorithms, GPU accelerated prediction can be enabled by setting ``predictor`` to ``gpu_predictor``.

 The device ordinal (which GPU to use if you have many of them) can be selected using the
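The prediction behaviour described in the documentation excerpt above can be sketched in code. Below is a minimal, illustrative example using the XGBoost C API (the file path is a placeholder and error handling is omitted; none of this is part of the commit itself): it trains with ``gpu_hist`` but switches prediction to the CPU to conserve device memory.

#include <xgboost/c_api.h>

int main() {
  DMatrixHandle dtrain;
  BoosterHandle booster;
  // "train.libsvm" is a placeholder path; real code should check return codes.
  XGDMatrixCreateFromFile("train.libsvm", 0, &dtrain);
  XGBoosterCreate(&dtrain, 1, &booster);
  XGBoosterSetParam(booster, "tree_method", "gpu_hist");
  // GPU-accelerated prediction is the default for gpu_hist; switch it off to
  // conserve GPU memory, as the paragraph above describes.
  XGBoosterSetParam(booster, "predictor", "cpu_predictor");
  XGBoosterFree(booster);
  XGDMatrixFree(dtrain);
  return 0;
}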


@@ -238,10 +238,6 @@ Parameters for Tree Booster
 Additional parameters for ``hist``, ``gpu_hist`` and ``approx`` tree method
 ===========================================================================

-* ``single_precision_histogram``, [default= ``false``]
-
-  - Use single precision to build histograms instead of double precision. Currently disabled for ``gpu_hist``.
-
 * ``max_cat_to_onehot``

   .. versionadded:: 1.6


@@ -171,8 +171,6 @@ Will print out something similar to (not actual output as it's too long for demo
   "grow_gpu_hist": {
     "gpu_hist_train_param": {
       "debug_synchronize": "0",
-      "gpu_batch_nrows": "0",
-      "single_precision_histogram": "0"
     },
     "train_param": {
       "alpha": "0",


@@ -36,78 +36,51 @@ HistogramCuts::HistogramCuts() {
 /*!
  * \brief fill a histogram by zeros in range [begin, end)
  */
-template<typename GradientSumT>
-void InitilizeHistByZeroes(GHistRow<GradientSumT> hist, size_t begin, size_t end) {
+void InitilizeHistByZeroes(GHistRow hist, size_t begin, size_t end) {
 #if defined(XGBOOST_STRICT_R_MODE) && XGBOOST_STRICT_R_MODE == 1
-  std::fill(hist.begin() + begin, hist.begin() + end,
-            xgboost::detail::GradientPairInternal<GradientSumT>());
+  std::fill(hist.begin() + begin, hist.begin() + end, xgboost::GradientPairPrecise());
 #else   // defined(XGBOOST_STRICT_R_MODE) && XGBOOST_STRICT_R_MODE == 1
-  memset(hist.data() + begin, '\0', (end-begin)*
-         sizeof(xgboost::detail::GradientPairInternal<GradientSumT>));
+  memset(hist.data() + begin, '\0', (end - begin) * sizeof(xgboost::GradientPairPrecise));
 #endif  // defined(XGBOOST_STRICT_R_MODE) && XGBOOST_STRICT_R_MODE == 1
 }
-template void InitilizeHistByZeroes(GHistRow<float> hist, size_t begin,
-                                    size_t end);
-template void InitilizeHistByZeroes(GHistRow<double> hist, size_t begin,
-                                    size_t end);

 /*!
  * \brief Increment hist as dst += add in range [begin, end)
  */
-template<typename GradientSumT>
-void IncrementHist(GHistRow<GradientSumT> dst, const GHistRow<GradientSumT> add,
-                   size_t begin, size_t end) {
-  GradientSumT* pdst = reinterpret_cast<GradientSumT*>(dst.data());
-  const GradientSumT* padd = reinterpret_cast<const GradientSumT*>(add.data());
+void IncrementHist(GHistRow dst, const GHistRow add, size_t begin, size_t end) {
+  double* pdst = reinterpret_cast<double*>(dst.data());
+  const double *padd = reinterpret_cast<const double *>(add.data());

   for (size_t i = 2 * begin; i < 2 * end; ++i) {
     pdst[i] += padd[i];
   }
 }
-template void IncrementHist(GHistRow<float> dst, const GHistRow<float> add,
-                            size_t begin, size_t end);
-template void IncrementHist(GHistRow<double> dst, const GHistRow<double> add,
-                            size_t begin, size_t end);

 /*!
  * \brief Copy hist from src to dst in range [begin, end)
  */
-template<typename GradientSumT>
-void CopyHist(GHistRow<GradientSumT> dst, const GHistRow<GradientSumT> src,
-              size_t begin, size_t end) {
-  GradientSumT* pdst = reinterpret_cast<GradientSumT*>(dst.data());
-  const GradientSumT* psrc = reinterpret_cast<const GradientSumT*>(src.data());
+void CopyHist(GHistRow dst, const GHistRow src, size_t begin, size_t end) {
+  double *pdst = reinterpret_cast<double *>(dst.data());
+  const double *psrc = reinterpret_cast<const double *>(src.data());

   for (size_t i = 2 * begin; i < 2 * end; ++i) {
     pdst[i] = psrc[i];
   }
 }
-template void CopyHist(GHistRow<float> dst, const GHistRow<float> src,
-                       size_t begin, size_t end);
-template void CopyHist(GHistRow<double> dst, const GHistRow<double> src,
-                       size_t begin, size_t end);

 /*!
  * \brief Compute Subtraction: dst = src1 - src2 in range [begin, end)
  */
-template<typename GradientSumT>
-void SubtractionHist(GHistRow<GradientSumT> dst, const GHistRow<GradientSumT> src1,
-                     const GHistRow<GradientSumT> src2,
-                     size_t begin, size_t end) {
-  GradientSumT* pdst = reinterpret_cast<GradientSumT*>(dst.data());
-  const GradientSumT* psrc1 = reinterpret_cast<const GradientSumT*>(src1.data());
-  const GradientSumT* psrc2 = reinterpret_cast<const GradientSumT*>(src2.data());
+void SubtractionHist(GHistRow dst, const GHistRow src1, const GHistRow src2, size_t begin,
+                     size_t end) {
+  double* pdst = reinterpret_cast<double*>(dst.data());
+  const double* psrc1 = reinterpret_cast<const double*>(src1.data());
+  const double* psrc2 = reinterpret_cast<const double*>(src2.data());

   for (size_t i = 2 * begin; i < 2 * end; ++i) {
     pdst[i] = psrc1[i] - psrc2[i];
   }
 }
-template void SubtractionHist(GHistRow<float> dst, const GHistRow<float> src1,
-                              const GHistRow<float> src2,
-                              size_t begin, size_t end);
-template void SubtractionHist(GHistRow<double> dst, const GHistRow<double> src1,
-                              const GHistRow<double> src2,
-                              size_t begin, size_t end);

 struct Prefetch {
  public:
@@ -132,11 +105,10 @@ struct Prefetch {

 constexpr size_t Prefetch::kNoPrefetchSize;

-template <typename FPType, bool do_prefetch, typename BinIdxType,
-          bool first_page, bool any_missing = true>
+template <bool do_prefetch, typename BinIdxType, bool first_page, bool any_missing = true>
 void BuildHistKernel(const std::vector<GradientPair> &gpair,
-                     const RowSetCollection::Elem row_indices,
-                     const GHistIndexMatrix &gmat, GHistRow<FPType> hist) {
+                     const RowSetCollection::Elem row_indices, const GHistIndexMatrix &gmat,
+                     GHistRow hist) {
   const size_t size = row_indices.Size();
   const size_t *rid = row_indices.begin;
   auto const *pgh = reinterpret_cast<const float *>(gpair.data());
@@ -154,7 +126,7 @@ void BuildHistKernel(const std::vector<GradientPair> &gpair,
   const size_t n_features =
       get_row_ptr(row_indices.begin[0] + 1) - get_row_ptr(row_indices.begin[0]);
-  auto hist_data = reinterpret_cast<FPType *>(hist.data());
+  auto hist_data = reinterpret_cast<double *>(hist.data());
   const uint32_t two{2};  // Each element from 'gpair' and 'hist' contains
                           // 2 FP values: gradient and hessian.
                           // So we need to multiply each row-index/bin-index by 2
@@ -195,24 +167,21 @@ void BuildHistKernel(const std::vector<GradientPair> &gpair,
   }
 }

-template <typename FPType, bool do_prefetch, bool any_missing>
+template <bool do_prefetch, bool any_missing>
 void BuildHistDispatch(const std::vector<GradientPair> &gpair,
-                       const RowSetCollection::Elem row_indices,
-                       const GHistIndexMatrix &gmat, GHistRow<FPType> hist) {
+                       const RowSetCollection::Elem row_indices, const GHistIndexMatrix &gmat,
+                       GHistRow hist) {
   auto first_page = gmat.base_rowid == 0;
   if (first_page) {
     switch (gmat.index.GetBinTypeSize()) {
       case kUint8BinsTypeSize:
-        BuildHistKernel<FPType, do_prefetch, uint8_t, true, any_missing>(
-            gpair, row_indices, gmat, hist);
+        BuildHistKernel<do_prefetch, uint8_t, true, any_missing>(gpair, row_indices, gmat, hist);
         break;
       case kUint16BinsTypeSize:
-        BuildHistKernel<FPType, do_prefetch, uint16_t, true, any_missing>(
-            gpair, row_indices, gmat, hist);
+        BuildHistKernel<do_prefetch, uint16_t, true, any_missing>(gpair, row_indices, gmat, hist);
         break;
       case kUint32BinsTypeSize:
-        BuildHistKernel<FPType, do_prefetch, uint32_t, true, any_missing>(
-            gpair, row_indices, gmat, hist);
+        BuildHistKernel<do_prefetch, uint32_t, true, any_missing>(gpair, row_indices, gmat, hist);
         break;
       default:
         CHECK(false);  // no default behavior
@@ -220,16 +189,13 @@ void BuildHistDispatch(const std::vector<GradientPair> &gpair,
   } else {
     switch (gmat.index.GetBinTypeSize()) {
       case kUint8BinsTypeSize:
-        BuildHistKernel<FPType, do_prefetch, uint8_t, false, any_missing>(
-            gpair, row_indices, gmat, hist);
+        BuildHistKernel<do_prefetch, uint8_t, false, any_missing>(gpair, row_indices, gmat, hist);
         break;
       case kUint16BinsTypeSize:
-        BuildHistKernel<FPType, do_prefetch, uint16_t, false, any_missing>(
-            gpair, row_indices, gmat, hist);
+        BuildHistKernel<do_prefetch, uint16_t, false, any_missing>(gpair, row_indices, gmat, hist);
         break;
       case kUint32BinsTypeSize:
-        BuildHistKernel<FPType, do_prefetch, uint32_t, false, any_missing>(
-            gpair, row_indices, gmat, hist);
+        BuildHistKernel<do_prefetch, uint32_t, false, any_missing>(gpair, row_indices, gmat, hist);
         break;
       default:
         CHECK(false);  // no default behavior
@@ -237,12 +203,10 @@ void BuildHistDispatch(const std::vector<GradientPair> &gpair,
   }
 }

-template <typename GradientSumT>
 template <bool any_missing>
-void GHistBuilder<GradientSumT>::BuildHist(
-    const std::vector<GradientPair> &gpair,
-    const RowSetCollection::Elem row_indices, const GHistIndexMatrix &gmat,
-    GHistRowT hist) const {
+void GHistBuilder::BuildHist(const std::vector<GradientPair> &gpair,
+                             const RowSetCollection::Elem row_indices, const GHistIndexMatrix &gmat,
+                             GHistRow hist) const {
   const size_t nrows = row_indices.Size();
   const size_t no_prefetch_size = Prefetch::NoPrefetchSize(nrows);
@@ -252,7 +216,7 @@ void GHistBuilder<GradientSumT>::BuildHist(
   if (contiguousBlock) {
     // contiguous memory access, built-in HW prefetching is enough
-    BuildHistDispatch<GradientSumT, false, any_missing>(gpair, row_indices,
-                                                        gmat, hist);
+    BuildHistDispatch<false, any_missing>(gpair, row_indices,
+                                          gmat, hist);
   } else {
     const RowSetCollection::Elem span1(row_indices.begin,
@@ -260,33 +224,18 @@ void GHistBuilder<GradientSumT>::BuildHist(
     const RowSetCollection::Elem span2(row_indices.end - no_prefetch_size,
                                        row_indices.end);

-    BuildHistDispatch<GradientSumT, true, any_missing>(gpair, span1, gmat,
-                                                       hist);
+    BuildHistDispatch<true, any_missing>(gpair, span1, gmat, hist);
     // no prefetching to avoid loading extra memory
-    BuildHistDispatch<GradientSumT, false, any_missing>(gpair, span2, gmat,
-                                                        hist);
+    BuildHistDispatch<false, any_missing>(gpair, span2, gmat, hist);
   }
 }

-template void
-GHistBuilder<float>::BuildHist<true>(const std::vector<GradientPair> &gpair,
-                                     const RowSetCollection::Elem row_indices,
-                                     const GHistIndexMatrix &gmat,
-                                     GHistRow<float> hist) const;
-template void
-GHistBuilder<float>::BuildHist<false>(const std::vector<GradientPair> &gpair,
-                                      const RowSetCollection::Elem row_indices,
-                                      const GHistIndexMatrix &gmat,
-                                      GHistRow<float> hist) const;
-template void
-GHistBuilder<double>::BuildHist<true>(const std::vector<GradientPair> &gpair,
-                                      const RowSetCollection::Elem row_indices,
-                                      const GHistIndexMatrix &gmat,
-                                      GHistRow<double> hist) const;
-template void
-GHistBuilder<double>::BuildHist<false>(const std::vector<GradientPair> &gpair,
-                                       const RowSetCollection::Elem row_indices,
-                                       const GHistIndexMatrix &gmat,
-                                       GHistRow<double> hist) const;
+template void GHistBuilder::BuildHist<true>(const std::vector<GradientPair> &gpair,
+                                            const RowSetCollection::Elem row_indices,
+                                            const GHistIndexMatrix &gmat, GHistRow hist) const;
+template void GHistBuilder::BuildHist<false>(const std::vector<GradientPair> &gpair,
+                                             const RowSetCollection::Elem row_indices,
+                                             const GHistIndexMatrix &gmat, GHistRow hist) const;

 }  // namespace common
 }  // namespace xgboost
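All of the kernels above rely on the same layout trick: each histogram bin stores one gradient and one hessian, so a row of n bins can be viewed as 2*n contiguous doubles and walked with a flat loop over indices [2*begin, 2*end). Below is a self-contained sketch of that layout, not part of the commit; GradPair stands in for xgboost::GradientPairPrecise.

#include <cstddef>
#include <iostream>
#include <vector>

struct GradPair {
  double grad;
  double hess;
};

// Mirrors IncrementHist: reinterpret the bin array as flat doubles and
// accumulate gradient/hessian in a single loop, as the library does.
void Increment(std::vector<GradPair>* dst, const std::vector<GradPair>& add,
               std::size_t begin, std::size_t end) {
  double* pdst = reinterpret_cast<double*>(dst->data());
  const double* padd = reinterpret_cast<const double*>(add.data());
  for (std::size_t i = 2 * begin; i < 2 * end; ++i) {
    pdst[i] += padd[i];
  }
}

int main() {
  std::vector<GradPair> hist(4), partial(4);  // value-initialized to zero
  partial[1] = {0.5, 1.0};
  Increment(&hist, partial, 0, hist.size());
  std::cout << hist[1].grad << " " << hist[1].hess << "\n";  // prints: 0.5 1
  return 0;
}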


@@ -322,56 +322,44 @@ bst_bin_t XGBOOST_HOST_DEV_INLINE BinarySearchBin(size_t begin, size_t end,
   return -1;
 }

-template<typename GradientSumT>
-using GHistRow = Span<xgboost::detail::GradientPairInternal<GradientSumT> >;
+using GHistRow = Span<xgboost::GradientPairPrecise>;

 /*!
  * \brief fill a histogram by zeros
  */
-template<typename GradientSumT>
-void InitilizeHistByZeroes(GHistRow<GradientSumT> hist, size_t begin, size_t end);
+void InitilizeHistByZeroes(GHistRow hist, size_t begin, size_t end);

 /*!
  * \brief Increment hist as dst += add in range [begin, end)
  */
-template<typename GradientSumT>
-void IncrementHist(GHistRow<GradientSumT> dst, const GHistRow<GradientSumT> add,
-                   size_t begin, size_t end);
+void IncrementHist(GHistRow dst, const GHistRow add, size_t begin, size_t end);

 /*!
  * \brief Copy hist from src to dst in range [begin, end)
  */
-template<typename GradientSumT>
-void CopyHist(GHistRow<GradientSumT> dst, const GHistRow<GradientSumT> src,
-              size_t begin, size_t end);
+void CopyHist(GHistRow dst, const GHistRow src, size_t begin, size_t end);

 /*!
  * \brief Compute Subtraction: dst = src1 - src2 in range [begin, end)
  */
-template<typename GradientSumT>
-void SubtractionHist(GHistRow<GradientSumT> dst, const GHistRow<GradientSumT> src1,
-                     const GHistRow<GradientSumT> src2,
-                     size_t begin, size_t end);
+void SubtractionHist(GHistRow dst, const GHistRow src1, const GHistRow src2, size_t begin,
+                     size_t end);

 /*!
  * \brief histogram of gradient statistics for multiple nodes
  */
-template<typename GradientSumT>
 class HistCollection {
  public:
-  using GHistRowT = GHistRow<GradientSumT>;
-  using GradientPairT = xgboost::detail::GradientPairInternal<GradientSumT>;
-
   // access histogram for i-th node
-  GHistRowT operator[](bst_uint nid) const {
+  GHistRow operator[](bst_uint nid) const {
     constexpr uint32_t kMax = std::numeric_limits<uint32_t>::max();
     const size_t id = row_ptr_.at(nid);
     CHECK_NE(id, kMax);
-    GradientPairT* ptr = nullptr;
+    GradientPairPrecise* ptr = nullptr;
     if (contiguous_allocation_) {
-      ptr = const_cast<GradientPairT*>(data_[0].data() + nbins_*id);
+      ptr = const_cast<GradientPairPrecise*>(data_[0].data() + nbins_*id);
     } else {
-      ptr = const_cast<GradientPairT*>(data_[id].data());
+      ptr = const_cast<GradientPairPrecise*>(data_[id].data());
     }
     return {ptr, nbins_};
   }
@@ -431,7 +419,7 @@ class HistCollection {
   /*! \brief flag to identify contiguous memory allocation */
   bool contiguous_allocation_ = false;

-  std::vector<std::vector<GradientPairT>> data_;
+  std::vector<std::vector<GradientPairPrecise>> data_;

   /*! \brief row_ptr_[nid] locates bin for histogram of node nid */
   std::vector<size_t> row_ptr_;
@@ -442,11 +430,8 @@ class HistCollection {
  * Supports processing multiple tree-nodes for nested parallelism
  * Able to reduce histograms across threads in efficient way
  */
-template<typename GradientSumT>
 class ParallelGHistBuilder {
  public:
-  using GHistRowT = GHistRow<GradientSumT>;
-
   void Init(size_t nbins) {
     if (nbins != nbins_) {
       hist_buffer_.Init(nbins);
@@ -457,7 +442,7 @@ class ParallelGHistBuilder {
   // Add new elements if needed, mark all hists as unused
   // targeted_hists - already allocated hists which should contain final results after Reduce() call
   void Reset(size_t nthreads, size_t nodes, const BlockedSpace2d& space,
-             const std::vector<GHistRowT>& targeted_hists) {
+             const std::vector<GHistRow>& targeted_hists) {
     hist_buffer_.Init(nbins_);
     tid_nid_to_hist_.clear();
     threads_to_nids_map_.clear();
@@ -478,7 +463,7 @@ class ParallelGHistBuilder {
   }

   // Get specified hist, initialize hist by zeros if it wasn't used before
-  GHistRowT GetInitializedHist(size_t tid, size_t nid) {
+  GHistRow GetInitializedHist(size_t tid, size_t nid) {
     CHECK_LT(nid, nodes_);
     CHECK_LT(tid, nthreads_);
@@ -486,7 +471,7 @@ class ParallelGHistBuilder {
     if (idx >= 0) {
       hist_buffer_.AllocateData(idx);
     }
-    GHistRowT hist = idx == -1 ? targeted_hists_[nid] : hist_buffer_[idx];
+    GHistRow hist = idx == -1 ? targeted_hists_[nid] : hist_buffer_[idx];

     if (!hist_was_used_[tid * nodes_ + nid]) {
       InitilizeHistByZeroes(hist, 0, hist.size());
@@ -501,7 +486,7 @@ class ParallelGHistBuilder {
     CHECK_GT(end, begin);
     CHECK_LT(nid, nodes_);

-    GHistRowT dst = targeted_hists_[nid];
+    GHistRow dst = targeted_hists_[nid];

     bool is_updated = false;
     for (size_t tid = 0; tid < nthreads_; ++tid) {
@@ -509,7 +494,7 @@ class ParallelGHistBuilder {
         is_updated = true;
         int idx = tid_nid_to_hist_.at({tid, nid});
-        GHistRowT src = idx == -1 ? targeted_hists_[nid] : hist_buffer_[idx];
+        GHistRow src = idx == -1 ? targeted_hists_[nid] : hist_buffer_[idx];

         if (dst.data() != src.data()) {
           IncrementHist(dst, src, begin, end);
@@ -595,7 +580,7 @@ class ParallelGHistBuilder {
   /*! \brief number of nodes which will be processed in parallel */
   size_t nodes_ = 0;
   /*! \brief Buffer for additional histograms for Parallel processing */
-  HistCollection<GradientSumT> hist_buffer_;
+  HistCollection hist_buffer_;
   /*!
    * \brief Marks which hists were used, it means that they should be merged.
    * Contains only {true or false} values
@@ -606,7 +591,7 @@ class ParallelGHistBuilder {
   /*! \brief Buffer for additional histograms for Parallel processing */
   std::vector<bool> threads_to_nids_map_;
   /*! \brief Contains histograms for final results */
-  std::vector<GHistRowT> targeted_hists_;
+  std::vector<GHistRow> targeted_hists_;
   /*!
    * \brief map pair {tid, nid} to index of allocated histogram from hist_buffer_ and targeted_hists_,
    * -1 is reserved for targeted_hists_
@@ -617,19 +602,15 @@ class ParallelGHistBuilder {
 /*!
  * \brief builder for histograms of gradient statistics
  */
-template<typename GradientSumT>
 class GHistBuilder {
  public:
-  using GHistRowT = GHistRow<GradientSumT>;
-
   GHistBuilder() = default;
   explicit GHistBuilder(uint32_t nbins): nbins_{nbins} {}

   // construct a histogram via histogram aggregation
   template <bool any_missing>
-  void BuildHist(const std::vector<GradientPair> &gpair,
-                 const RowSetCollection::Elem row_indices,
-                 const GHistIndexMatrix &gmat, GHistRowT hist) const;
+  void BuildHist(const std::vector<GradientPair>& gpair, const RowSetCollection::Elem row_indices,
+                 const GHistIndexMatrix& gmat, GHistRow hist) const;

   uint32_t GetNumBins() const {
     return nbins_;
   }
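A point worth noting about the header above: ``GHistRow`` is now a non-owning span over double-precision pairs, so ``HistCollection::operator[]`` hands out a view into shared storage rather than copying bins. Below is a minimal standalone sketch of that design, not part of the commit; ``std::span`` (C++20) stands in for ``xgboost::common::Span`` and ``Histograms`` for ``HistCollection``.

#include <cstddef>
#include <span>    // C++20; stand-in for xgboost::common::Span
#include <vector>

struct GradPair {
  double grad;
  double hess;
};
using HistRow = std::span<GradPair>;  // analogous to the new GHistRow alias

// A minimal stand-in for HistCollection: one contiguous block, nbins per node.
class Histograms {
  std::size_t nbins_;
  std::vector<GradPair> data_;

 public:
  Histograms(std::size_t nodes, std::size_t nbins)
      : nbins_{nbins}, data_(nodes * nbins) {}
  HistRow operator[](std::size_t nid) {
    // Non-owning window over this node's bins; no allocation, no copy.
    return {data_.data() + nid * nbins_, nbins_};
  }
};

int main() {
  Histograms hist(2, 8);
  hist[1][3].grad += 1.0;  // writes through the view into shared storage
  return 0;
}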


@@ -22,7 +22,8 @@
 namespace xgboost {
 namespace tree {

-template <typename GradientSumT, typename ExpandEntry> class HistEvaluator {
+template <typename ExpandEntry>
+class HistEvaluator {
  private:
   struct NodeEntry {
     /*! \brief statics for node entry */
@@ -57,7 +58,7 @@ template <typename GradientSumT, typename ExpandEntry> class HistEvaluator {
   // a non-missing value for the particular feature fid.
   template <int d_step, SplitType split_type>
   GradStats EnumerateSplit(common::HistogramCuts const &cut, common::Span<size_t const> sorted_idx,
-                           const common::GHistRow<GradientSumT> &hist, bst_feature_t fidx,
+                           const common::GHistRow &hist, bst_feature_t fidx,
                            bst_node_t nidx,
                            TreeEvaluator::SplitEvaluator<TrainParam> const &evaluator,
                            SplitEntry *p_best) const {
@@ -197,10 +198,8 @@ template <typename GradientSumT, typename ExpandEntry> class HistEvaluator {
   }

  public:
-  void EvaluateSplits(const common::HistCollection<GradientSumT> &hist,
-                      common::HistogramCuts const &cut,
-                      common::Span<FeatureType const> feature_types,
-                      const RegTree &tree,
+  void EvaluateSplits(const common::HistCollection &hist, common::HistogramCuts const &cut,
+                      common::Span<FeatureType const> feature_types, const RegTree &tree,
                       std::vector<ExpandEntry> *p_entries) {
     auto& entries = *p_entries;
     // All nodes are on the same level, so we can store the shared ptr.
@@ -377,10 +376,10 @@ template <typename GradientSumT, typename ExpandEntry> class HistEvaluator {
  *
  * \param p_last_tree The last tree being updated by tree updater
  */
-template <typename Partitioner, typename GradientSumT, typename ExpandEntry>
+template <typename Partitioner, typename ExpandEntry>
 void UpdatePredictionCacheImpl(GenericParameter const *ctx, RegTree const *p_last_tree,
                                std::vector<Partitioner> const &partitioner,
-                               HistEvaluator<GradientSumT, ExpandEntry> const &hist_evaluator,
+                               HistEvaluator<ExpandEntry> const &hist_evaluator,
                                TrainParam const &param, linalg::VectorView<float> out_preds) {
   CHECK_GT(out_preds.Size(), 0U);


@@ -16,17 +16,15 @@
 namespace xgboost {
 namespace tree {

-template <typename GradientSumT, typename ExpandEntry> class HistogramBuilder {
-  using GradientPairT = xgboost::detail::GradientPairInternal<GradientSumT>;
-  using GHistRowT = common::GHistRow<GradientSumT>;
-
+template <typename ExpandEntry>
+class HistogramBuilder {
   /*! \brief culmulative histogram of gradients. */
-  common::HistCollection<GradientSumT> hist_;
+  common::HistCollection hist_;
   /*! \brief culmulative local parent histogram of gradients. */
-  common::HistCollection<GradientSumT> hist_local_worker_;
-  common::GHistBuilder<GradientSumT> builder_;
-  common::ParallelGHistBuilder<GradientSumT> buffer_;
-  rabit::Reducer<GradientPairT, GradientPairT::Reduce> reducer_;
+  common::HistCollection hist_local_worker_;
+  common::GHistBuilder builder_;
+  common::ParallelGHistBuilder buffer_;
+  rabit::Reducer<GradientPairPrecise, GradientPairPrecise::Reduce> reducer_;
   BatchParam param_;
   int32_t n_threads_{-1};
   size_t n_batches_{0};
@@ -51,8 +49,10 @@ template <typename GradientSumT, typename ExpandEntry> class HistogramBuilder {
     hist_.Init(total_bins);
     hist_local_worker_.Init(total_bins);
     buffer_.Init(total_bins);
-    builder_ = common::GHistBuilder<GradientSumT>(total_bins);
+    builder_ = common::GHistBuilder(total_bins);
     is_distributed_ = is_distributed;
+    // Workaround s390x gcc 7.5.0
+    auto DMLC_ATTRIBUTE_UNUSED __force_instantiation = &GradientPairPrecise::Reduce;
   }

   template <bool any_missing>
@@ -64,7 +64,7 @@ template <typename GradientSumT, typename ExpandEntry> class HistogramBuilder {
     const size_t n_nodes = nodes_for_explicit_hist_build.size();
     CHECK_GT(n_nodes, 0);

-    std::vector<GHistRowT> target_hists(n_nodes);
+    std::vector<common::GHistRow> target_hists(n_nodes);
     for (size_t i = 0; i < n_nodes; ++i) {
       const int32_t nid = nodes_for_explicit_hist_build[i].nid;
       target_hists[i] = hist_[nid];
@@ -243,9 +243,7 @@ template <typename GradientSumT, typename ExpandEntry> class HistogramBuilder {

  public:
   /* Getters for tests. */
-  common::HistCollection<GradientSumT> const& Histogram() {
-    return hist_;
-  }
+  common::HistCollection const &Histogram() { return hist_; }
   auto& Buffer() { return buffer_; }

  private:
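The two lines added in the first hunk above ("Workaround s390x gcc 7.5.0") appear to use a force-instantiation idiom: taking the address of ``GradientPairPrecise::Reduce`` obliges the compiler to emit the symbol that ``rabit::Reducer`` needs. That reading is an inference from the comment, not stated in the commit. A generic, self-contained sketch of the idiom, with ``Pair`` as a hypothetical stand-in:

#include <cstddef>

template <typename T>
struct Pair {
  T grad{}, hess{};
  // Stand-in for the reducer callback (GradientPairPrecise::Reduce in xgboost).
  static void Reduce(Pair &dst, Pair const &src) {
    dst.grad += src.grad;
    dst.hess += src.hess;
  }
};

int main() {
  // Taking the address forces the compiler to instantiate and emit
  // Pair<double>::Reduce even though nothing here calls it directly.
  [[maybe_unused]] auto force_instantiation = &Pair<double>::Reduce;
  return 0;
}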


@@ -1,10 +0,0 @@
-/*!
- * Copyright 2022 XGBoost contributors
- */
-#include "param.h"
-
-namespace xgboost {
-namespace tree {
-DMLC_REGISTER_PARAMETER(CPUHistMakerTrainParam);
-}  // namespace tree
-}  // namespace xgboost


@@ -1,23 +0,0 @@
-/*!
- * Copyright 2021 XGBoost contributors
- */
-#ifndef XGBOOST_TREE_HIST_PARAM_H_
-#define XGBOOST_TREE_HIST_PARAM_H_
-#include "xgboost/parameter.h"
-
-namespace xgboost {
-namespace tree {
-// training parameters specific to this algorithm
-struct CPUHistMakerTrainParam
-    : public XGBoostParameter<CPUHistMakerTrainParam> {
-  bool single_precision_histogram;
-  // declare parameters
-  DMLC_DECLARE_PARAMETER(CPUHistMakerTrainParam) {
-    DMLC_DECLARE_FIELD(single_precision_histogram).set_default(false).describe(
-        "Use single precision to build histograms.");
-  }
-};
-}  // namespace tree
-}  // namespace xgboost
-
-#endif  // XGBOOST_TREE_HIST_PARAM_H_


@@ -15,7 +15,6 @@
 #include "driver.h"
 #include "hist/evaluate_splits.h"
 #include "hist/histogram.h"
-#include "hist/param.h"
 #include "param.h"
 #include "xgboost/base.h"
 #include "xgboost/json.h"
@@ -38,13 +37,12 @@ auto BatchSpec(TrainParam const &p, common::Span<float> hess) {
 }
 }  // anonymous namespace

-template <typename GradientSumT>
 class GloablApproxBuilder {
  protected:
   TrainParam param_;
   std::shared_ptr<common::ColumnSampler> col_sampler_;
-  HistEvaluator<GradientSumT, CPUExpandEntry> evaluator_;
-  HistogramBuilder<GradientSumT, CPUExpandEntry> histogram_builder_;
+  HistEvaluator<CPUExpandEntry> evaluator_;
+  HistogramBuilder<CPUExpandEntry> histogram_builder_;
   Context const *ctx_;
   ObjInfo const task_;

@@ -166,7 +164,7 @@ class GloablApproxBuilder {
   }

  public:
-  explicit GloablApproxBuilder(TrainParam param, MetaInfo const &info, GenericParameter const *ctx,
+  explicit GloablApproxBuilder(TrainParam param, MetaInfo const &info, Context const *ctx,
                                std::shared_ptr<common::ColumnSampler> column_sampler, ObjInfo task,
                                common::Monitor *monitor)
       : param_{std::move(param)},
@@ -256,10 +254,8 @@
 class GlobalApproxUpdater : public TreeUpdater {
   TrainParam param_;
   common::Monitor monitor_;
-  CPUHistMakerTrainParam hist_param_;
   // specializations for different histogram precision.
-  std::unique_ptr<GloablApproxBuilder<float>> f32_impl_;
-  std::unique_ptr<GloablApproxBuilder<double>> f64_impl_;
+  std::unique_ptr<GloablApproxBuilder> pimpl_;
   // pointer to the last DMatrix, used for update prediction cache.
   DMatrix *cached_{nullptr};
   std::shared_ptr<common::ColumnSampler> column_sampler_ =
@@ -272,19 +268,14 @@ class GlobalApproxUpdater : public TreeUpdater {
     monitor_.Init(__func__);
   }

-  void Configure(const Args &args) override {
-    param_.UpdateAllowUnknown(args);
-    hist_param_.UpdateAllowUnknown(args);
-  }
+  void Configure(const Args &args) override { param_.UpdateAllowUnknown(args); }

   void LoadConfig(Json const &in) override {
     auto const &config = get<Object const>(in);
     FromJson(config.at("train_param"), &this->param_);
-    FromJson(config.at("hist_param"), &this->hist_param_);
   }
   void SaveConfig(Json *p_out) const override {
     auto &out = *p_out;
     out["train_param"] = ToJson(param_);
-    out["hist_param"] = ToJson(hist_param_);
   }

   void InitData(TrainParam const &param, HostDeviceVector<GradientPair> const *gpair,
@@ -316,13 +307,8 @@
     float lr = param_.learning_rate;
     param_.learning_rate = lr / trees.size();

-    if (hist_param_.single_precision_histogram) {
-      f32_impl_ = std::make_unique<GloablApproxBuilder<float>>(param_, m->Info(), ctx_,
-                                                               column_sampler_, task_, &monitor_);
-    } else {
-      f64_impl_ = std::make_unique<GloablApproxBuilder<double>>(param_, m->Info(), ctx_,
-                                                                column_sampler_, task_, &monitor_);
-    }
+    pimpl_ = std::make_unique<GloablApproxBuilder>(param_, m->Info(), ctx_, column_sampler_, task_,
+                                                   &monitor_);

     std::vector<GradientPair> h_gpair;
     InitData(param_, gpair, &h_gpair);
@@ -335,26 +321,17 @@
     size_t t_idx = 0;
     for (auto p_tree : trees) {
-      if (hist_param_.single_precision_histogram) {
-        this->f32_impl_->UpdateTree(m, h_gpair, hess, p_tree, &out_position[t_idx]);
-      } else {
-        this->f64_impl_->UpdateTree(m, h_gpair, hess, p_tree, &out_position[t_idx]);
-      }
+      this->pimpl_->UpdateTree(m, h_gpair, hess, p_tree, &out_position[t_idx]);
       ++t_idx;
     }
     param_.learning_rate = lr;
   }

   bool UpdatePredictionCache(const DMatrix *data, linalg::VectorView<float> out_preds) override {
-    if (data != cached_ || (!this->f32_impl_ && !this->f64_impl_)) {
+    if (data != cached_ || !pimpl_) {
       return false;
     }
-
-    if (hist_param_.single_precision_histogram) {
-      this->f32_impl_->UpdatePredictionCache(data, out_preds);
-    } else {
-      this->f64_impl_->UpdatePredictionCache(data, out_preds);
-    }
+    this->pimpl_->UpdatePredictionCache(data, out_preds);
     return true;
   }

View File

@@ -16,7 +16,6 @@
 #include "driver.h"
 #include "hist/evaluate_splits.h"
 #include "hist/expand_entry.h"
-#include "hist/param.h"
 #include "param.h"
 #include "xgboost/generic_parameters.h"
 #include "xgboost/json.h"


@@ -32,7 +32,6 @@ DMLC_REGISTRY_FILE_TAG(updater_quantile_hist);

 void QuantileHistMaker::Configure(const Args &args) {
   param_.UpdateAllowUnknown(args);
-  hist_maker_param_.UpdateAllowUnknown(args);
 }

 void QuantileHistMaker::Update(HostDeviceVector<GradientPair> *gpair, DMatrix *dmat,
@@ -44,24 +43,14 @@ void QuantileHistMaker::Update(HostDeviceVector<GradientPair> *gpair, DMatrix *d
   // build tree
   const size_t n_trees = trees.size();
-  if (hist_maker_param_.single_precision_histogram) {
-    if (!float_builder_) {
-      float_builder_.reset(new Builder<float>(n_trees, param_, dmat, task_, ctx_));
-    }
-  } else {
-    if (!double_builder_) {
-      double_builder_.reset(new Builder<double>(n_trees, param_, dmat, task_, ctx_));
-    }
+  if (!pimpl_) {
+    pimpl_.reset(new Builder(n_trees, param_, dmat, task_, ctx_));
   }

   size_t t_idx{0};
   for (auto p_tree : trees) {
     auto &t_row_position = out_position[t_idx];
-    if (hist_maker_param_.single_precision_histogram) {
-      this->float_builder_->UpdateTree(gpair, dmat, p_tree, &t_row_position);
-    } else {
-      this->double_builder_->UpdateTree(gpair, dmat, p_tree, &t_row_position);
-    }
+    this->pimpl_->UpdateTree(gpair, dmat, p_tree, &t_row_position);
     ++t_idx;
   }

@@ -70,17 +59,14 @@
 bool QuantileHistMaker::UpdatePredictionCache(const DMatrix *data,
                                               linalg::VectorView<float> out_preds) {
-  if (hist_maker_param_.single_precision_histogram && float_builder_) {
-    return float_builder_->UpdatePredictionCache(data, out_preds);
-  } else if (double_builder_) {
-    return double_builder_->UpdatePredictionCache(data, out_preds);
+  if (pimpl_) {
+    return pimpl_->UpdatePredictionCache(data, out_preds);
   } else {
     return false;
   }
 }

-template <typename GradientSumT>
-CPUExpandEntry QuantileHistMaker::Builder<GradientSumT>::InitRoot(
+CPUExpandEntry QuantileHistMaker::Builder::InitRoot(
     DMatrix *p_fmat, RegTree *p_tree, const std::vector<GradientPair> &gpair_h) {
   CPUExpandEntry node(RegTree::kRoot, p_tree->GetDepth(0), 0.0f);

@@ -96,7 +82,7 @@ CPUExpandEntry QuantileHistMaker::Builder<GradientSumT>::InitRoot(
   }

   {
-    GradientPairT grad_stat;
+    GradientPairPrecise grad_stat;
     if (p_fmat->IsDense()) {
       /**
        * Specialized code for dense data: For dense data (with no missing value), the sum
@@ -110,15 +96,14 @@
       auto hist = this->histogram_builder_->Histogram()[RegTree::kRoot];
       auto begin = hist.data();
       for (uint32_t i = ibegin; i < iend; ++i) {
-        GradientPairT const &et = begin[i];
+        GradientPairPrecise const &et = begin[i];
         grad_stat.Add(et.GetGrad(), et.GetHess());
       }
     } else {
       for (auto const &grad : gpair_h) {
         grad_stat.Add(grad.GetGrad(), grad.GetHess());
       }
-      rabit::Allreduce<rabit::op::Sum, GradientSumT>(reinterpret_cast<GradientSumT *>(&grad_stat),
-                                                     2);
+      rabit::Allreduce<rabit::op::Sum, double>(reinterpret_cast<double *>(&grad_stat), 2);
     }

     auto weight = evaluator_->InitRoot(GradStats{grad_stat});
@@ -140,10 +125,9 @@
   return node;
 }

-template <typename GradientSumT>
-void QuantileHistMaker::Builder<GradientSumT>::BuildHistogram(
-    DMatrix *p_fmat, RegTree *p_tree, std::vector<CPUExpandEntry> const &valid_candidates,
-    std::vector<GradientPair> const &gpair) {
+void QuantileHistMaker::Builder::BuildHistogram(DMatrix *p_fmat, RegTree *p_tree,
+                                                std::vector<CPUExpandEntry> const &valid_candidates,
+                                                std::vector<GradientPair> const &gpair) {
   std::vector<CPUExpandEntry> nodes_to_build(valid_candidates.size());
   std::vector<CPUExpandEntry> nodes_to_sub(valid_candidates.size());

@@ -173,10 +157,9 @@
   }
 }

-template <typename GradientSumT>
-void QuantileHistMaker::Builder<GradientSumT>::LeafPartition(
-    RegTree const &tree, common::Span<GradientPair const> gpair,
-    std::vector<bst_node_t> *p_out_position) {
+void QuantileHistMaker::Builder::LeafPartition(RegTree const &tree,
+                                               common::Span<GradientPair const> gpair,
+                                               std::vector<bst_node_t> *p_out_position) {
   monitor_->Start(__func__);
   if (!task_.UpdateTreeLeaf()) {
     return;
@@ -187,10 +170,9 @@
   monitor_->Stop(__func__);
 }

-template <typename GradientSumT>
-void QuantileHistMaker::Builder<GradientSumT>::ExpandTree(
-    DMatrix *p_fmat, RegTree *p_tree, const std::vector<GradientPair> &gpair_h,
-    HostDeviceVector<bst_node_t> *p_out_position) {
+void QuantileHistMaker::Builder::ExpandTree(DMatrix *p_fmat, RegTree *p_tree,
+                                            const std::vector<GradientPair> &gpair_h,
+                                            HostDeviceVector<bst_node_t> *p_out_position) {
   monitor_->Start(__func__);

   Driver<CPUExpandEntry> driver(static_cast<TrainParam::TreeGrowPolicy>(param_.grow_policy));
@@ -252,10 +234,9 @@
   monitor_->Stop(__func__);
 }

-template <typename GradientSumT>
-void QuantileHistMaker::Builder<GradientSumT>::UpdateTree(
-    HostDeviceVector<GradientPair> *gpair, DMatrix *p_fmat, RegTree *p_tree,
-    HostDeviceVector<bst_node_t> *p_out_position) {
+void QuantileHistMaker::Builder::UpdateTree(HostDeviceVector<GradientPair> *gpair, DMatrix *p_fmat,
+                                            RegTree *p_tree,
+                                            HostDeviceVector<bst_node_t> *p_out_position) {
   monitor_->Start(__func__);

   std::vector<GradientPair> *gpair_ptr = &(gpair->HostVector());
@@ -272,9 +253,8 @@
   monitor_->Stop(__func__);
 }

-template <typename GradientSumT>
-bool QuantileHistMaker::Builder<GradientSumT>::UpdatePredictionCache(
-    DMatrix const *data, linalg::VectorView<float> out_preds) const {
+bool QuantileHistMaker::Builder::UpdatePredictionCache(DMatrix const *data,
+                                                       linalg::VectorView<float> out_preds) const {
   // p_last_fmat_ is a valid pointer as long as UpdatePredictionCache() is called in
   // conjunction with Update().
   if (!p_last_fmat_ || !p_last_tree_ || data != p_last_fmat_) {
@@ -287,9 +267,8 @@
   return true;
 }

-template <typename GradientSumT>
-void QuantileHistMaker::Builder<GradientSumT>::InitSampling(const DMatrix &fmat,
-                                                            std::vector<GradientPair> *gpair) {
+void QuantileHistMaker::Builder::InitSampling(const DMatrix &fmat,
+                                              std::vector<GradientPair> *gpair) {
   monitor_->Start(__func__);
   const auto &info = fmat.Info();
   auto& rnd = common::GlobalRandom();
@@ -325,14 +304,10 @@
 #endif  // XGBOOST_CUSTOMIZE_GLOBAL_PRNG
   monitor_->Stop(__func__);
 }

-template<typename GradientSumT>
-size_t QuantileHistMaker::Builder<GradientSumT>::GetNumberOfTrees() {
-  return n_trees_;
-}
+size_t QuantileHistMaker::Builder::GetNumberOfTrees() { return n_trees_; }

-template <typename GradientSumT>
-void QuantileHistMaker::Builder<GradientSumT>::InitData(DMatrix *fmat, const RegTree &tree,
-                                                        std::vector<GradientPair> *gpair) {
+void QuantileHistMaker::Builder::InitData(DMatrix *fmat, const RegTree &tree,
+                                          std::vector<GradientPair> *gpair) {
   monitor_->Start(__func__);
   const auto& info = fmat->Info();

@@ -362,8 +337,8 @@
   // store a pointer to the tree
   p_last_tree_ = &tree;

-  evaluator_.reset(new HistEvaluator<GradientSumT, CPUExpandEntry>{
-      param_, info, this->ctx_->Threads(), column_sampler_});
+  evaluator_.reset(
+      new HistEvaluator<CPUExpandEntry>{param_, info, this->ctx_->Threads(), column_sampler_});

   monitor_->Stop(__func__);
 }

@@ -406,9 +381,6 @@ void HistRowPartitioner::AddSplitsToRowSet(const std::vector<CPUExpandEntry> &no
   }
 }

-template struct QuantileHistMaker::Builder<float>;
-template struct QuantileHistMaker::Builder<double>;
-
 XGBOOST_REGISTER_TREE_UPDATER(QuantileHistMaker, "grow_quantile_histmaker")
     .describe("Grow tree using quantized histogram.")
     .set_body([](GenericParameter const *ctx, ObjInfo task) {


@@ -24,7 +24,6 @@
 #include "hist/evaluate_splits.h"
 #include "hist/histogram.h"
 #include "hist/expand_entry.h"
-#include "hist/param.h"
 #include "constraints.h"
 #include "./param.h"

@@ -236,7 +235,7 @@ inline BatchParam HistBatch(TrainParam const& param) {
 class QuantileHistMaker: public TreeUpdater {
  public:
   explicit QuantileHistMaker(GenericParameter const* ctx, ObjInfo task)
-      : task_{task}, TreeUpdater(ctx) {}
+      : TreeUpdater(ctx), task_{task} {}
   void Configure(const Args& args) override;

   void Update(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
@@ -249,12 +248,10 @@ class QuantileHistMaker: public TreeUpdater {
   void LoadConfig(Json const& in) override {
     auto const& config = get<Object const>(in);
     FromJson(config.at("train_param"), &this->param_);
-    FromJson(config.at("cpu_hist_train_param"), &this->hist_maker_param_);
   }
   void SaveConfig(Json* p_out) const override {
     auto& out = *p_out;
     out["train_param"] = ToJson(param_);
-    out["cpu_hist_train_param"] = ToJson(hist_maker_param_);
   }

   char const* Name() const override {
@@ -264,22 +261,19 @@
   bool HasNodePosition() const override { return true; }

  protected:
-  CPUHistMakerTrainParam hist_maker_param_;
   // training parameter
   TrainParam param_;
   // actual builder that runs the algorithm
-  template<typename GradientSumT>
   struct Builder {
    public:
-    using GradientPairT = xgboost::detail::GradientPairInternal<GradientSumT>;
     // constructor
     explicit Builder(const size_t n_trees, const TrainParam& param, DMatrix const* fmat,
                      ObjInfo task, GenericParameter const* ctx)
         : n_trees_(n_trees),
           param_(param),
           p_last_fmat_(fmat),
-          histogram_builder_{new HistogramBuilder<GradientSumT, CPUExpandEntry>},
+          histogram_builder_{new HistogramBuilder<CPUExpandEntry>},
           task_{task},
           ctx_{ctx},
           monitor_{std::make_unique<common::Monitor>()} {
@@ -320,14 +314,14 @@
     std::vector<GradientPair> gpair_local_;

-    std::unique_ptr<HistEvaluator<GradientSumT, CPUExpandEntry>> evaluator_;
+    std::unique_ptr<HistEvaluator<CPUExpandEntry>> evaluator_;
     std::vector<HistRowPartitioner> partitioner_;

     // back pointers to tree and data matrix
     const RegTree* p_last_tree_{nullptr};
     DMatrix const* const p_last_fmat_;

-    std::unique_ptr<HistogramBuilder<GradientSumT, CPUExpandEntry>> histogram_builder_;
+    std::unique_ptr<HistogramBuilder<CPUExpandEntry>> histogram_builder_;
     ObjInfo task_;
     // Context for number of threads
     GenericParameter const* ctx_;
@@ -336,8 +330,7 @@
   };

  protected:
-  std::unique_ptr<Builder<float>> float_builder_;
-  std::unique_ptr<Builder<double>> double_builder_;
+  std::unique_ptr<Builder> pimpl_;

   ObjInfo task_;
 };
 }  // namespace tree


@@ -16,7 +16,6 @@ namespace common {

 size_t GetNThreads() { return common::OmpGetNumThreads(0); }

-template <typename GradientSumT>
 void ParallelGHistBuilderReset() {
   constexpr size_t kBins = 10;
   constexpr size_t kNodes = 5;
@@ -25,16 +24,16 @@
   constexpr double kValue = 1.0;
   const size_t nthreads = GetNThreads();

-  HistCollection<GradientSumT> collection;
+  HistCollection collection;
   collection.Init(kBins);

   for(size_t inode = 0; inode < kNodesExtended; inode++) {
     collection.AddHistRow(inode);
   }
   collection.AllocateAllData();
-  ParallelGHistBuilder<GradientSumT> hist_builder;
+  ParallelGHistBuilder hist_builder;
   hist_builder.Init(kBins);

-  std::vector<GHistRow<GradientSumT>> target_hist(kNodes);
+  std::vector<GHistRow> target_hist(kNodes);
   for(size_t i = 0; i < target_hist.size(); ++i) {
     target_hist[i] = collection[i];
   }
@@ -45,7 +44,7 @@
   common::ParallelFor2d(space, nthreads, [&](size_t inode, common::Range1d r) {
     const size_t tid = omp_get_thread_num();

-    GHistRow<GradientSumT> hist = hist_builder.GetInitializedHist(tid, inode);
+    GHistRow hist = hist_builder.GetInitializedHist(tid, inode);
     // fill hist by some non-null values
     for(size_t j = 0; j < kBins; ++j) {
       hist[j].Add(kValue, kValue);
@@ -63,7 +62,7 @@
   common::ParallelFor2d(space2, nthreads, [&](size_t inode, common::Range1d r) {
     const size_t tid = omp_get_thread_num();

-    GHistRow<GradientSumT> hist = hist_builder.GetInitializedHist(tid, inode);
+    GHistRow hist = hist_builder.GetInitializedHist(tid, inode);
     // fill hist by some non-null values
     for(size_t j = 0; j < kBins; ++j) {
       ASSERT_EQ(0.0, hist[j].GetGrad());
@@ -72,8 +71,6 @@
   });
 }

-template <typename GradientSumT>
 void ParallelGHistBuilderReduceHist(){
   constexpr size_t kBins = 10;
   constexpr size_t kNodes = 5;
@@ -81,16 +78,16 @@
   constexpr double kValue = 1.0;
   const size_t nthreads = GetNThreads();

-  HistCollection<GradientSumT> collection;
+  HistCollection collection;
   collection.Init(kBins);

   for(size_t inode = 0; inode < kNodes; inode++) {
     collection.AddHistRow(inode);
   }
   collection.AllocateAllData();
-  ParallelGHistBuilder<GradientSumT> hist_builder;
+  ParallelGHistBuilder hist_builder;
   hist_builder.Init(kBins);

-  std::vector<GHistRow<GradientSumT>> target_hist(kNodes);
+  std::vector<GHistRow> target_hist(kNodes);
   for(size_t i = 0; i < target_hist.size(); ++i) {
     target_hist[i] = collection[i];
   }
@@ -102,7 +99,7 @@
   common::ParallelFor2d(space, nthreads, [&](size_t inode, common::Range1d r) {
     const size_t tid = omp_get_thread_num();

-    GHistRow<GradientSumT> hist = hist_builder.GetInitializedHist(tid, inode);
+    GHistRow hist = hist_builder.GetInitializedHist(tid, inode);
     for(size_t i = 0; i < kBins; ++i) {
       hist[i].Add(kValue, kValue);
     }
@@ -120,21 +117,9 @@
   }
 }

-TEST(ParallelGHistBuilder, ResetDouble) {
-  ParallelGHistBuilderReset<double>();
-}
+TEST(ParallelGHistBuilder, Reset) { ParallelGHistBuilderReset(); }

-TEST(ParallelGHistBuilder, ResetFloat) {
-  ParallelGHistBuilderReset<float>();
-}
-
-TEST(ParallelGHistBuilder, ReduceHistDouble) {
-  ParallelGHistBuilderReduceHist<double>();
-}
-
-TEST(ParallelGHistBuilder, ReduceHistFloat) {
-  ParallelGHistBuilderReduceHist<float>();
-}
+TEST(ParallelGHistBuilder, ReduceHist) { ParallelGHistBuilderReduceHist(); }

 TEST(CutsBuilder, SearchGroupInd) {
   size_t constexpr kNumGroups = 4;


@@ -12,7 +12,7 @@
 namespace xgboost {
 namespace tree {

-template <typename GradientSumT> void TestEvaluateSplits() {
+void TestEvaluateSplits() {
   int static constexpr kRows = 8, kCols = 16;
   auto orig = omp_get_max_threads();
   int32_t n_threads = std::min(omp_get_max_threads(), 4);
@@ -24,9 +24,8 @@ void TestEvaluateSplits() {
   auto dmat = RandomDataGenerator(kRows, kCols, 0).Seed(3).GenerateDMatrix();

-  auto evaluator =
-      HistEvaluator<GradientSumT, CPUExpandEntry>{param, dmat->Info(), n_threads, sampler};
-  common::HistCollection<GradientSumT> hist;
+  auto evaluator = HistEvaluator<CPUExpandEntry>{param, dmat->Info(), n_threads, sampler};
+  common::HistCollection hist;
   std::vector<GradientPair> row_gpairs = {
       {1.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f}, {2.27f, 0.28f},
       {0.27f, 0.29f}, {0.37f, 0.39f}, {-0.47f, 0.49f}, {0.57f, 0.59f}};
@@ -40,7 +39,7 @@ void TestEvaluateSplits() {
   std::iota(row_indices.begin(), row_indices.end(), 0);
   row_set_collection.Init();

-  auto hist_builder = common::GHistBuilder<GradientSumT>(gmat.cut.Ptrs().back());
+  auto hist_builder = common::GHistBuilder(gmat.cut.Ptrs().back());
   hist.Init(gmat.cut.Ptrs().back());
   hist.AddHistRow(0);
   hist.AllocateAllData();
@@ -85,10 +84,7 @@ void TestEvaluateSplits() {
   omp_set_num_threads(orig);
 }

-TEST(HistEvaluator, Evaluate) {
-  TestEvaluateSplits<float>();
-  TestEvaluateSplits<double>();
-}
+TEST(HistEvaluator, Evaluate) { TestEvaluateSplits(); }

 TEST(HistEvaluator, Apply) {
   RegTree tree;
@@ -97,7 +93,7 @@ TEST(HistEvaluator, Apply) {
   param.UpdateAllowUnknown(Args{{"min_child_weight", "0"}, {"reg_lambda", "0.0"}});
   auto dmat = RandomDataGenerator(kNRows, kNCols, 0).Seed(3).GenerateDMatrix();
   auto sampler = std::make_shared<common::ColumnSampler>();
-  auto evaluator_ = HistEvaluator<float, CPUExpandEntry>{param, dmat->Info(), 4, sampler};
+  auto evaluator_ = HistEvaluator<CPUExpandEntry>{param, dmat->Info(), 4, sampler};

   CPUExpandEntry entry{0, 0, 10.0f};
   entry.split.left_sum = GradStats{0.4, 0.6f};
@@ -123,8 +119,7 @@ TEST_F(TestPartitionBasedSplit, CPUHist) {
   // check the evaluator is returning the optimal split
   std::vector<FeatureType> ft{FeatureType::kCategorical};
   auto sampler = std::make_shared<common::ColumnSampler>();
-  HistEvaluator<double, CPUExpandEntry> evaluator{param_, info_, common::OmpGetNumThreads(0),
-                                                  sampler};
+  HistEvaluator<CPUExpandEntry> evaluator{param_, info_, common::OmpGetNumThreads(0), sampler};
   evaluator.InitRoot(GradStats{total_gpair_});
   RegTree tree;
   std::vector<CPUExpandEntry> entries(1);
@@ -155,12 +150,11 @@ auto CompareOneHotAndPartition(bool onehot) {
   int32_t n_threads = 16;
   auto sampler = std::make_shared<common::ColumnSampler>();
-  auto evaluator =
-      HistEvaluator<GradientSumT, CPUExpandEntry>{param, dmat->Info(), n_threads, sampler};
+  auto evaluator = HistEvaluator<CPUExpandEntry>{param, dmat->Info(), n_threads, sampler};
   std::vector<CPUExpandEntry> entries(1);

   for (auto const &gmat : dmat->GetBatches<GHistIndexMatrix>({32, param.sparse_threshold})) {
-    common::HistCollection<GradientSumT> hist;
+    common::HistCollection hist;
     entries.front().nid = 0;
     entries.front().depth = 0;
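The evaluator tests above now pass a single template argument to HistEvaluator, the expand-entry type; the gradient-sum type is fixed internally. A minimal sketch of the new construction, reusing only names that appear in this diff (it assumes the in-tree headers and test helpers, so it is indicative rather than standalone):

#include <memory>

// Sketch only: RandomDataGenerator and common::ColumnSampler are the
// in-tree test helpers used by the tests above.
void SketchEvaluatorConstruction() {
  TrainParam param;
  param.UpdateAllowUnknown(Args{{"min_child_weight", "0"}, {"reg_lambda", "0.0"}});
  auto dmat = RandomDataGenerator(8, 16, 0).Seed(3).GenerateDMatrix();
  auto sampler = std::make_shared<common::ColumnSampler>();
  // One template parameter left: the expand-entry type.
  auto evaluator = HistEvaluator<CPUExpandEntry>{param, dmat->Info(), /*n_threads=*/4, sampler};
  (void)evaluator;
}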

@@ -23,7 +23,6 @@ void InitRowPartitionForTest(common::RowSetCollection *row_set, size_t n_samples
   }
 }  // anonymous namespace

-template <typename GradientSumT>
 void TestAddHistRows(bool is_distributed) {
   std::vector<CPUExpandEntry> nodes_for_explicit_hist_build_;
   std::vector<CPUExpandEntry> nodes_for_subtraction_trick_;
@@ -46,7 +45,7 @@ void TestAddHistRows(bool is_distributed) {
   nodes_for_subtraction_trick_.emplace_back(5, tree.GetDepth(5), 0.0f);
   nodes_for_subtraction_trick_.emplace_back(6, tree.GetDepth(6), 0.0f);

-  HistogramBuilder<GradientSumT, CPUExpandEntry> histogram_builder;
+  HistogramBuilder<CPUExpandEntry> histogram_builder;
   histogram_builder.Reset(gmat.cut.TotalBins(), {kMaxBins, 0.5}, omp_get_max_threads(), 1,
                           is_distributed);
   histogram_builder.AddHistRows(&starting_index, &sync_count,
@@ -66,14 +65,10 @@ void TestAddHistRows(bool is_distributed) {

 TEST(CPUHistogram, AddRows) {
-  TestAddHistRows<float>(true);
-  TestAddHistRows<double>(true);
-  TestAddHistRows<float>(false);
-  TestAddHistRows<double>(false);
+  TestAddHistRows(true);
+  TestAddHistRows(false);
 }

-template <typename GradientSumT>
 void TestSyncHist(bool is_distributed) {
   size_t constexpr kNRows = 8, kNCols = 16;
   int32_t constexpr kMaxBins = 4;
@@ -88,7 +83,7 @@ void TestSyncHist(bool is_distributed) {
       RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
   auto const &gmat = *(p_fmat->GetBatches<GHistIndexMatrix>(BatchParam{kMaxBins, 0.5}).begin());

-  HistogramBuilder<GradientSumT, CPUExpandEntry> histogram;
+  HistogramBuilder<CPUExpandEntry> histogram;
   uint32_t total_bins = gmat.cut.Ptrs().back();
   histogram.Reset(total_bins, {kMaxBins, 0.5}, omp_get_max_threads(), 1, is_distributed);
@@ -153,7 +148,7 @@ void TestSyncHist(bool is_distributed) {
       },
       256);

-  std::vector<common::GHistRow<GradientSumT>> target_hists(n_nodes);
+  std::vector<common::GHistRow> target_hists(n_nodes);
   for (size_t i = 0; i < nodes_for_explicit_hist_build_.size(); ++i) {
     const int32_t nid = nodes_for_explicit_hist_build_[i].nid;
     target_hists[i] = histogram.Histogram()[nid];
@@ -163,7 +158,7 @@ void TestSyncHist(bool is_distributed) {
   std::vector<size_t> n_ids = {1, 2};
   for (size_t i : n_ids) {
     auto this_hist = histogram.Histogram()[i];
-    GradientSumT *p_hist = reinterpret_cast<GradientSumT *>(this_hist.data());
+    double *p_hist = reinterpret_cast<double *>(this_hist.data());
     for (size_t bin_id = 0; bin_id < 2 * total_bins; ++bin_id) {
       p_hist[bin_id] = 2 * bin_id;
     }
@@ -172,7 +167,7 @@ void TestSyncHist(bool is_distributed) {
   n_ids[1] = 5;
   for (size_t i : n_ids) {
     auto this_hist = histogram.Histogram()[i];
-    GradientSumT *p_hist = reinterpret_cast<GradientSumT *>(this_hist.data());
+    double *p_hist = reinterpret_cast<double *>(this_hist.data());
     for (size_t bin_id = 0; bin_id < 2 * total_bins; ++bin_id) {
       p_hist[bin_id] = bin_id;
     }
@@ -190,15 +185,12 @@ void TestSyncHist(bool is_distributed) {
                            sync_count);
   }

-  using GHistRowT = common::GHistRow<GradientSumT>;
-  auto check_hist = [](const GHistRowT parent, const GHistRowT left,
-                       const GHistRowT right, size_t begin, size_t end) {
-    const GradientSumT *p_parent =
-        reinterpret_cast<const GradientSumT *>(parent.data());
-    const GradientSumT *p_left =
-        reinterpret_cast<const GradientSumT *>(left.data());
-    const GradientSumT *p_right =
-        reinterpret_cast<const GradientSumT *>(right.data());
+  using GHistRowT = common::GHistRow;
+  auto check_hist = [](const GHistRowT parent, const GHistRowT left, const GHistRowT right,
+                       size_t begin, size_t end) {
+    const double *p_parent = reinterpret_cast<const double *>(parent.data());
+    const double *p_left = reinterpret_cast<const double *>(left.data());
+    const double *p_right = reinterpret_cast<const double *>(right.data());
     for (size_t i = 2 * begin; i < 2 * end; ++i) {
       ASSERT_EQ(p_parent[i], p_left[i] + p_right[i]);
     }
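The reworked check_hist lambda asserts the subtraction-trick invariant: every parent bin equals the sum of the matching left- and right-child bins, which is what lets one child's histogram be derived by subtraction instead of a rebuild. A standalone rendering of the same check (plain C++; values chosen to be exactly representable so the floating-point equality holds):

#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  std::vector<double> parent{8.0, 6.0, 4.0}, left{5.0, 2.0, 1.0};
  std::vector<double> right(parent.size());
  for (std::size_t i = 0; i < parent.size(); ++i) {
    right[i] = parent[i] - left[i];           // subtraction trick
    assert(parent[i] == left[i] + right[i]);  // the invariant check_hist asserts
  }
  return 0;
}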
@@ -230,14 +222,10 @@ void TestSyncHist(bool is_distributed) {
 }

 TEST(CPUHistogram, SyncHist) {
-  TestSyncHist<float>(true);
-  TestSyncHist<double>(true);
-  TestSyncHist<float>(false);
-  TestSyncHist<double>(false);
+  TestSyncHist(true);
+  TestSyncHist(false);
 }

-template <typename GradientSumT>
 void TestBuildHistogram(bool is_distributed) {
   size_t constexpr kNRows = 8, kNCols = 16;
   int32_t constexpr kMaxBins = 4;
@@ -252,7 +240,7 @@ void TestBuildHistogram(bool is_distributed) {
       {0.27f, 0.29f}, {0.37f, 0.39f}, {0.47f, 0.49f}, {0.57f, 0.59f}};
   bst_node_t nid = 0;

-  HistogramBuilder<GradientSumT, CPUExpandEntry> histogram;
+  HistogramBuilder<CPUExpandEntry> histogram;
   histogram.Reset(total_bins, {kMaxBins, 0.5}, omp_get_max_threads(), 1, is_distributed);

   RegTree tree;
@@ -296,11 +284,8 @@ void TestBuildHistogram(bool is_distributed) {
 }

 TEST(CPUHistogram, BuildHist) {
-  TestBuildHistogram<float>(true);
-  TestBuildHistogram<double>(true);
-  TestBuildHistogram<float>(false);
-  TestBuildHistogram<double>(false);
+  TestBuildHistogram(true);
+  TestBuildHistogram(false);
 }

 namespace {
@@ -329,7 +314,7 @@ void TestHistogramCategorical(size_t n_categories) {
   /**
    * Generate hist with cat data.
    */
-  HistogramBuilder<double, CPUExpandEntry> cat_hist;
+  HistogramBuilder<CPUExpandEntry> cat_hist;
   for (auto const &gidx : cat_m->GetBatches<GHistIndexMatrix>({kBins, 0.5})) {
     auto total_bins = gidx.cut.TotalBins();
     cat_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false);
@@ -342,7 +327,7 @@ void TestHistogramCategorical(size_t n_categories) {
    */
   auto x_encoded = OneHotEncodeFeature(x, n_categories);
   auto encode_m = GetDMatrixFromData(x_encoded, kRows, n_categories);
-  HistogramBuilder<double, CPUExpandEntry> onehot_hist;
+  HistogramBuilder<CPUExpandEntry> onehot_hist;
   for (auto const &gidx : encode_m->GetBatches<GHistIndexMatrix>({kBins, 0.5})) {
     auto total_bins = gidx.cut.TotalBins();
     onehot_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false);
@@ -382,8 +367,8 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx) {
   std::vector<CPUExpandEntry> nodes;
   nodes.emplace_back(0, tree.GetDepth(0), 0.0f);

-  common::GHistRow<double> multi_page;
-  HistogramBuilder<double, CPUExpandEntry> multi_build;
+  common::GHistRow multi_page;
+  HistogramBuilder<CPUExpandEntry> multi_build;
   {
     /**
      * Multi page
@@ -417,8 +402,8 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx) {
     multi_page = multi_build.Histogram()[0];
   }

-  HistogramBuilder<double, CPUExpandEntry> single_build;
-  common::GHistRow<double> single_page;
+  HistogramBuilder<CPUExpandEntry> single_build;
+  common::GHistRow single_page;
   {
     /**
      * Single page
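The reinterpret_cast<double *> pattern in these tests assumes each histogram entry is a pair of doubles stored contiguously, so a row of n bins reads as a flat array of 2 * n doubles. A standalone sketch using a stand-in for the real entry type (this shows the layout assumption only, not XGBoost's GradientPairPrecise itself):

#include <cassert>
#include <cstddef>
#include <vector>

// Stand-in pair: two doubles, standard layout, no padding in practice.
struct PairD { double grad, hess; };

int main() {
  std::vector<PairD> hist(4);
  double *flat = reinterpret_cast<double *>(hist.data());
  for (std::size_t i = 0; i < 2 * hist.size(); ++i) {
    flat[i] = static_cast<double>(i);  // same addressing as the tests' p_hist
  }
  // Pair i maps to flat[2 * i] (grad) and flat[2 * i + 1] (hess).
  assert(hist[1].grad == 2.0 && hist[1].hess == 3.0);
  return 0;
}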

@@ -22,7 +22,7 @@ class TestPartitionBasedSplit : public ::testing::Test {
   MetaInfo info_;
   float best_score_{-std::numeric_limits<float>::infinity()};
   common::HistogramCuts cuts_;
-  common::HistCollection<double> hist_;
+  common::HistCollection hist_;
   GradientPairPrecise total_gpair_;

   void SetUp() override {
@@ -55,7 +55,7 @@ class TestPartitionBasedSplit : public ::testing::Test {
       total_gpair_ += e;
     }

-    auto enumerate = [this, n_feat = info_.num_col_](common::GHistRow<double> hist,
+    auto enumerate = [this, n_feat = info_.num_col_](common::GHistRow hist,
                                                      GradientPairPrecise parent_sum) {
       int32_t best_thresh = -1;
       float best_score{-std::numeric_limits<float>::infinity()};
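The enumerate lambda that begins here scans a feature's bins with a running left sum, derives the right sum from the parent, and tracks the best threshold and score. A standalone sketch of that scan with a simplified, unregularized gain (not XGBoost's exact objective):

#include <cstdio>
#include <vector>

struct PairD { double grad, hess; };

// Split scan over one feature's histogram: the left sum grows bin by
// bin, right = parent - left, and each threshold is scored with
// gl^2 / hl + gr^2 / hr (regularization omitted for brevity).
int main() {
  std::vector<PairD> hist{{1.0, 2.0}, {3.0, 1.0}, {-2.0, 2.0}, {0.5, 1.0}};
  PairD parent{0.0, 0.0};
  for (auto const &b : hist) {
    parent.grad += b.grad;
    parent.hess += b.hess;
  }
  PairD left{0.0, 0.0};
  int best_thresh = -1;
  double best_score = -1.0;
  for (int i = 0; i + 1 < static_cast<int>(hist.size()); ++i) {
    left.grad += hist[i].grad;
    left.hess += hist[i].hess;
    PairD right{parent.grad - left.grad, parent.hess - left.hess};
    double score = left.grad * left.grad / left.hess + right.grad * right.grad / right.hess;
    if (score > best_score) {
      best_score = score;
      best_thresh = i;  // split after bin i
    }
  }
  std::printf("best_thresh=%d best_score=%f\n", best_thresh, best_score);
  return 0;
}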