Clean up training code. (#3825)
* Remove GHistRow, GHistEntry, GHistIndexRow. * Remove kSimpleStats. * Remove CheckInfo, SetLeafVec in GradStats and in SKStats. * Clean up the GradStats. * Cleanup calcgain. * Move LossChangeMissing out of common. * Remove [] operator from GHistIndexBlock.
This commit is contained in:
@@ -1005,7 +1005,7 @@ class AllReducer {
|
||||
*/
|
||||
void Synchronize() {
|
||||
#ifdef XGBOOST_USE_NCCL
|
||||
for (int i = 0; i < device_ordinals.size(); i++) {
|
||||
for (size_t i = 0; i < device_ordinals.size(); i++) {
|
||||
dh::safe_cuda(cudaSetDevice(device_ordinals[i]));
|
||||
dh::safe_cuda(cudaStreamSynchronize(streams[i]));
|
||||
}
|
||||
@@ -1051,7 +1051,7 @@ template <typename T, typename FunctionT>
|
||||
void ExecuteIndexShards(std::vector<T> *shards, FunctionT f) {
|
||||
SaveCudaContext{[&]() {
|
||||
#pragma omp parallel for schedule(static, 1) if (shards->size() > 1)
|
||||
for (int shard = 0; shard < shards->size(); ++shard) {
|
||||
for (size_t shard = 0; shard < shards->size(); ++shard) {
|
||||
f(shard, shards->at(shard));
|
||||
}
|
||||
}};
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2017 by Contributors
|
||||
* Copyright 2017-2018 by Contributors
|
||||
* \file hist_util.h
|
||||
* \brief Utilities to store histograms
|
||||
* \author Philip Cho, Tianqi Chen
|
||||
@@ -417,7 +417,7 @@ void GHistBuilder::BuildHist(const std::vector<GradientPair>& gpair,
|
||||
const size_t* row_ptr = gmat.row_ptr.data();
|
||||
const float* pgh = reinterpret_cast<const float*>(gpair.data());
|
||||
|
||||
double* hist_data = reinterpret_cast<double*>(hist.begin);
|
||||
double* hist_data = reinterpret_cast<double*>(hist.data());
|
||||
double* data = reinterpret_cast<double*>(data_.data());
|
||||
|
||||
const size_t block_size = 512;
|
||||
@@ -432,11 +432,11 @@ void GHistBuilder::BuildHist(const std::vector<GradientPair>& gpair,
|
||||
size_t no_prefetch_size = prefetch_offset + cache_line_size/sizeof(*rid);
|
||||
no_prefetch_size = no_prefetch_size > nrows ? nrows : no_prefetch_size;
|
||||
|
||||
#pragma omp parallel for num_threads(nthread_to_process) schedule(guided)
|
||||
#pragma omp parallel for num_threads(nthread_to_process) schedule(guided)
|
||||
for (bst_omp_uint iblock = 0; iblock < n_blocks; iblock++) {
|
||||
dmlc::omp_uint tid = omp_get_thread_num();
|
||||
double* data_local_hist = ((nthread_to_process == 1) ? hist_data :
|
||||
reinterpret_cast<double*>(data_.data() + tid * nbins_));
|
||||
reinterpret_cast<double*>(data_.data() + tid * nbins_));
|
||||
|
||||
if (!thread_init_[tid]) {
|
||||
memset(data_local_hist, '\0', 2*nbins_*sizeof(double));
|
||||
@@ -477,7 +477,7 @@ void GHistBuilder::BuildHist(const std::vector<GradientPair>& gpair,
|
||||
}
|
||||
}
|
||||
|
||||
#pragma omp parallel for num_threads(std::min(nthread, n_blocks)) schedule(guided)
|
||||
#pragma omp parallel for num_threads(std::min(nthread, n_blocks)) schedule(guided)
|
||||
for (bst_omp_uint iblock = 0; iblock < n_blocks; iblock++) {
|
||||
const size_t istart = iblock * block_size;
|
||||
const size_t iend = (((iblock + 1) * block_size > size) ? size : istart + block_size);
|
||||
@@ -507,8 +507,9 @@ void GHistBuilder::BuildBlockHist(const std::vector<GradientPair>& gpair,
|
||||
#if defined(_OPENMP)
|
||||
const auto nthread = static_cast<bst_omp_uint>(this->nthread_);
|
||||
#endif
|
||||
tree::GradStats* p_hist = hist.data();
|
||||
|
||||
#pragma omp parallel for num_threads(nthread) schedule(guided)
|
||||
#pragma omp parallel for num_threads(nthread) schedule(guided)
|
||||
for (bst_omp_uint bid = 0; bid < nblock; ++bid) {
|
||||
auto gmat = gmatb[bid];
|
||||
|
||||
@@ -517,20 +518,17 @@ void GHistBuilder::BuildBlockHist(const std::vector<GradientPair>& gpair,
|
||||
size_t ibegin[kUnroll];
|
||||
size_t iend[kUnroll];
|
||||
GradientPair stat[kUnroll];
|
||||
|
||||
for (int k = 0; k < kUnroll; ++k) {
|
||||
rid[k] = row_indices.begin[i + k];
|
||||
}
|
||||
for (int k = 0; k < kUnroll; ++k) {
|
||||
ibegin[k] = gmat.row_ptr[rid[k]];
|
||||
iend[k] = gmat.row_ptr[rid[k] + 1];
|
||||
}
|
||||
for (int k = 0; k < kUnroll; ++k) {
|
||||
stat[k] = gpair[rid[k]];
|
||||
}
|
||||
for (int k = 0; k < kUnroll; ++k) {
|
||||
for (size_t j = ibegin[k]; j < iend[k]; ++j) {
|
||||
const uint32_t bin = gmat.index[j];
|
||||
hist.begin[bin].Add(stat[k]);
|
||||
p_hist[bin].Add(stat[k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -541,7 +539,7 @@ void GHistBuilder::BuildBlockHist(const std::vector<GradientPair>& gpair,
|
||||
const GradientPair stat = gpair[rid];
|
||||
for (size_t j = ibegin; j < iend; ++j) {
|
||||
const uint32_t bin = gmat.index[j];
|
||||
hist.begin[bin].Add(stat);
|
||||
p_hist[bin].Add(stat);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -555,24 +553,27 @@ void GHistBuilder::SubtractionTrick(GHistRow self, GHistRow sibling, GHistRow pa
|
||||
#if defined(_OPENMP)
|
||||
const auto nthread = static_cast<bst_omp_uint>(this->nthread_);
|
||||
#endif
|
||||
tree::GradStats* p_self = self.data();
|
||||
tree::GradStats* p_sibling = sibling.data();
|
||||
tree::GradStats* p_parent = parent.data();
|
||||
|
||||
#pragma omp parallel for num_threads(nthread) schedule(static)
|
||||
#pragma omp parallel for num_threads(nthread) schedule(static)
|
||||
for (bst_omp_uint bin_id = 0;
|
||||
bin_id < static_cast<bst_omp_uint>(nbins - rest); bin_id += kUnroll) {
|
||||
GHistEntry pb[kUnroll];
|
||||
GHistEntry sb[kUnroll];
|
||||
tree::GradStats pb[kUnroll];
|
||||
tree::GradStats sb[kUnroll];
|
||||
for (int k = 0; k < kUnroll; ++k) {
|
||||
pb[k] = parent.begin[bin_id + k];
|
||||
pb[k] = p_parent[bin_id + k];
|
||||
}
|
||||
for (int k = 0; k < kUnroll; ++k) {
|
||||
sb[k] = sibling.begin[bin_id + k];
|
||||
sb[k] = p_sibling[bin_id + k];
|
||||
}
|
||||
for (int k = 0; k < kUnroll; ++k) {
|
||||
self.begin[bin_id + k].SetSubtract(pb[k], sb[k]);
|
||||
p_self[bin_id + k].SetSubstract(pb[k], sb[k]);
|
||||
}
|
||||
}
|
||||
for (uint32_t bin_id = nbins - rest; bin_id < nbins; ++bin_id) {
|
||||
self.begin[bin_id].SetSubtract(parent.begin[bin_id], sibling.begin[bin_id]);
|
||||
p_self[bin_id].SetSubstract(p_parent[bin_id], p_sibling[bin_id]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -16,45 +16,8 @@
|
||||
#include "../include/rabit/rabit.h"
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
namespace common {
|
||||
|
||||
/*! \brief sums of gradient statistics corresponding to a histogram bin */
|
||||
struct GHistEntry {
|
||||
/*! \brief sum of first-order gradient statistics */
|
||||
double sum_grad{0};
|
||||
/*! \brief sum of second-order gradient statistics */
|
||||
double sum_hess{0};
|
||||
|
||||
GHistEntry() = default;
|
||||
|
||||
inline void Clear() {
|
||||
sum_grad = sum_hess = 0;
|
||||
}
|
||||
|
||||
/*! \brief add a GradientPair to the sum */
|
||||
inline void Add(const GradientPair& e) {
|
||||
sum_grad += e.GetGrad();
|
||||
sum_hess += e.GetHess();
|
||||
}
|
||||
|
||||
/*! \brief add a GHistEntry to the sum */
|
||||
inline void Add(const GHistEntry& e) {
|
||||
sum_grad += e.sum_grad;
|
||||
sum_hess += e.sum_hess;
|
||||
}
|
||||
|
||||
inline static void Reduce(GHistEntry& a, const GHistEntry& b) { // NOLINT(*)
|
||||
a.Add(b);
|
||||
}
|
||||
|
||||
/*! \brief set sum to be difference of two GHistEntry's */
|
||||
inline void SetSubtract(const GHistEntry& a, const GHistEntry& b) {
|
||||
sum_grad = a.sum_grad - b.sum_grad;
|
||||
sum_hess = a.sum_hess - b.sum_hess;
|
||||
}
|
||||
};
|
||||
|
||||
/*! \brief Cut configuration for all the features. */
|
||||
struct HistCutMatrix {
|
||||
/*! \brief Unit pointer to rows by element position */
|
||||
@@ -83,15 +46,7 @@ void DeviceSketch
|
||||
* \brief A single row in global histogram index.
|
||||
* Directly represent the global index in the histogram entry.
|
||||
*/
|
||||
struct GHistIndexRow {
|
||||
/*! \brief The index of the histogram */
|
||||
const uint32_t* index;
|
||||
/*! \brief The size of the histogram */
|
||||
size_t size;
|
||||
GHistIndexRow() = default;
|
||||
GHistIndexRow(const uint32_t* index, size_t size)
|
||||
: index(index), size(size) {}
|
||||
};
|
||||
using GHistIndexRow = Span<uint32_t const>;
|
||||
|
||||
/*!
|
||||
* \brief preprocessed global index matrix, in CSR format
|
||||
@@ -111,7 +66,9 @@ struct GHistIndexMatrix {
|
||||
void Init(DMatrix* p_fmat, int max_num_bins);
|
||||
// get i-th row
|
||||
inline GHistIndexRow operator[](size_t i) const {
|
||||
return {&index[0] + row_ptr[i], row_ptr[i + 1] - row_ptr[i]};
|
||||
return {&index[0] + row_ptr[i],
|
||||
static_cast<GHistIndexRow::index_type>(
|
||||
row_ptr[i + 1] - row_ptr[i])};
|
||||
}
|
||||
inline void GetFeatureCounts(size_t* counts) const {
|
||||
auto nfeature = cut.row_ptr.size() - 1;
|
||||
@@ -134,11 +91,6 @@ struct GHistIndexBlock {
|
||||
|
||||
inline GHistIndexBlock(const size_t* row_ptr, const uint32_t* index)
|
||||
: row_ptr(row_ptr), index(index) {}
|
||||
|
||||
// get i-th row
|
||||
inline GHistIndexRow operator[](size_t i) const {
|
||||
return {&index[0] + row_ptr[i], row_ptr[i + 1] - row_ptr[i]};
|
||||
}
|
||||
};
|
||||
|
||||
class ColumnMatrix;
|
||||
@@ -171,21 +123,12 @@ class GHistIndexBlockMatrix {
|
||||
};
|
||||
|
||||
/*!
|
||||
* \brief histogram of gradient statistics for a single node.
|
||||
* Consists of multiple GHistEntry's, each entry showing total gradient statistics
|
||||
* \brief histogram of gradient statistics for a single node.
|
||||
* Consists of multiple GradStats, each entry showing total gradient statistics
|
||||
* for that particular bin
|
||||
* Uses global bin id so as to represent all features simultaneously
|
||||
*/
|
||||
struct GHistRow {
|
||||
/*! \brief base pointer to first entry */
|
||||
GHistEntry* begin;
|
||||
/*! \brief number of entries */
|
||||
uint32_t size;
|
||||
|
||||
GHistRow() = default;
|
||||
GHistRow(GHistEntry* begin, uint32_t size)
|
||||
: begin(begin), size(size) {}
|
||||
};
|
||||
using GHistRow = Span<tree::GradStats>;
|
||||
|
||||
/*!
|
||||
* \brief histogram of gradient statistics for multiple nodes
|
||||
@@ -193,27 +136,29 @@ struct GHistRow {
|
||||
class HistCollection {
|
||||
public:
|
||||
// access histogram for i-th node
|
||||
inline GHistRow operator[](bst_uint nid) const {
|
||||
GHistRow operator[](bst_uint nid) const {
|
||||
constexpr uint32_t kMax = std::numeric_limits<uint32_t>::max();
|
||||
CHECK_NE(row_ptr_[nid], kMax);
|
||||
return {const_cast<GHistEntry*>(dmlc::BeginPtr(data_) + row_ptr_[nid]), nbins_};
|
||||
tree::GradStats* ptr =
|
||||
const_cast<tree::GradStats*>(dmlc::BeginPtr(data_) + row_ptr_[nid]);
|
||||
return {ptr, nbins_};
|
||||
}
|
||||
|
||||
// have we computed a histogram for i-th node?
|
||||
inline bool RowExists(bst_uint nid) const {
|
||||
bool RowExists(bst_uint nid) const {
|
||||
const uint32_t k_max = std::numeric_limits<uint32_t>::max();
|
||||
return (nid < row_ptr_.size() && row_ptr_[nid] != k_max);
|
||||
}
|
||||
|
||||
// initialize histogram collection
|
||||
inline void Init(uint32_t nbins) {
|
||||
void Init(uint32_t nbins) {
|
||||
nbins_ = nbins;
|
||||
row_ptr_.clear();
|
||||
data_.clear();
|
||||
}
|
||||
|
||||
// create an empty histogram for i-th node
|
||||
inline void AddHistRow(bst_uint nid) {
|
||||
void AddHistRow(bst_uint nid) {
|
||||
constexpr uint32_t kMax = std::numeric_limits<uint32_t>::max();
|
||||
if (nid >= row_ptr_.size()) {
|
||||
row_ptr_.resize(nid + 1, kMax);
|
||||
@@ -228,7 +173,7 @@ class HistCollection {
|
||||
/*! \brief number of all bins over all features */
|
||||
uint32_t nbins_;
|
||||
|
||||
std::vector<GHistEntry> data_;
|
||||
std::vector<tree::GradStats> data_;
|
||||
|
||||
/*! \brief row_ptr_[nid] locates bin for histogram of node nid */
|
||||
std::vector<size_t> row_ptr_;
|
||||
@@ -268,8 +213,8 @@ class GHistBuilder {
|
||||
size_t nthread_;
|
||||
/*! \brief number of all bins over all features */
|
||||
uint32_t nbins_;
|
||||
std::vector<GHistEntry> data_;
|
||||
std::vector<size_t> thread_init_;
|
||||
std::vector<tree::GradStats> data_;
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -140,7 +140,7 @@ class GPUDistribution {
|
||||
return begin;
|
||||
}
|
||||
|
||||
size_t ShardSize(size_t size, int index) const {
|
||||
size_t ShardSize(size_t size, size_t index) const {
|
||||
if (size == 0) { return 0; }
|
||||
if (offsets_.size() > 0) {
|
||||
// explicit offsets are provided
|
||||
@@ -154,7 +154,7 @@ class GPUDistribution {
|
||||
return end - begin;
|
||||
}
|
||||
|
||||
size_t ShardProperSize(size_t size, int index) const {
|
||||
size_t ShardProperSize(size_t size, size_t index) const {
|
||||
if (size == 0) { return 0; }
|
||||
return ShardSize(size, index) - (devices_.Size() - 1 > index ? overlap_ : 0);
|
||||
}
|
||||
|
||||
@@ -554,8 +554,8 @@ class Span {
|
||||
detail::ptrdiff_t _offset,
|
||||
detail::ptrdiff_t _count = dynamic_extent) const {
|
||||
SPAN_CHECK(_offset >= 0 && _offset < size());
|
||||
SPAN_CHECK(_count == dynamic_extent ||
|
||||
_count >= 0 && _offset + _count <= size());
|
||||
SPAN_CHECK((_count == dynamic_extent) ||
|
||||
(_count >= 0 && _offset + _count <= size()));
|
||||
|
||||
return {data() + _offset, _count ==
|
||||
dynamic_extent ? size() - _offset : _count};
|
||||
|
||||
@@ -58,12 +58,12 @@ class Transform {
|
||||
public:
|
||||
Evaluator(Functor func, Range range, GPUSet devices, bool reshard) :
|
||||
func_(func), range_{std::move(range)},
|
||||
distribution_{std::move(GPUDistribution::Block(devices))},
|
||||
reshard_{reshard} {}
|
||||
reshard_{reshard},
|
||||
distribution_{std::move(GPUDistribution::Block(devices))} {}
|
||||
Evaluator(Functor func, Range range, GPUDistribution dist,
|
||||
bool reshard) :
|
||||
func_(func), range_{std::move(range)}, distribution_{std::move(dist)},
|
||||
reshard_{reshard} {}
|
||||
func_(func), range_{std::move(range)}, reshard_{reshard},
|
||||
distribution_{std::move(dist)} {}
|
||||
|
||||
/*!
|
||||
* \brief Evaluate the functor with input pointers to HostDeviceVector.
|
||||
@@ -159,7 +159,7 @@ class Transform {
|
||||
|
||||
template <typename... HDV>
|
||||
void LaunchCPU(Functor func, HDV*... vectors) const {
|
||||
auto end = *(range_.end());
|
||||
omp_ulong end = static_cast<omp_ulong>(*(range_.end()));
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (omp_ulong idx = 0; idx < end; ++idx) {
|
||||
func(idx, UnpackHDV(vectors)...);
|
||||
|
||||
Reference in New Issue
Block a user