Extract partial sum into an independent function. (#7889)
This commit is contained in:
parent
db80671d6b
commit
11d65fcb21
@ -274,24 +274,6 @@ template <typename Indexable>
|
||||
XGBOOST_DEVICE size_t LastOf(size_t group, Indexable const &indptr) {
|
||||
return indptr[group + 1] - 1;
|
||||
}
|
||||
|
||||
/**
 * \brief CPU run-length encoding of an already sorted sequence.
 *
 * On return \p p_out holds the start offset of every run of equal values in
 * [begin, end) followed by the total number of elements, so that run k spans
 * [out[k], out[k + 1]).  An empty input yields the single element {0}.
 *
 * \param begin Iterator to the first element; must support operator[].
 * \param end   Iterator past the last element.
 * \param p_out Output vector; any previous content is discarded.
 */
template <typename Iter, typename Idx>
void RunLengthEncode(Iter begin, Iter end, std::vector<Idx> *p_out) {
  std::vector<Idx> &offsets = *p_out;
  offsets = std::vector<Idx>{0};

  size_t const n_elements = std::distance(begin, end);

  // A new run starts wherever an element differs from its predecessor.
  size_t pos = 1;
  while (pos < n_elements) {
    bool const starts_new_run = begin[pos] != begin[pos - 1];
    if (starts_new_run) {
      offsets.push_back(pos);
    }
    ++pos;
  }

  // Close the last run with the total length (skipped only for empty input, where the
  // initial 0 already equals the length).
  bool const needs_terminator = offsets.back() != n_elements;
  if (needs_terminator) {
    offsets.push_back(n_elements);
  }
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
#endif // XGBOOST_COMMON_COMMON_H_
|
||||
|
||||
96
src/common/numeric.h
Normal file
96
src/common/numeric.h
Normal file
@ -0,0 +1,96 @@
|
||||
/*!
|
||||
* Copyright 2022, XGBoost contributors.
|
||||
*/
|
||||
#ifndef XGBOOST_COMMON_NUMERIC_H_
|
||||
#define XGBOOST_COMMON_NUMERIC_H_
|
||||
|
||||
#include <algorithm> // std::max
|
||||
#include <iterator> // std::iterator_traits
|
||||
#include <vector>
|
||||
|
||||
#include "threading_utils.h"
|
||||
#include "xgboost/generic_parameters.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
|
||||
/**
 * \brief CPU run-length encoding of an already sorted sequence.
 *
 * On return \p p_out holds the start offset of every run of equal values in
 * [begin, end) followed by the total number of elements, so that run k spans
 * [out[k], out[k + 1]).  An empty input yields the single element {0}.
 *
 * \param begin Iterator to the first element; must support operator[].
 * \param end   Iterator past the last element.
 * \param p_out Output vector; any previous content is discarded.
 */
template <typename Iter, typename Idx>
void RunLengthEncode(Iter begin, Iter end, std::vector<Idx> *p_out) {
  std::vector<Idx> &offsets = *p_out;
  offsets = std::vector<Idx>{0};

  size_t const n_elements = std::distance(begin, end);

  // A new run starts wherever an element differs from its predecessor.
  size_t pos = 1;
  while (pos < n_elements) {
    bool const starts_new_run = begin[pos] != begin[pos - 1];
    if (starts_new_run) {
      offsets.push_back(pos);
    }
    ++pos;
  }

  // Close the last run with the total length (skipped only for empty input, where the
  // initial 0 already equals the length).
  bool const needs_terminator = offsets.back() != n_elements;
  if (needs_terminator) {
    offsets.push_back(n_elements);
  }
}
|
||||
|
||||
/**
|
||||
* \brief Varient of std::partial_sum, out_it should point to a container that has n + 1
|
||||
* elements. Useful for constructing a CSR indptr.
|
||||
*/
|
||||
template <typename InIt, typename OutIt, typename T>
|
||||
void PartialSum(int32_t n_threads, InIt begin, InIt end, T init, OutIt out_it) {
|
||||
static_assert(std::is_same<T, typename std::iterator_traits<InIt>::value_type>::value, "");
|
||||
static_assert(std::is_same<T, typename std::iterator_traits<OutIt>::value_type>::value, "");
|
||||
// The number of threads is pegged to the batch size. If the OMP block is parallelized
|
||||
// on anything other than the batch/block size, it should be reassigned
|
||||
auto n = static_cast<size_t>(std::distance(begin, end));
|
||||
const size_t batch_threads =
|
||||
std::max(static_cast<size_t>(1), std::min(n, static_cast<size_t>(n_threads)));
|
||||
common::MemStackAllocator<T, 128> partial_sums(batch_threads);
|
||||
|
||||
size_t block_size = n / batch_threads;
|
||||
|
||||
dmlc::OMPException exc;
|
||||
#pragma omp parallel num_threads(batch_threads)
|
||||
{
|
||||
#pragma omp for
|
||||
for (omp_ulong tid = 0; tid < batch_threads; ++tid) {
|
||||
exc.Run([&]() {
|
||||
size_t ibegin = block_size * tid;
|
||||
size_t iend = (tid == (batch_threads - 1) ? n : (block_size * (tid + 1)));
|
||||
|
||||
T running_sum = 0;
|
||||
for (size_t ridx = ibegin; ridx < iend; ++ridx) {
|
||||
running_sum += *(begin + ridx);
|
||||
*(out_it + 1 + ridx) = running_sum;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
#pragma omp single
|
||||
{
|
||||
exc.Run([&]() {
|
||||
partial_sums[0] = init;
|
||||
for (size_t i = 1; i < batch_threads; ++i) {
|
||||
partial_sums[i] = partial_sums[i - 1] + *(out_it + i * block_size);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
#pragma omp for
|
||||
for (omp_ulong tid = 0; tid < batch_threads; ++tid) {
|
||||
exc.Run([&]() {
|
||||
size_t ibegin = block_size * tid;
|
||||
size_t iend = (tid == (batch_threads - 1) ? n : (block_size * (tid + 1)));
|
||||
|
||||
for (size_t i = ibegin; i < iend; ++i) {
|
||||
*(out_it + 1 + i) += partial_sums[tid];
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
exc.Rethrow();
|
||||
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
|
||||
#endif // XGBOOST_COMMON_NUMERIC_H_
|
||||
@ -21,6 +21,7 @@
|
||||
#include "../common/io.h"
|
||||
#include "../common/linalg_op.h"
|
||||
#include "../common/math.h"
|
||||
#include "../common/numeric.h"
|
||||
#include "../common/version.h"
|
||||
#include "../common/group_data.h"
|
||||
#include "../common/threading_utils.h"
|
||||
|
||||
@ -10,6 +10,7 @@
|
||||
|
||||
#include "../common/column_matrix.h"
|
||||
#include "../common/hist_util.h"
|
||||
#include "../common/numeric.h"
|
||||
#include "../common/threading_utils.h"
|
||||
|
||||
namespace xgboost {
|
||||
@ -28,58 +29,13 @@ void GHistIndexMatrix::PushBatch(SparsePage const &batch,
|
||||
common::Span<FeatureType const> ft,
|
||||
size_t rbegin, size_t prev_sum, uint32_t nbins,
|
||||
int32_t n_threads) {
|
||||
// The number of threads is pegged to the batch size. If the OMP
|
||||
// block is parallelized on anything other than the batch/block size,
|
||||
// it should be reassigned
|
||||
auto page = batch.GetView();
|
||||
auto it = common::MakeIndexTransformIter([&](size_t ridx) { return page[ridx].size(); });
|
||||
common::PartialSum(n_threads, it, it + page.Size(), prev_sum, row_ptr.begin() + rbegin);
|
||||
// The number of threads is pegged to the batch size. If the OMP block is parallelized
|
||||
// on anything other than the batch/block size, it should be reassigned
|
||||
const size_t batch_threads =
|
||||
std::max(static_cast<size_t>(1), std::min(batch.Size(), static_cast<size_t>(n_threads)));
|
||||
auto page = batch.GetView();
|
||||
common::MemStackAllocator<size_t, 128> partial_sums(batch_threads);
|
||||
|
||||
size_t block_size = batch.Size() / batch_threads;
|
||||
|
||||
dmlc::OMPException exc;
|
||||
#pragma omp parallel num_threads(batch_threads)
|
||||
{
|
||||
#pragma omp for
|
||||
for (omp_ulong tid = 0; tid < batch_threads; ++tid) {
|
||||
exc.Run([&]() {
|
||||
size_t ibegin = block_size * tid;
|
||||
size_t iend = (tid == (batch_threads - 1) ? batch.Size()
|
||||
: (block_size * (tid + 1)));
|
||||
|
||||
size_t running_sum = 0;
|
||||
for (size_t ridx = ibegin; ridx < iend; ++ridx) {
|
||||
running_sum += page[ridx].size();
|
||||
row_ptr[rbegin + 1 + ridx] = running_sum;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
#pragma omp single
|
||||
{
|
||||
exc.Run([&]() {
|
||||
partial_sums[0] = prev_sum;
|
||||
for (size_t i = 1; i < batch_threads; ++i) {
|
||||
partial_sums[i] = partial_sums[i - 1] + row_ptr[rbegin + i * block_size];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
#pragma omp for
|
||||
for (omp_ulong tid = 0; tid < batch_threads; ++tid) {
|
||||
exc.Run([&]() {
|
||||
size_t ibegin = block_size * tid;
|
||||
size_t iend = (tid == (batch_threads - 1) ? batch.Size()
|
||||
: (block_size * (tid + 1)));
|
||||
|
||||
for (size_t i = ibegin; i < iend; ++i) {
|
||||
row_ptr[rbegin + 1 + i] += partial_sums[tid];
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
exc.Rethrow();
|
||||
|
||||
const size_t n_index = row_ptr[rbegin + batch.Size()]; // number of entries in this page
|
||||
ResizeIndex(n_index, isDense_);
|
||||
|
||||
@ -7,6 +7,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include "../common/common.h"
|
||||
#include "../common/numeric.h"
|
||||
#include "../common/stats.h"
|
||||
#include "../common/threading_utils.h"
|
||||
#include "xgboost/tree_model.h"
|
||||
|
||||
33
tests/cpp/common/test_numeric.cc
Normal file
33
tests/cpp/common/test_numeric.cc
Normal file
@ -0,0 +1,33 @@
|
||||
/*!
|
||||
* Copyright 2022, XGBoost contributors.
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <numeric>
|
||||
|
||||
#include "../../../src/common/numeric.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
// Checks PartialSum against std::partial_sum for an integral and a floating point input.
// The reference keeps a leading 0 so that it has the same n + 1 layout as the output.
TEST(Numeric, PartialSum) {
  {
    // Unsigned integer input.
    std::vector<size_t> input{1, 2, 3, 4};

    std::vector<size_t> expected(input.size() + 1, 0);
    std::partial_sum(input.begin(), input.end(), expected.begin() + 1);

    std::vector<size_t> actual(input.size() + 1);
    Context ctx;
    PartialSum(ctx.Threads(), input.begin(), input.end(), static_cast<size_t>(0),
               actual.begin());

    ASSERT_EQ(expected, actual);
  }
  {
    // Floating point input.
    std::vector<double> input{1.5, 2.5, 3.5, 4.5};

    std::vector<double> expected(input.size() + 1, 0.0);
    std::partial_sum(input.begin(), input.end(), expected.begin() + 1);

    std::vector<double> actual(input.size() + 1);
    Context ctx;
    PartialSum(ctx.Threads(), input.begin(), input.end(), 0.0, actual.begin());

    ASSERT_EQ(expected, actual);
  }
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
@ -3,6 +3,7 @@
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "../../../src/common/numeric.h"
|
||||
#include "../../../src/tree/updater_approx.h"
|
||||
#include "../helpers.h"
|
||||
#include "test_partitioner.h"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user