Optimized BuildHist function (#5156)

This commit is contained in:
Egor Smirnov
2020-01-30 10:32:57 +03:00
committed by GitHub
parent 4240daed4e
commit c67163250e
8 changed files with 610 additions and 184 deletions

View File

@@ -9,6 +9,123 @@
namespace xgboost {
namespace common {
// Returns the value of omp_get_num_threads() as observed by the master thread
// from inside an OpenMP parallel region.
size_t GetNThreads() {
  size_t team_size = 0;
#pragma omp parallel
  {
    // Only the master thread records the value; the other threads just pass through.
#pragma omp master
    { team_size = omp_get_num_threads(); }
  }
  return team_size;
}
// Fills the per-thread histograms for kNodes nodes, then calls Reset() for a larger
// number of nodes (kNodesExtended) and checks that every histogram returned by
// GetInitializedHist() afterwards holds zero gradient and zero hessian in each bin.
TEST(ParallelGHistBuilder, Reset) {
  constexpr size_t kBins = 10;
  constexpr size_t kNodes = 5;
  constexpr size_t kNodesExtended = 10;
  constexpr size_t kTasksPerNode = 10;
  constexpr double kValue = 1.0;
  const size_t nthreads = GetNThreads();

  // Allocate rows for the extended node count up front so both phases can use them.
  HistCollection collection;
  collection.Init(kBins);
  for (size_t inode = 0; inode < kNodesExtended; inode++) {
    collection.AddHistRow(inode);
  }

  ParallelGHistBuilder hist_builder;
  hist_builder.Init(kBins);

  std::vector<GHistRow> target_hist(kNodes);
  for (size_t i = 0; i < target_hist.size(); ++i) {
    target_hist[i] = collection[i];
  }

  // Phase 1: kTasksPerNode tasks per node, each task writes into its histogram.
  common::BlockedSpace2d space(kNodes, [&](size_t /*node*/) { return kTasksPerNode; }, 1);
  hist_builder.Reset(nthreads, kNodes, space, target_hist);

  common::ParallelFor2d(space, nthreads, [&](size_t inode, common::Range1d /*r*/) {
    const size_t tid = omp_get_thread_num();

    GHistRow hist = hist_builder.GetInitializedHist(tid, inode);
    // fill hist with some non-zero values
    for (size_t j = 0; j < kBins; ++j) {
      hist[j].Add(kValue, kValue);
    }
  });

  // Phase 2: reset and extend buffer
  target_hist.resize(kNodesExtended);
  for (size_t i = 0; i < target_hist.size(); ++i) {
    target_hist[i] = collection[i];
  }

  common::BlockedSpace2d space2(kNodesExtended, [&](size_t /*node*/) { return kTasksPerNode; }, 1);
  hist_builder.Reset(nthreads, kNodesExtended, space2, target_hist);

  common::ParallelFor2d(space2, nthreads, [&](size_t inode, common::Range1d /*r*/) {
    const size_t tid = omp_get_thread_num();

    GHistRow hist = hist_builder.GetInitializedHist(tid, inode);
    // values written in phase 1 must not be visible: every bin has to be zero
    for (size_t j = 0; j < kBins; ++j) {
      ASSERT_EQ(0.0, hist[j].GetGrad());
      ASSERT_EQ(0.0, hist[j].GetHess());
    }
  });
}
// Runs kTasksPerNode tasks per node, each adding kValue to every bin of the histogram
// obtained from GetInitializedHist(), then calls ReduceHist() per node and checks that
// each bin of the target histogram holds kValue * kTasksPerNode.
TEST(ParallelGHistBuilder, ReduceHist) {
  constexpr size_t kBins = 10;
  constexpr size_t kNodes = 5;
  constexpr size_t kTasksPerNode = 10;
  constexpr double kValue = 1.0;
  const size_t nthreads = GetNThreads();

  HistCollection collection;
  collection.Init(kBins);
  for (size_t inode = 0; inode < kNodes; inode++) {
    collection.AddHistRow(inode);
  }

  ParallelGHistBuilder hist_builder;
  hist_builder.Init(kBins);

  std::vector<GHistRow> target_hist(kNodes);
  for (size_t i = 0; i < target_hist.size(); ++i) {
    target_hist[i] = collection[i];
  }

  common::BlockedSpace2d space(kNodes, [&](size_t /*node*/) { return kTasksPerNode; }, 1);
  hist_builder.Reset(nthreads, kNodes, space, target_hist);

  // Simple analog of BuildHist function, works in parallel for both tree-nodes and data in node
  common::ParallelFor2d(space, nthreads, [&](size_t inode, common::Range1d /*r*/) {
    const size_t tid = omp_get_thread_num();

    GHistRow hist = hist_builder.GetInitializedHist(tid, inode);
    for (size_t i = 0; i < kBins; ++i) {
      hist[i].Add(kValue, kValue);
    }
  });

  for (size_t inode = 0; inode < kNodes; inode++) {
    hist_builder.ReduceHist(inode, 0, kBins);

    // We had kTasksPerNode tasks to add kValue to each bin for each node
    // So, after reducing we expect to have (kValue * kTasksPerNode) in each bin of the node
    for (size_t i = 0; i < kBins; ++i) {
      ASSERT_EQ(kValue * kTasksPerNode, collection[inode][i].GetGrad());
      ASSERT_EQ(kValue * kTasksPerNode, collection[inode][i].GetHess());
    }
  }
}
TEST(CutsBuilder, SearchGroupInd) {
size_t constexpr kNumGroups = 4;
size_t constexpr kRows = 17;

View File

@@ -37,7 +37,7 @@ TEST(ParallelFor2d, Test) {
return kDim2;
}, kGrainSize);
ParallelFor2d(space, [&](size_t i, Range1d r) {
ParallelFor2d(space, 4, [&](size_t i, Range1d r) {
for (auto j = r.begin(); j < r.end(); ++j) {
matrix[i*kDim2 + j] += 1;
}
@@ -65,7 +65,7 @@ TEST(ParallelFor2dNonUniform, Test) {
working_space[i].resize(dim2[i], 0);
}
ParallelFor2d(space, [&](size_t i, Range1d r) {
ParallelFor2d(space, 4, [&](size_t i, Range1d r) {
for (auto j = r.begin(); j < r.end(); ++j) {
working_space[i][j] += 1;
}

View File

@@ -107,7 +107,7 @@ class QuantileHistMock : public QuantileHistMaker {
GHistIndexBlockMatrix dummy;
hist_.AddHistRow(nid);
BuildHist(gpair, row_set_collection_[nid],
gmat, dummy, hist_[nid], false);
gmat, dummy, hist_[nid]);
// Check if number of histogram bins is correct
ASSERT_EQ(hist_[nid].size(), gmat.cut.Ptrs().back());
@@ -149,7 +149,7 @@ class QuantileHistMock : public QuantileHistMaker {
hist_.AddHistRow(0);
BuildHist(row_gpairs, row_set_collection_[0],
gmat, quantile_index_block, hist_[0], false);
gmat, quantile_index_block, hist_[0]);
RealImpl::InitNewNode(0, gmat, row_gpairs, *(*dmat), tree);
@@ -211,7 +211,8 @@ class QuantileHistMock : public QuantileHistMaker {
}
/* Now compare against result given by EvaluateSplit() */
ExpandEntry node(0, tree.GetDepth(0), snode_[0].best.loss_chg, 0);
ExpandEntry node(ExpandEntry::kRootNid, ExpandEntry::kEmptyNid,
tree.GetDepth(0), snode_[0].best.loss_chg, 0);
RealImpl::EvaluateSplit({node}, gmat, hist_, *(*dmat), tree);
ASSERT_EQ(snode_[0].best.SplitIndex(), best_split_feature);
ASSERT_EQ(snode_[0].best.split_value, gmat.cut.Values()[best_split_threshold]);