Optimized BuildHist function (#5156)
This commit is contained in:
@@ -9,6 +9,123 @@
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
|
||||
size_t GetNThreads() {
|
||||
size_t nthreads;
|
||||
#pragma omp parallel
|
||||
{
|
||||
#pragma omp master
|
||||
nthreads = omp_get_num_threads();
|
||||
}
|
||||
return nthreads;
|
||||
}
|
||||
|
||||
|
||||
// Verifies that after ParallelGHistBuilder::Reset() the histograms returned by
// GetInitializedHist() read as zero again, even though they were filled with non-zero values
// before the reset and the builder is reset for a larger number of nodes.
TEST(ParallelGHistBuilder, Reset) {
  constexpr size_t kBins = 10;
  constexpr size_t kNodes = 5;
  constexpr size_t kNodesExtended = 10;
  constexpr size_t kTasksPerNode = 10;
  constexpr double kValue = 1.0;
  const size_t nthreads = GetNThreads();

  HistCollection collection;
  collection.Init(kBins);

  // Rows are allocated for the extended node count up front so that both phases of the test
  // can take their target histograms from the same collection.
  for (size_t inode = 0; inode < kNodesExtended; inode++) {
    collection.AddHistRow(inode);
  }

  ParallelGHistBuilder hist_builder;
  hist_builder.Init(kBins);
  std::vector<GHistRow> target_hist(kNodes);
  for (size_t i = 0; i < target_hist.size(); ++i) {
    target_hist[i] = collection[i];
  }

  // Every node gets kTasksPerNode tasks; a grain size of 1 makes each task its own block.
  common::BlockedSpace2d space(kNodes, [&](size_t) { return kTasksPerNode; }, 1);
  hist_builder.Reset(nthreads, kNodes, space, target_hist);

  common::ParallelFor2d(space, nthreads, [&](size_t inode, common::Range1d) {
    const size_t tid = omp_get_thread_num();

    GHistRow hist = hist_builder.GetInitializedHist(tid, inode);
    // fill hist with some non-zero values
    for (size_t j = 0; j < kBins; ++j) {
      hist[j].Add(kValue, kValue);
    }
  });

  // reset and extend buffer
  target_hist.resize(kNodesExtended);
  for (size_t i = 0; i < target_hist.size(); ++i) {
    target_hist[i] = collection[i];
  }
  common::BlockedSpace2d space2(kNodesExtended, [&](size_t) { return kTasksPerNode; }, 1);
  hist_builder.Reset(nthreads, kNodesExtended, space2, target_hist);

  common::ParallelFor2d(space2, nthreads, [&](size_t inode, common::Range1d) {
    const size_t tid = omp_get_thread_num();

    GHistRow hist = hist_builder.GetInitializedHist(tid, inode);
    // nothing has been added since the second Reset(), so every bin must read as zero
    for (size_t j = 0; j < kBins; ++j) {
      ASSERT_EQ(0.0, hist[j].GetGrad());
      ASSERT_EQ(0.0, hist[j].GetHess());
    }
  });
}
|
||||
|
||||
// Verifies that ParallelGHistBuilder::ReduceHist() accumulates the contributions of all tasks
// of a node into that node's target histogram stored in the HistCollection.
TEST(ParallelGHistBuilder, ReduceHist) {
  constexpr size_t kBins = 10;
  constexpr size_t kNodes = 5;
  constexpr size_t kTasksPerNode = 10;
  constexpr double kValue = 1.0;
  const size_t nthreads = GetNThreads();

  HistCollection collection;
  collection.Init(kBins);

  for (size_t inode = 0; inode < kNodes; inode++) {
    collection.AddHistRow(inode);
  }

  ParallelGHistBuilder hist_builder;
  hist_builder.Init(kBins);
  std::vector<GHistRow> target_hist(kNodes);
  for (size_t i = 0; i < target_hist.size(); ++i) {
    target_hist[i] = collection[i];
  }

  // Every node gets kTasksPerNode tasks; a grain size of 1 makes each task its own block.
  common::BlockedSpace2d space(kNodes, [&](size_t) { return kTasksPerNode; }, 1);
  hist_builder.Reset(nthreads, kNodes, space, target_hist);

  // Simple analog of BuildHist function, works in parallel for both tree-nodes and data in node
  common::ParallelFor2d(space, nthreads, [&](size_t inode, common::Range1d) {
    const size_t tid = omp_get_thread_num();

    GHistRow hist = hist_builder.GetInitializedHist(tid, inode);
    for (size_t i = 0; i < kBins; ++i) {
      hist[i].Add(kValue, kValue);
    }
  });

  for (size_t inode = 0; inode < kNodes; inode++) {
    hist_builder.ReduceHist(inode, 0, kBins);

    // We had kTasksPerNode tasks, each adding kValue to every bin of the node.
    // So, after reducing we expect (kValue * kTasksPerNode) in each bin of the node.
    for (size_t i = 0; i < kBins; ++i) {
      ASSERT_EQ(kValue * kTasksPerNode, collection[inode][i].GetGrad());
      ASSERT_EQ(kValue * kTasksPerNode, collection[inode][i].GetHess());
    }
  }
}
|
||||
|
||||
|
||||
TEST(CutsBuilder, SearchGroupInd) {
|
||||
size_t constexpr kNumGroups = 4;
|
||||
size_t constexpr kRows = 17;
|
||||
|
||||
@@ -37,7 +37,7 @@ TEST(ParallelFor2d, Test) {
|
||||
return kDim2;
|
||||
}, kGrainSize);
|
||||
|
||||
ParallelFor2d(space, [&](size_t i, Range1d r) {
|
||||
ParallelFor2d(space, 4, [&](size_t i, Range1d r) {
|
||||
for (auto j = r.begin(); j < r.end(); ++j) {
|
||||
matrix[i*kDim2 + j] += 1;
|
||||
}
|
||||
@@ -65,7 +65,7 @@ TEST(ParallelFor2dNonUniform, Test) {
|
||||
working_space[i].resize(dim2[i], 0);
|
||||
}
|
||||
|
||||
ParallelFor2d(space, [&](size_t i, Range1d r) {
|
||||
ParallelFor2d(space, 4, [&](size_t i, Range1d r) {
|
||||
for (auto j = r.begin(); j < r.end(); ++j) {
|
||||
working_space[i][j] += 1;
|
||||
}
|
||||
|
||||
@@ -107,7 +107,7 @@ class QuantileHistMock : public QuantileHistMaker {
|
||||
GHistIndexBlockMatrix dummy;
|
||||
hist_.AddHistRow(nid);
|
||||
BuildHist(gpair, row_set_collection_[nid],
|
||||
gmat, dummy, hist_[nid], false);
|
||||
gmat, dummy, hist_[nid]);
|
||||
|
||||
// Check if number of histogram bins is correct
|
||||
ASSERT_EQ(hist_[nid].size(), gmat.cut.Ptrs().back());
|
||||
@@ -149,7 +149,7 @@ class QuantileHistMock : public QuantileHistMaker {
|
||||
hist_.AddHistRow(0);
|
||||
|
||||
BuildHist(row_gpairs, row_set_collection_[0],
|
||||
gmat, quantile_index_block, hist_[0], false);
|
||||
gmat, quantile_index_block, hist_[0]);
|
||||
|
||||
RealImpl::InitNewNode(0, gmat, row_gpairs, *(*dmat), tree);
|
||||
|
||||
@@ -211,7 +211,8 @@ class QuantileHistMock : public QuantileHistMaker {
|
||||
}
|
||||
|
||||
/* Now compare against result given by EvaluateSplit() */
|
||||
ExpandEntry node(0, tree.GetDepth(0), snode_[0].best.loss_chg, 0);
|
||||
ExpandEntry node(ExpandEntry::kRootNid, ExpandEntry::kEmptyNid,
|
||||
tree.GetDepth(0), snode_[0].best.loss_chg, 0);
|
||||
RealImpl::EvaluateSplit({node}, gmat, hist_, *(*dmat), tree);
|
||||
ASSERT_EQ(snode_[0].best.SplitIndex(), best_split_feature);
|
||||
ASSERT_EQ(snode_[0].best.split_value, gmat.cut.Values()[best_split_threshold]);
|
||||
|
||||
Reference in New Issue
Block a user