Combine thread launches into single launch per tree for gpu_hist (#4343)
* Combine thread launches into single launch per tree for gpu_hist algorithm. * Address deprecation warning * Add manual column sampler constructor * Turn off omp dynamic to get a guaranteed number of threads * Enable openmp in cuda code
This commit is contained in:
@@ -89,7 +89,7 @@ TEST(GpuHist, BuildGidxDense) {
|
||||
param.n_gpus = 1;
|
||||
param.max_leaves = 0;
|
||||
|
||||
DeviceShard<GradientPairPrecise> shard(0, 0, kNRows, param);
|
||||
DeviceShard<GradientPairPrecise> shard(0, 0, 0, kNRows, param, kNCols);
|
||||
BuildGidx(&shard, kNRows, kNCols);
|
||||
|
||||
std::vector<common::CompressedByteT> h_gidx_buffer(shard.gidx_buffer.size());
|
||||
@@ -128,7 +128,7 @@ TEST(GpuHist, BuildGidxSparse) {
|
||||
param.n_gpus = 1;
|
||||
param.max_leaves = 0;
|
||||
|
||||
DeviceShard<GradientPairPrecise> shard(0, 0, kNRows, param);
|
||||
DeviceShard<GradientPairPrecise> shard(0, 0, 0, kNRows, param, kNCols);
|
||||
BuildGidx(&shard, kNRows, kNCols, 0.9f);
|
||||
|
||||
std::vector<common::CompressedByteT> h_gidx_buffer(shard.gidx_buffer.size());
|
||||
@@ -172,7 +172,7 @@ void TestBuildHist(GPUHistBuilderBase<GradientSumT>& builder) {
|
||||
param.n_gpus = 1;
|
||||
param.max_leaves = 0;
|
||||
|
||||
DeviceShard<GradientSumT> shard(0, 0, kNRows, param);
|
||||
DeviceShard<GradientSumT> shard(0, 0, 0, kNRows, param, kNCols);
|
||||
|
||||
BuildGidx(&shard, kNRows, kNCols);
|
||||
|
||||
@@ -282,8 +282,8 @@ TEST(GpuHist, EvaluateSplits) {
|
||||
int max_bins = 4;
|
||||
|
||||
// Initialize DeviceShard
|
||||
std::unique_ptr<DeviceShard<GradientPairPrecise>> shard {
|
||||
new DeviceShard<GradientPairPrecise>(0, 0, kNRows, param)};
|
||||
std::unique_ptr<DeviceShard<GradientPairPrecise>> shard{
|
||||
new DeviceShard<GradientPairPrecise>(0, 0, 0, kNRows, param, kNCols)};
|
||||
// Initialize DeviceShard::node_sum_gradients
|
||||
shard->node_sum_gradients = {{6.4f, 12.8f}};
|
||||
|
||||
@@ -321,12 +321,7 @@ TEST(GpuHist, EvaluateSplits) {
|
||||
thrust::copy(hist.begin(), hist.end(),
|
||||
shard->hist.Data().begin());
|
||||
|
||||
// Initialize GPUHistMaker
|
||||
GPUHistMakerSpecialised<GradientPairPrecise> hist_maker =
|
||||
GPUHistMakerSpecialised<GradientPairPrecise>();
|
||||
hist_maker.param_ = param;
|
||||
hist_maker.shards_.push_back(std::move(shard));
|
||||
hist_maker.column_sampler_.Init(kNCols,
|
||||
shard->column_sampler.Init(kNCols,
|
||||
param.colsample_bynode,
|
||||
param.colsample_bylevel,
|
||||
param.colsample_bytree,
|
||||
@@ -337,13 +332,12 @@ TEST(GpuHist, EvaluateSplits) {
|
||||
info.num_row_ = kNRows;
|
||||
info.num_col_ = kNCols;
|
||||
|
||||
hist_maker.info_ = &info;
|
||||
hist_maker.node_value_constraints_.resize(1);
|
||||
hist_maker.node_value_constraints_[0].lower_bound = -1.0;
|
||||
hist_maker.node_value_constraints_[0].upper_bound = 1.0;
|
||||
shard->node_value_constraints.resize(1);
|
||||
shard->node_value_constraints[0].lower_bound = -1.0;
|
||||
shard->node_value_constraints[0].upper_bound = 1.0;
|
||||
|
||||
std::vector<DeviceSplitCandidate> res =
|
||||
hist_maker.EvaluateSplits({ 0,0 }, &tree);
|
||||
shard->EvaluateSplits({ 0,0 }, tree, kNCols);
|
||||
|
||||
ASSERT_EQ(res[0].findex, 7);
|
||||
ASSERT_EQ(res[1].findex, 7);
|
||||
@@ -368,7 +362,8 @@ TEST(GpuHist, ApplySplit) {
|
||||
}
|
||||
|
||||
hist_maker.shards_.resize(1);
|
||||
hist_maker.shards_[0].reset(new DeviceShard<GradientPairPrecise>(0, 0, kNRows, param));
|
||||
hist_maker.shards_[0].reset(
|
||||
new DeviceShard<GradientPairPrecise>(0, 0, 0, kNRows, param, kNCols));
|
||||
|
||||
auto& shard = hist_maker.shards_.at(0);
|
||||
shard->ridx_segments.resize(3); // 3 nodes.
|
||||
@@ -435,8 +430,8 @@ TEST(GpuHist, ApplySplit) {
|
||||
shard->gidx_buffer.data(), num_symbols);
|
||||
|
||||
hist_maker.info_ = &info;
|
||||
hist_maker.ApplySplit(candidate_entry, &tree);
|
||||
hist_maker.UpdatePosition(candidate_entry, &tree);
|
||||
shard->ApplySplit(candidate_entry, &tree);
|
||||
shard->UpdatePosition(candidate_entry.nid, tree[candidate_entry.nid]);
|
||||
|
||||
ASSERT_FALSE(tree[kNId].IsLeaf());
|
||||
|
||||
|
||||
Reference in New Issue
Block a user