further cleanup of single process multi-GPU code (#4810)
* Use subspan in GPU predictor instead of copying
* Revise `HostDeviceVector`
This commit is contained in:
@@ -113,7 +113,7 @@ TEST(GpuHist, BuildGidxDense) {
|
||||
{"max_leaves", "0"},
|
||||
};
|
||||
param.Init(args);
|
||||
DeviceShard<GradientPairPrecise> shard(0, 0, 0, kNRows, param, kNCols, kNCols);
|
||||
DeviceShard<GradientPairPrecise> shard(0, kNRows, param, kNCols, kNCols);
|
||||
BuildGidx(&shard, kNRows, kNCols);
|
||||
|
||||
std::vector<common::CompressedByteT> h_gidx_buffer(shard.gidx_buffer.size());
|
||||
@@ -154,8 +154,7 @@ TEST(GpuHist, BuildGidxSparse) {
|
||||
};
|
||||
param.Init(args);
|
||||
|
||||
DeviceShard<GradientPairPrecise> shard(0, 0, 0, kNRows, param, kNCols,
|
||||
kNCols);
|
||||
DeviceShard<GradientPairPrecise> shard(0, kNRows, param, kNCols, kNCols);
|
||||
BuildGidx(&shard, kNRows, kNCols, 0.9f);
|
||||
|
||||
std::vector<common::CompressedByteT> h_gidx_buffer(shard.gidx_buffer.size());
|
||||
@@ -200,8 +199,7 @@ void TestBuildHist(bool use_shared_memory_histograms) {
|
||||
{"max_leaves", "0"},
|
||||
};
|
||||
param.Init(args);
|
||||
DeviceShard<GradientSumT> shard(0, 0, 0, kNRows, param, kNCols,
|
||||
kNCols);
|
||||
DeviceShard<GradientSumT> shard(0, kNRows, param, kNCols, kNCols);
|
||||
BuildGidx(&shard, kNRows, kNCols);
|
||||
|
||||
xgboost::SimpleLCG gen;
|
||||
@@ -303,8 +301,7 @@ TEST(GpuHist, EvaluateSplits) {
|
||||
|
||||
// Initialize DeviceShard
|
||||
std::unique_ptr<DeviceShard<GradientPairPrecise>> shard{
|
||||
new DeviceShard<GradientPairPrecise>(0, 0, 0, kNRows, param, kNCols,
|
||||
kNCols)};
|
||||
new DeviceShard<GradientPairPrecise>(0, kNRows, param, kNCols, kNCols)};
|
||||
// Initialize DeviceShard::node_sum_gradients
|
||||
shard->node_sum_gradients = {{6.4f, 12.8f}};
|
||||
|
||||
@@ -391,24 +388,20 @@ void TestHistogramIndexImpl() {
|
||||
hist_maker_ext.Configure(training_params, &generic_param);
|
||||
hist_maker_ext.InitDataOnce(hist_maker_ext_dmat.get());
|
||||
|
||||
ASSERT_EQ(hist_maker.shards_.size(), hist_maker_ext.shards_.size());
|
||||
|
||||
// Extract the device shards from the histogram makers and from that its compressed
|
||||
// Extract the device shard from the histogram makers and from that its compressed
|
||||
// histogram index
|
||||
for (size_t i = 0; i < hist_maker.shards_.size(); ++i) {
|
||||
const auto &dev_shard = hist_maker.shards_[i];
|
||||
std::vector<common::CompressedByteT> h_gidx_buffer(dev_shard->gidx_buffer.size());
|
||||
dh::CopyDeviceSpanToVector(&h_gidx_buffer, dev_shard->gidx_buffer);
|
||||
const auto &dev_shard = hist_maker.shard_;
|
||||
std::vector<common::CompressedByteT> h_gidx_buffer(dev_shard->gidx_buffer.size());
|
||||
dh::CopyDeviceSpanToVector(&h_gidx_buffer, dev_shard->gidx_buffer);
|
||||
|
||||
const auto &dev_shard_ext = hist_maker_ext.shards_[i];
|
||||
std::vector<common::CompressedByteT> h_gidx_buffer_ext(dev_shard_ext->gidx_buffer.size());
|
||||
dh::CopyDeviceSpanToVector(&h_gidx_buffer_ext, dev_shard_ext->gidx_buffer);
|
||||
const auto &dev_shard_ext = hist_maker_ext.shard_;
|
||||
std::vector<common::CompressedByteT> h_gidx_buffer_ext(dev_shard_ext->gidx_buffer.size());
|
||||
dh::CopyDeviceSpanToVector(&h_gidx_buffer_ext, dev_shard_ext->gidx_buffer);
|
||||
|
||||
ASSERT_EQ(dev_shard->n_bins, dev_shard_ext->n_bins);
|
||||
ASSERT_EQ(dev_shard->gidx_buffer.size(), dev_shard_ext->gidx_buffer.size());
|
||||
ASSERT_EQ(dev_shard->n_bins, dev_shard_ext->n_bins);
|
||||
ASSERT_EQ(dev_shard->gidx_buffer.size(), dev_shard_ext->gidx_buffer.size());
|
||||
|
||||
ASSERT_EQ(h_gidx_buffer, h_gidx_buffer_ext);
|
||||
}
|
||||
ASSERT_EQ(h_gidx_buffer, h_gidx_buffer_ext);
|
||||
}
|
||||
|
||||
TEST(GpuHist, TestHistogramIndex) {
|
||||
|
||||
Reference in New Issue
Block a user