Make HostDeviceVector single gpu only (#4773)

* Make HostDeviceVector single gpu only
This commit is contained in:
Rong Ou
2019-08-25 14:51:13 -07:00
committed by Rory Mitchell
parent 41227d1933
commit 38ab79f889
54 changed files with 641 additions and 1621 deletions

View File

@@ -702,7 +702,7 @@ struct DeviceShard {
row_partitioner.reset(new RowPartitioner(device_id, n_rows));
dh::safe_cuda(cudaMemcpyAsync(
-gpair.data(), dh_gpair->ConstDevicePointer(device_id),
+gpair.data(), dh_gpair->ConstDevicePointer(),
gpair.size() * sizeof(GradientPair), cudaMemcpyHostToHost));
SubsampleGradientPair(device_id, gpair, param.subsample, row_begin_idx);
hist.Reset();
@@ -745,8 +745,8 @@ struct DeviceShard {
for (auto i = 0ull; i < nidxs.size(); i++) {
auto nidx = nidxs[i];
auto p_feature_set = column_sampler.GetFeatureSet(tree.GetDepth(nidx));
-p_feature_set->Shard(GPUSet(device_id, 1));
-auto d_sampled_features = p_feature_set->DeviceSpan(device_id);
+p_feature_set->SetDevice(device_id);
+auto d_sampled_features = p_feature_set->DeviceSpan();
common::Span<int32_t> d_feature_set =
interaction_constraints.Query(d_sampled_features, nidx);
auto d_split_candidates =
@@ -1016,7 +1016,7 @@ struct DeviceShard {
dh::AllReducer* reducer, int64_t num_columns) {
constexpr int kRootNIdx = 0;
-const auto &gpair = gpair_all->DeviceSpan(device_id);
+const auto &gpair = gpair_all->DeviceSpan();
dh::SumReduction(temp_memory, gpair, node_sum_gradients_d,
gpair.size());
@@ -1294,11 +1294,8 @@ class GPUHistMakerSpecialised {
param_.InitAllowUnknown(args);
generic_param_ = generic_param;
hist_maker_param_.InitAllowUnknown(args);
-auto devices = GPUSet::All(generic_param_->gpu_id,
-generic_param_->n_gpus);
-n_devices_ = devices.Size();
-CHECK(n_devices_ != 0) << "Must have at least one device";
-dist_ = GPUDistribution::Block(devices);
+device_ = generic_param_->gpu_id;
+CHECK_GE(device_, 0) << "Must have at least one device";
dh::CheckComputeCapability();
@@ -1330,30 +1327,22 @@ class GPUHistMakerSpecialised {
void InitDataOnce(DMatrix* dmat) {
info_ = &dmat->Info();
-int n_devices = dist_.Devices().Size();
-device_list_.resize(n_devices);
-for (int index = 0; index < n_devices; ++index) {
-int device_id = dist_.Devices().DeviceId(index);
-device_list_[index] = device_id;
-}
-reducer_.Init(device_list_);
+reducer_.Init({device_});
// Synchronise the column sampling seed
uint32_t column_sampling_seed = common::GlobalRandom()();
rabit::Broadcast(&column_sampling_seed, sizeof(column_sampling_seed), 0);
// Create device shards
-shards_.resize(n_devices);
+shards_.resize(1);
dh::ExecuteIndexShards(
&shards_,
[&](int idx, std::unique_ptr<DeviceShard<GradientSumT>>& shard) {
-dh::safe_cuda(cudaSetDevice(dist_.Devices().DeviceId(idx)));
-size_t start = dist_.ShardStart(info_->num_row_, idx);
-size_t size = dist_.ShardSize(info_->num_row_, idx);
+dh::safe_cuda(cudaSetDevice(device_));
+size_t start = 0;
+size_t size = info_->num_row_;
shard = std::unique_ptr<DeviceShard<GradientSumT>>(
-new DeviceShard<GradientSumT>(dist_.Devices().DeviceId(idx), idx,
+new DeviceShard<GradientSumT>(device_, idx,
start, start + size, param_,
column_sampling_seed,
info_->num_col_));
@@ -1436,7 +1425,7 @@ class GPUHistMakerSpecialised {
for (auto& tree : trees) {
tree = *p_tree;
}
-gpair->Reshard(dist_);
+gpair->SetDevice(device_);
// Launch one thread for each device "shard" containing a subset of rows.
// Threads will cooperatively build the tree, synchronising over histograms.
@@ -1462,13 +1451,13 @@ class GPUHistMakerSpecialised {
return false;
}
monitor_.StartCuda("UpdatePredictionCache");
-p_out_preds->Shard(dist_.Devices());
+p_out_preds->SetDevice(device_);
dh::ExecuteIndexShards(
&shards_,
[&](int idx, std::unique_ptr<DeviceShard<GradientSumT>>& shard) {
dh::safe_cuda(cudaSetDevice(shard->device_id));
shard->UpdatePredictionCache(
-p_out_preds->DevicePointer(shard->device_id));
+p_out_preds->DevicePointer());
});
monitor_.StopCuda("UpdatePredictionCache");
return true;
@@ -1483,7 +1472,6 @@ class GPUHistMakerSpecialised {
private:
bool initialised_;
-int n_devices_;
int n_bins_;
GPUHistMakerTrainParam hist_maker_param_;
@@ -1492,11 +1480,9 @@ class GPUHistMakerSpecialised {
dh::AllReducer reducer_;
DMatrix* p_last_fmat_;
-GPUDistribution dist_;
+int device_;
common::Monitor monitor_;
-/*! List storing device id. */
-std::vector<int> device_list_;
};
class GPUHistMaker : public TreeUpdater {