More explict sharding methods for device memory (#4396)

* Rename the Reshard method to Shard * Add a new Reshard method for sharding a vector that's already sharded
2019-04-30 16:47:23 -07:00
parent 797ba8e72d
commit eaab364a63
12 changed files with 154 additions and 77 deletions
--- a/src/predictor/gpu_predictor.cu
+++ b/src/predictor/gpu_predictor.cu
@@ -327,11 +327,11 @@ class GPUPredictor : public xgboost::Predictor {

    for (const auto &batch : dmat->GetRowBatches()) {
      CHECK_EQ(i_batch, 0) << "External memory not supported";
-      // out_preds have been resharded and resized in InitOutPredictions()
-      batch.offset.Reshard(GPUDistribution::Overlap(devices_, 1));
+      // out_preds have been sharded and resized in InitOutPredictions()
+      batch.offset.Shard(GPUDistribution::Overlap(devices_, 1));
      std::vector<size_t> device_offsets;
      DeviceOffsets(batch.offset, &device_offsets);
-      batch.data.Reshard(GPUDistribution::Explicit(devices_, device_offsets));
+      batch.data.Shard(GPUDistribution::Explicit(devices_, device_offsets));
      dh::ExecuteIndexShards(&shards_, [&](int idx, DeviceShard& shard) {
        shard.PredictInternal(batch, dmat->Info(), out_preds, model,
                              h_tree_segments, h_nodes, tree_begin, tree_end);
@@ -373,7 +373,7 @@ class GPUPredictor : public xgboost::Predictor {
    size_t n_classes = model.param.num_output_group;
    size_t n = n_classes * info.num_row_;
    const HostDeviceVector<bst_float>& base_margin = info.base_margin_;
-    out_preds->Reshard(GPUDistribution::Granular(devices_, n_classes));
+    out_preds->Shard(GPUDistribution::Granular(devices_, n_classes));
    out_preds->Resize(n);
    if (base_margin.Size() != 0) {
      CHECK_EQ(out_preds->Size(), n);
@@ -392,7 +392,7 @@ class GPUPredictor : public xgboost::Predictor {
        const HostDeviceVector<bst_float>& y = it->second.predictions;
        if (y.Size() != 0) {
          monitor_.StartCuda("PredictFromCache");
-          out_preds->Reshard(y.Distribution());
+          out_preds->Shard(y.Distribution());
          out_preds->Resize(y.Size());
          out_preds->Copy(y);
          monitor_.StopCuda("PredictFromCache");