Fixed copy constructor for HostDeviceVectorImpl. (#3657)

- previously, vec_ in DeviceShard wasn't updated on copy; as a result, the shards continued to refer to the old HostDeviceVectorImpl object, which resulted in a dangling pointer once that object was deallocated
2018-09-01 01:38:09 +02:00 · 2018-09-01 01:38:09 +02:00 · dee0b69674
commit dee0b69674
parent 86d88c0758
2 changed files with 42 additions and 1 deletions
--- a/src/common/host_device_vector.cu
+++ b/src/common/host_device_vector.cu
@ -58,6 +58,19 @@ struct HostDeviceVectorImpl {
      perm_d_ = vec_->perm_h_.Complementary();
    }
    // Initializes this shard from another shard `other`, attaching it to the
    // owning vector `vec`.
    //   vec   - the HostDeviceVectorImpl this shard should refer to.
    //   other - the shard whose device id, index, sizes and access state are
    //           copied.
    // NOTE(review): data_ is only resized to other.data_.size() here; nothing in
    // this function copies its contents, while perm_d_ is taken over unchanged.
    // Confirm the copy is never made while the up-to-date data lives only in
    // other.data_.
    void Init(HostDeviceVectorImpl<T>* vec, const DeviceShard& other) {
      // Bind to `vec` on first use; CHECK_EQ then verifies the shard is bound
      // to exactly `vec` and not to some other object.
      if (vec_ == nullptr) { vec_ = vec; }
      CHECK_EQ(vec, vec_);
      device_ = other.device_;
      index_ = other.index_;
      cached_size_ = other.cached_size_;
      start_ = other.start_;
      proper_size_ = other.proper_size_;
      // Called before data_ is resized; presumably selects device_ as the
      // current device for the allocation below - TODO confirm.
      SetDevice();
      data_.resize(other.data_.size());
      perm_d_ = other.perm_d_;
    }
    void ScatterFrom(const T* begin) {
      // TODO(canonizer): avoid full copy of host data
      LazySyncDevice(GPUAccess::kWrite);
@ -166,7 +179,12 @@ struct HostDeviceVectorImpl {
  // required, as a new std::mutex has to be created
  HostDeviceVectorImpl(const HostDeviceVectorImpl<T>& other)
    : data_h_(other.data_h_), perm_h_(other.perm_h_), size_d_(other.size_d_),
-      distribution_(other.distribution_), mutex_(), shards_(other.shards_) {}
+      distribution_(other.distribution_), mutex_() {
    // shards_ is not copy-constructed from other.shards_: every shard is
    // re-initialized through Init(this, ...) so that it refers to this object
    // rather than to `other`, which may be destroyed after the copy.
    shards_.resize(other.shards_.size());
    dh::ExecuteIndexShards(&shards_, [&](int i, DeviceShard& shard) {
        shard.Init(this, other.shards_[i]);
      });
  }
  // Init can be std::vector<T> or std::initializer_list<T>
  template <class Init>
--- a/tests/cpp/common/test_host_device_vector.cu
+++ b/tests/cpp/common/test_host_device_vector.cu
@ -155,6 +155,29 @@ TEST(HostDeviceVector, TestExplicit) {
  TestHostDeviceVector(n, distribution, starts, sizes);
 }
 // Checks that a HostDeviceVector assigned from another vector stays fully
 // usable after the source vector has been destroyed.
 TEST(HostDeviceVector, TestCopy) {
  size_t n = 1001;
  int n_devices = 2;
  auto distribution = GPUDistribution::Block(GPUSet::Range(0, n_devices));
  // Expected per-device shard starts and sizes handed to CheckDevice below
  // (501 + 500 = n elements).
  std::vector<size_t> starts{0, 501};
  std::vector<size_t> sizes{501, 500};
  // Install the SetDevice handler for the duration of the test; it is reset
  // to nullptr on the last line.
  SetCudaSetDeviceHandler(SetDevice);
  HostDeviceVector<int> v;
  {
    // a separate scope to ensure that v1 is gone before further checks
    HostDeviceVector<int> v1;
    InitHostDeviceVector(n, distribution, &v1);
    v = v1;
  }
  // From here on only the copy exists; any state still referring to v1 would
  // be dangling.
  CheckDevice(&v, starts, sizes, 0, GPUAccess::kRead);
  PlusOne(&v);
  // The fourth argument changes from 0 to 1 after PlusOne - presumably the
  // expected increment of every element; confirm against CheckDevice.
  CheckDevice(&v, starts, sizes, 1, GPUAccess::kWrite);
  CheckHost(&v, GPUAccess::kRead);
  CheckHost(&v, GPUAccess::kWrite);
  SetCudaSetDeviceHandler(nullptr);
 }
 TEST(HostDeviceVector, Span) {
  HostDeviceVector<float> vec {1.0f, 2.0f, 3.0f, 4.0f};
  vec.Reshard(GPUSet{0, 1});