Fixed copy constructor for HostDeviceVectorImpl. (#3657)
- previously, vec_ in DeviceShard wasn't updated on copy; as a result, the shards continued to refer to the old HostDeviceVectorImpl object, which resulted in a dangling pointer once that object was deallocated
This commit is contained in:
parent
86d88c0758
commit
dee0b69674
@ -58,6 +58,19 @@ struct HostDeviceVectorImpl {
|
||||
perm_d_ = vec_->perm_h_.Complementary();
|
||||
}
|
||||
|
||||
void Init(HostDeviceVectorImpl<T>* vec, const DeviceShard& other) {
|
||||
if (vec_ == nullptr) { vec_ = vec; }
|
||||
CHECK_EQ(vec, vec_);
|
||||
device_ = other.device_;
|
||||
index_ = other.index_;
|
||||
cached_size_ = other.cached_size_;
|
||||
start_ = other.start_;
|
||||
proper_size_ = other.proper_size_;
|
||||
SetDevice();
|
||||
data_.resize(other.data_.size());
|
||||
perm_d_ = other.perm_d_;
|
||||
}
|
||||
|
||||
void ScatterFrom(const T* begin) {
|
||||
// TODO(canonizer): avoid full copy of host data
|
||||
LazySyncDevice(GPUAccess::kWrite);
|
||||
@ -166,7 +179,12 @@ struct HostDeviceVectorImpl {
|
||||
// required, as a new std::mutex has to be created
|
||||
HostDeviceVectorImpl(const HostDeviceVectorImpl<T>& other)
|
||||
: data_h_(other.data_h_), perm_h_(other.perm_h_), size_d_(other.size_d_),
|
||||
distribution_(other.distribution_), mutex_(), shards_(other.shards_) {}
|
||||
distribution_(other.distribution_), mutex_() {
|
||||
shards_.resize(other.shards_.size());
|
||||
dh::ExecuteIndexShards(&shards_, [&](int i, DeviceShard& shard) {
|
||||
shard.Init(this, other.shards_[i]);
|
||||
});
|
||||
}
|
||||
|
||||
// Init can be std::vector<T> or std::initializer_list<T>
|
||||
template <class Init>
|
||||
|
||||
@ -155,6 +155,29 @@ TEST(HostDeviceVector, TestExplicit) {
|
||||
TestHostDeviceVector(n, distribution, starts, sizes);
|
||||
}
|
||||
|
||||
TEST(HostDeviceVector, TestCopy) {
|
||||
size_t n = 1001;
|
||||
int n_devices = 2;
|
||||
auto distribution = GPUDistribution::Block(GPUSet::Range(0, n_devices));
|
||||
std::vector<size_t> starts{0, 501};
|
||||
std::vector<size_t> sizes{501, 500};
|
||||
SetCudaSetDeviceHandler(SetDevice);
|
||||
|
||||
HostDeviceVector<int> v;
|
||||
{
|
||||
// a separate scope to ensure that v1 is gone before further checks
|
||||
HostDeviceVector<int> v1;
|
||||
InitHostDeviceVector(n, distribution, &v1);
|
||||
v = v1;
|
||||
}
|
||||
CheckDevice(&v, starts, sizes, 0, GPUAccess::kRead);
|
||||
PlusOne(&v);
|
||||
CheckDevice(&v, starts, sizes, 1, GPUAccess::kWrite);
|
||||
CheckHost(&v, GPUAccess::kRead);
|
||||
CheckHost(&v, GPUAccess::kWrite);
|
||||
SetCudaSetDeviceHandler(nullptr);
|
||||
}
|
||||
|
||||
TEST(HostDeviceVector, Span) {
|
||||
HostDeviceVector<float> vec {1.0f, 2.0f, 3.0f, 4.0f};
|
||||
vec.Reshard(GPUSet{0, 1});
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user