Fixed copy constructor for HostDeviceVectorImpl. (#3657)
- previously, vec_ in DeviceShard wasn't updated on copy; as a result, the shards continued to refer to the old HostDeviceVectorImpl object, which resulted in a dangling pointer once that object was deallocated
This commit is contained in:
parent
86d88c0758
commit
dee0b69674
@ -58,6 +58,19 @@ struct HostDeviceVectorImpl {
|
|||||||
perm_d_ = vec_->perm_h_.Complementary();
|
perm_d_ = vec_->perm_h_.Complementary();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Init(HostDeviceVectorImpl<T>* vec, const DeviceShard& other) {
|
||||||
|
if (vec_ == nullptr) { vec_ = vec; }
|
||||||
|
CHECK_EQ(vec, vec_);
|
||||||
|
device_ = other.device_;
|
||||||
|
index_ = other.index_;
|
||||||
|
cached_size_ = other.cached_size_;
|
||||||
|
start_ = other.start_;
|
||||||
|
proper_size_ = other.proper_size_;
|
||||||
|
SetDevice();
|
||||||
|
data_.resize(other.data_.size());
|
||||||
|
perm_d_ = other.perm_d_;
|
||||||
|
}
|
||||||
|
|
||||||
void ScatterFrom(const T* begin) {
|
void ScatterFrom(const T* begin) {
|
||||||
// TODO(canonizer): avoid full copy of host data
|
// TODO(canonizer): avoid full copy of host data
|
||||||
LazySyncDevice(GPUAccess::kWrite);
|
LazySyncDevice(GPUAccess::kWrite);
|
||||||
@ -166,7 +179,12 @@ struct HostDeviceVectorImpl {
|
|||||||
// required, as a new std::mutex has to be created
|
// required, as a new std::mutex has to be created
|
||||||
HostDeviceVectorImpl(const HostDeviceVectorImpl<T>& other)
|
HostDeviceVectorImpl(const HostDeviceVectorImpl<T>& other)
|
||||||
: data_h_(other.data_h_), perm_h_(other.perm_h_), size_d_(other.size_d_),
|
: data_h_(other.data_h_), perm_h_(other.perm_h_), size_d_(other.size_d_),
|
||||||
distribution_(other.distribution_), mutex_(), shards_(other.shards_) {}
|
distribution_(other.distribution_), mutex_() {
|
||||||
|
shards_.resize(other.shards_.size());
|
||||||
|
dh::ExecuteIndexShards(&shards_, [&](int i, DeviceShard& shard) {
|
||||||
|
shard.Init(this, other.shards_[i]);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
// Init can be std::vector<T> or std::initializer_list<T>
|
// Init can be std::vector<T> or std::initializer_list<T>
|
||||||
template <class Init>
|
template <class Init>
|
||||||
|
|||||||
@ -155,6 +155,29 @@ TEST(HostDeviceVector, TestExplicit) {
|
|||||||
TestHostDeviceVector(n, distribution, starts, sizes);
|
TestHostDeviceVector(n, distribution, starts, sizes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(HostDeviceVector, TestCopy) {
|
||||||
|
size_t n = 1001;
|
||||||
|
int n_devices = 2;
|
||||||
|
auto distribution = GPUDistribution::Block(GPUSet::Range(0, n_devices));
|
||||||
|
std::vector<size_t> starts{0, 501};
|
||||||
|
std::vector<size_t> sizes{501, 500};
|
||||||
|
SetCudaSetDeviceHandler(SetDevice);
|
||||||
|
|
||||||
|
HostDeviceVector<int> v;
|
||||||
|
{
|
||||||
|
// a separate scope to ensure that v1 is gone before further checks
|
||||||
|
HostDeviceVector<int> v1;
|
||||||
|
InitHostDeviceVector(n, distribution, &v1);
|
||||||
|
v = v1;
|
||||||
|
}
|
||||||
|
CheckDevice(&v, starts, sizes, 0, GPUAccess::kRead);
|
||||||
|
PlusOne(&v);
|
||||||
|
CheckDevice(&v, starts, sizes, 1, GPUAccess::kWrite);
|
||||||
|
CheckHost(&v, GPUAccess::kRead);
|
||||||
|
CheckHost(&v, GPUAccess::kWrite);
|
||||||
|
SetCudaSetDeviceHandler(nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
TEST(HostDeviceVector, Span) {
|
TEST(HostDeviceVector, Span) {
|
||||||
HostDeviceVector<float> vec {1.0f, 2.0f, 3.0f, 4.0f};
|
HostDeviceVector<float> vec {1.0f, 2.0f, 3.0f, 4.0f};
|
||||||
vec.Reshard(GPUSet{0, 1});
|
vec.Reshard(GPUSet{0, 1});
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user