diff --git a/include/xgboost/data.h b/include/xgboost/data.h index 31525ee72..a3b870712 100644 --- a/include/xgboost/data.h +++ b/include/xgboost/data.h @@ -59,8 +59,25 @@ class MetaInfo { * can be used to specify initial prediction to boost from. */ HostDeviceVector base_margin_; + /*! \brief default constructor */ MetaInfo() = default; + MetaInfo& operator=(MetaInfo const& that) { + this->num_row_ = that.num_row_; + this->num_col_ = that.num_col_; + this->num_nonzero_ = that.num_nonzero_; + + this->labels_.Resize(that.labels_.Size()); + this->labels_.Copy(that.labels_); + + this->group_ptr_ = that.group_ptr_; + + this->weights_.Resize(that.weights_.Size()); + this->weights_.Copy(that.weights_); + this->base_margin_.Resize(that.base_margin_.Size()); + this->base_margin_.Copy(that.base_margin_); + return *this; + } /*! * \brief Get weight of each instances. * \param i Instance index. @@ -246,10 +263,10 @@ class SparsePage { /** * \brief Pushes external data batch onto this page * - * \tparam AdapterBatchT - * \param batch - * \param missing - * \param nthread + * \tparam AdapterBatchT + * \param batch + * \param missing + * \param nthread * * \return The maximum number of columns encountered in this input batch. Useful when pushing many adapter batches to work out the total number of columns. */ diff --git a/include/xgboost/host_device_vector.h b/include/xgboost/host_device_vector.h index e533d134b..7a2ea3d6d 100644 --- a/include/xgboost/host_device_vector.h +++ b/include/xgboost/host_device_vector.h @@ -88,8 +88,13 @@ class HostDeviceVector { HostDeviceVector(std::initializer_list init, int device = -1); explicit HostDeviceVector(const std::vector& init, int device = -1); ~HostDeviceVector(); - HostDeviceVector(const HostDeviceVector&); - HostDeviceVector& operator=(const HostDeviceVector&); + + HostDeviceVector(const HostDeviceVector&) = delete; + HostDeviceVector(HostDeviceVector&&); + + HostDeviceVector& operator=(const HostDeviceVector&) = delete; + HostDeviceVector& operator=(HostDeviceVector&&); + size_t Size() const; int DeviceIdx() const; common::Span DeviceSpan(); @@ -116,6 +121,7 @@ class HostDeviceVector { bool HostCanWrite() const; bool DeviceCanRead() const; bool DeviceCanWrite() const; + GPUAccess DeviceAccess() const; void SetDevice(int device) const; diff --git a/src/common/host_device_vector.cc b/src/common/host_device_vector.cc index db31b2fe9..1dd9997fa 100644 --- a/src/common/host_device_vector.cc +++ b/src/common/host_device_vector.cc @@ -8,6 +8,7 @@ #include #include #include +#include #include #include "xgboost/host_device_vector.h" @@ -18,6 +19,7 @@ struct HostDeviceVectorImpl { explicit HostDeviceVectorImpl(size_t size, T v) : data_h_(size, v) {} HostDeviceVectorImpl(std::initializer_list init) : data_h_(init) {} explicit HostDeviceVectorImpl(std::vector init) : data_h_(std::move(init)) {} + HostDeviceVectorImpl(HostDeviceVectorImpl&& that) : data_h_(std::move(that.data_h_)) {} void Swap(HostDeviceVectorImpl &other) { data_h_.swap(other.data_h_); @@ -47,6 +49,22 @@ HostDeviceVector::HostDeviceVector(const std::vector& init, int device) impl_ = new HostDeviceVectorImpl(init); } +template +HostDeviceVector::HostDeviceVector(HostDeviceVector&& that) { + impl_ = new HostDeviceVectorImpl(std::move(*that.impl_)); +} + +template +HostDeviceVector& HostDeviceVector::operator=(HostDeviceVector&& that) { + if (this == &that) { return *this; } + + std::unique_ptr> new_impl( + new HostDeviceVectorImpl(std::move(*that.impl_))); + delete impl_; + impl_ = new_impl.release(); + return *this; +} + template HostDeviceVector::~HostDeviceVector() { delete impl_; @@ -54,21 +72,8 @@ HostDeviceVector::~HostDeviceVector() { } template -HostDeviceVector::HostDeviceVector(const HostDeviceVector& other) - : impl_(nullptr) { - impl_ = new HostDeviceVectorImpl(*other.impl_); -} - -template -HostDeviceVector& HostDeviceVector::operator=(const HostDeviceVector& other) { - if (this == &other) { - return *this; - } - - HostDeviceVectorImpl newInstance(*other.impl_); - newInstance.Swap(*impl_); - - return *this; +GPUAccess HostDeviceVector::DeviceAccess() const { + return kNone; } template diff --git a/src/common/host_device_vector.cu b/src/common/host_device_vector.cu index b27d811f2..6180b6312 100644 --- a/src/common/host_device_vector.cu +++ b/src/common/host_device_vector.cu @@ -29,7 +29,7 @@ class HostDeviceVectorImpl { if (device >= 0) { gpu_access_ = GPUAccess::kWrite; SetDevice(); - data_d_.resize(size, v); + data_d_->resize(size, v); } else { data_h_.resize(size, v); } @@ -47,34 +47,40 @@ class HostDeviceVectorImpl { } } + HostDeviceVectorImpl(HostDeviceVectorImpl&& that) : + device_{that.device_}, + data_h_{std::move(that.data_h_)}, + data_d_{std::move(that.data_d_)}, + gpu_access_{that.gpu_access_} {} + ~HostDeviceVectorImpl() { if (device_ >= 0) { SetDevice(); } } - size_t Size() const { return HostCanRead() ? data_h_.size() : data_d_.size(); } + size_t Size() const { return HostCanRead() ? data_h_.size() : data_d_->size(); } int DeviceIdx() const { return device_; } T* DevicePointer() { LazySyncDevice(GPUAccess::kWrite); - return data_d_.data().get(); + return data_d_->data().get(); } const T* ConstDevicePointer() { LazySyncDevice(GPUAccess::kRead); - return data_d_.data().get(); + return data_d_->data().get(); } common::Span DeviceSpan() { LazySyncDevice(GPUAccess::kWrite); - return {data_d_.data().get(), Size()}; + return {data_d_->data().get(), Size()}; } common::Span ConstDeviceSpan() { LazySyncDevice(GPUAccess::kRead); - return {data_d_.data().get(), Size()}; + return {data_d_->data().get(), Size()}; } void Fill(T v) { // NOLINT @@ -83,17 +89,19 @@ class HostDeviceVectorImpl { } else { gpu_access_ = GPUAccess::kWrite; SetDevice(); - thrust::fill(data_d_.begin(), data_d_.end(), v); + thrust::fill(data_d_->begin(), data_d_->end(), v); } } void Copy(HostDeviceVectorImpl* other) { CHECK_EQ(Size(), other->Size()); + SetDevice(other->device_); // Data is on host. if (HostCanWrite() && other->HostCanWrite()) { std::copy(other->data_h_.begin(), other->data_h_.end(), data_h_.begin()); return; } + SetDevice(); CopyToDevice(other); } @@ -138,11 +146,11 @@ class HostDeviceVectorImpl { void Resize(size_t new_size, T v) { if (new_size == Size()) { return; } - if (Size() == 0 && device_ >= 0) { + if ((Size() == 0 && device_ >= 0) || (DeviceCanWrite() && device_ >= 0)) { // fast on-device resize gpu_access_ = GPUAccess::kWrite; SetDevice(); - data_d_.resize(new_size, v); + data_d_->resize(new_size, v); } else { // resize on host LazySyncHost(GPUAccess::kNone); @@ -158,11 +166,11 @@ class HostDeviceVectorImpl { return; } gpu_access_ = access; - if (data_h_.size() != data_d_.size()) { data_h_.resize(data_d_.size()); } + if (data_h_.size() != data_d_->size()) { data_h_.resize(data_d_->size()); } SetDevice(); dh::safe_cuda(cudaMemcpy(data_h_.data(), - data_d_.data().get(), - data_d_.size() * sizeof(T), + data_d_->data().get(), + data_d_->size() * sizeof(T), cudaMemcpyDeviceToHost)); } @@ -176,9 +184,9 @@ class HostDeviceVectorImpl { // data is on the host LazyResizeDevice(data_h_.size()); SetDevice(); - dh::safe_cuda(cudaMemcpy(data_d_.data().get(), + dh::safe_cuda(cudaMemcpy(data_d_->data().get(), data_h_.data(), - data_d_.size() * sizeof(T), + data_d_->size() * sizeof(T), cudaMemcpyHostToDevice)); gpu_access_ = access; } @@ -189,11 +197,12 @@ class HostDeviceVectorImpl { bool DeviceCanAccess(GPUAccess access) const { return gpu_access_ >= access; } bool DeviceCanRead() const { return DeviceCanAccess(GPUAccess::kRead); } bool DeviceCanWrite() const { return DeviceCanAccess(GPUAccess::kWrite); } + GPUAccess Access() const { return gpu_access_; } private: int device_{-1}; std::vector data_h_{}; - dh::device_vector data_d_{}; + std::unique_ptr> data_d_{}; GPUAccess gpu_access_{GPUAccess::kNone}; void CopyToDevice(HostDeviceVectorImpl* other) { @@ -203,8 +212,8 @@ class HostDeviceVectorImpl { LazyResizeDevice(Size()); gpu_access_ = GPUAccess::kWrite; SetDevice(); - dh::safe_cuda(cudaMemcpyAsync(data_d_.data().get(), other->data_d_.data().get(), - data_d_.size() * sizeof(T), cudaMemcpyDefault)); + dh::safe_cuda(cudaMemcpyAsync(data_d_->data().get(), other->data_d_->data().get(), + data_d_->size() * sizeof(T), cudaMemcpyDefault)); } } @@ -212,14 +221,14 @@ class HostDeviceVectorImpl { LazyResizeDevice(Size()); gpu_access_ = GPUAccess::kWrite; SetDevice(); - dh::safe_cuda(cudaMemcpyAsync(data_d_.data().get(), begin, - data_d_.size() * sizeof(T), cudaMemcpyDefault)); + dh::safe_cuda(cudaMemcpyAsync(data_d_->data().get(), begin, + data_d_->size() * sizeof(T), cudaMemcpyDefault)); } void LazyResizeDevice(size_t new_size) { - if (new_size == data_d_.size()) { return; } + if (data_d_ && new_size == data_d_->size()) { return; } SetDevice(); - data_d_.resize(new_size); + data_d_->resize(new_size); } void SetDevice() { @@ -229,6 +238,10 @@ class HostDeviceVectorImpl { } else { (*cudaSetDeviceHandler)(device_); } + + if (!data_d_) { + data_d_.reset(new dh::device_vector); + } } }; @@ -245,16 +258,17 @@ HostDeviceVector::HostDeviceVector(const std::vector& init, int device) : impl_(new HostDeviceVectorImpl(init, device)) {} template -HostDeviceVector::HostDeviceVector(const HostDeviceVector& other) - : impl_(new HostDeviceVectorImpl(*other.impl_)) {} +HostDeviceVector::HostDeviceVector(HostDeviceVector&& other) + : impl_(new HostDeviceVectorImpl(std::move(*other.impl_))) {} template -HostDeviceVector& HostDeviceVector::operator=(const HostDeviceVector& other) { +HostDeviceVector& HostDeviceVector::operator=(HostDeviceVector&& other) { if (this == &other) { return *this; } - std::unique_ptr> newImpl(new HostDeviceVectorImpl(*other.impl_)); + std::unique_ptr> new_impl( + new HostDeviceVectorImpl(std::move(*other.impl_))); delete impl_; - impl_ = newImpl.release(); + impl_ = new_impl.release(); return *this; } @@ -338,6 +352,11 @@ bool HostDeviceVector::DeviceCanWrite() const { return impl_->DeviceCanWrite(); } +template +GPUAccess HostDeviceVector::DeviceAccess() const { + return impl_->Access(); +} + template void HostDeviceVector::SetDevice(int device) const { impl_->SetDevice(device); diff --git a/src/predictor/gpu_predictor.cu b/src/predictor/gpu_predictor.cu index 92baa4ebb..1ff8d796d 100644 --- a/src/predictor/gpu_predictor.cu +++ b/src/predictor/gpu_predictor.cu @@ -339,6 +339,7 @@ class GPUPredictor : public xgboost::Predictor { // the first step only modifies prediction store in learner without following code. InitOutPredictions(cache_emtry->second.data->Info(), &(cache_emtry->second.predictions), model); + CHECK_EQ(cache_emtry->second.predictions.Size(), out_preds->Size()); cache_emtry->second.predictions.Copy(*out_preds); } } diff --git a/tests/cpp/common/test_host_device_vector.cu b/tests/cpp/common/test_host_device_vector.cu index cb328dfdd..777f8f7c2 100644 --- a/tests/cpp/common/test_host_device_vector.cu +++ b/tests/cpp/common/test_host_device_vector.cu @@ -60,6 +60,7 @@ void PlusOne(HostDeviceVector *v) { SetDevice(device); thrust::transform(dh::tcbegin(*v), dh::tcend(*v), dh::tbegin(*v), [=]__device__(unsigned int a){ return a + 1; }); + ASSERT_TRUE(v->DeviceCanWrite()); } void CheckDevice(HostDeviceVector* v, @@ -125,7 +126,8 @@ TEST(HostDeviceVector, Copy) { // a separate scope to ensure that v1 is gone before further checks HostDeviceVector v1; InitHostDeviceVector(n, device, &v1); - v = v1; + v.Resize(v1.Size()); + v.Copy(v1); } CheckDevice(&v, n, 0, GPUAccess::kRead); PlusOne(&v); diff --git a/tests/cpp/common/test_random.cc b/tests/cpp/common/test_random.cc index 03f6251d5..dc7b38554 100644 --- a/tests/cpp/common/test_random.cc +++ b/tests/cpp/common/test_random.cc @@ -11,26 +11,26 @@ TEST(ColumnSampler, Test) { // No node sampling cs.Init(n, 1.0f, 0.5f, 0.5f); - auto set0 = *cs.GetFeatureSet(0); - ASSERT_EQ(set0.Size(), 32); + auto set0 = cs.GetFeatureSet(0); + ASSERT_EQ(set0->Size(), 32); - auto set1 = *cs.GetFeatureSet(0); + auto set1 = cs.GetFeatureSet(0); - ASSERT_EQ(set0.HostVector(), set1.HostVector()); + ASSERT_EQ(set0->HostVector(), set1->HostVector()); - auto set2 = *cs.GetFeatureSet(1); - ASSERT_NE(set1.HostVector(), set2.HostVector()); - ASSERT_EQ(set2.Size(), 32); + auto set2 = cs.GetFeatureSet(1); + ASSERT_NE(set1->HostVector(), set2->HostVector()); + ASSERT_EQ(set2->Size(), 32); // Node sampling cs.Init(n, 0.5f, 1.0f, 0.5f); - auto set3 = *cs.GetFeatureSet(0); - ASSERT_EQ(set3.Size(), 32); + auto set3 = cs.GetFeatureSet(0); + ASSERT_EQ(set3->Size(), 32); - auto set4 = *cs.GetFeatureSet(0); + auto set4 = cs.GetFeatureSet(0); - ASSERT_NE(set3.HostVector(), set4.HostVector()); - ASSERT_EQ(set4.Size(), 32); + ASSERT_NE(set3->HostVector(), set4->HostVector()); + ASSERT_EQ(set4->Size(), 32); // No level or node sampling, should be the same at different depth cs.Init(n, 1.0f, 1.0f, 0.5f); @@ -38,11 +38,11 @@ TEST(ColumnSampler, Test) { cs.GetFeatureSet(1)->HostVector()); cs.Init(n, 1.0f, 1.0f, 1.0f); - auto set5 = *cs.GetFeatureSet(0); - ASSERT_EQ(set5.Size(), n); + auto set5 = cs.GetFeatureSet(0); + ASSERT_EQ(set5->Size(), n); cs.Init(n, 1.0f, 1.0f, 1.0f); - auto set6 = *cs.GetFeatureSet(0); - ASSERT_EQ(set5.HostVector(), set6.HostVector()); + auto set6 = cs.GetFeatureSet(0); + ASSERT_EQ(set5->HostVector(), set6->HostVector()); // Should always be a minimum of one feature cs.Init(n, 1e-16f, 1e-16f, 1e-16f);