/**
 * Copyright 2017-2024 by XGBoost contributors
 */

#ifdef XGBOOST_USE_SYCL

// implementation of HostDeviceVector with sycl support

// NOTE(review): the standard headers below were restored from usage in this file
// (std::copy/std::fill, std::unique_ptr, std::move/std::forward, std::vector,
// std::initializer_list); confirm against the upstream include list.
#include <algorithm>
#include <cstddef>
#include <initializer_list>
#include <memory>
#include <utility>
#include <vector>

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-W#pragma-messages"
#include "xgboost/host_device_vector.h"
#pragma GCC diagnostic pop

#include "../device_manager.h"
#include "../data.h"

namespace xgboost {
/**
 * \brief Storage behind HostDeviceVector when built with SYCL support.
 *
 * Keeps one host copy (`data_h_`) and one lazily created device copy (`data_d_`)
 * and tracks which side currently holds valid data through `device_access_`:
 *  - kNone : only the host copy is valid (host may read and write),
 *  - kRead : both copies are valid (both sides may read),
 *  - kWrite: only the device copy is valid (device may read and write).
 * The comparisons in HostCanAccess()/DeviceCanAccess() rely on the enumerators
 * being ordered kNone < kRead < kWrite.
 */
template <typename T>
class HostDeviceVectorImpl {
  // NOTE(review): the memory-type argument was restored as on-device USM; confirm
  // against the declaration of sycl::USMVector in ../data.h.
  using DeviceStorage = sycl::USMVector<T, sycl::MemoryType::on_device>;

 public:
  /**
   * \brief Create a vector of `size` copies of `v`, stored on the device when
   *        `device` is a SYCL device and on the host otherwise.
   */
  explicit HostDeviceVectorImpl(size_t size, T v, DeviceOrd device) : device_(device) {
    if (device.IsSycl()) {
      device_access_ = GPUAccess::kWrite;
      SetDevice();
      data_d_->Resize(qu_, size, v);
    } else {
      data_h_.resize(size, v);
    }
  }

  /**
   * \brief Create a vector from `init` (a std::vector<T> or std::initializer_list<T>),
   *        stored on the device when `device` is a SYCL device and on the host otherwise.
   */
  template <class Initializer>
  HostDeviceVectorImpl(const Initializer& init, DeviceOrd device) : device_(device) {
    if (device.IsSycl()) {
      device_access_ = GPUAccess::kWrite;

      ResizeDevice(init.size());
      Copy(init);
    } else {
      data_h_ = init;
    }
  }

  /**
   * \brief Take over the host and device storage of `that`.
   *
   * The queue pointer is intentionally not copied; it is looked up again on first
   * use by SetDevice().
   */
  HostDeviceVectorImpl(HostDeviceVectorImpl<T>&& that)
      : device_{that.device_},
        data_h_{std::move(that.data_h_)},
        data_d_{std::move(that.data_d_)},
        device_access_{that.device_access_} {
    // `that.data_d_` is null from here on. Mark the moved-from object as host-owned
    // so that a later SyncHost()/HostVector() on it does not dereference the null
    // device storage.
    that.device_access_ = GPUAccess::kNone;
  }

  /** \brief Mutable host view; revokes all device access. */
  std::vector<T>& HostVector() {
    SyncHost(GPUAccess::kNone);
    return data_h_;
  }

  /** \brief Read-only host view; the device keeps at most read access. */
  const std::vector<T>& ConstHostVector() {
    SyncHost(GPUAccess::kRead);
    return data_h_;
  }

  /**
   * \brief Change the device this vector is associated with.
   *
   * Data is pulled back to the host before leaving a SYCL device. Switching from
   * one SYCL device directly to a different one is rejected by the CHECK below.
   */
  void SetDevice(DeviceOrd device) {
    if (device_ == device) { return; }
    if (device_.IsSycl()) {
      SyncHost(GPUAccess::kNone);
    }

    if (device_.IsSycl() && device.IsSycl()) {
      CHECK_EQ(device_, device)
          << "New device is different from previous one.";
    }
    device_ = device;
    if (device_.IsSycl()) {
      ResizeDevice(data_h_.size());
    }
  }

  /**
   * \brief Resize to `new_size` elements; `args` (at most a fill value) is forwarded
   *        to the underlying container's resize.
   *
   * The resize happens on the device when the vector is bound to a SYCL device and
   * is either empty or already device-writable; otherwise it happens on the host.
   */
  template <typename... U>
  void Resize(size_t new_size, U&&... args) {
    if (new_size == Size()) {
      return;
    }
    if (device_.IsSycl() && (Size() == 0 || DeviceCanWrite())) {
      // fast on-device resize
      device_access_ = GPUAccess::kWrite;
      SetDevice();
      data_d_->Resize(qu_, new_size, std::forward<U>(args)...);
    } else {
      // resize on host
      SyncHost(GPUAccess::kNone);
      data_h_.resize(new_size, std::forward<U>(args)...);
    }
  }

  /**
   * \brief Make the host copy valid and lower the device's access to `access`.
   *
   * Copies device -> host (blocking) only when the host copy is stale.
   */
  void SyncHost(GPUAccess access) {
    if (HostCanAccess(access)) { return; }
    if (HostCanRead()) {
      // data is present, just need to deny access to the device
      device_access_ = access;
      return;
    }
    device_access_ = access;
    if (data_h_.size() != data_d_->Size()) { data_h_.resize(data_d_->Size()); }
    SetDevice();
    qu_->memcpy(data_h_.data(), data_d_->Data(), data_d_->Size() * sizeof(T)).wait();
  }

  /**
   * \brief Make the device copy valid and raise the device's access to `access`.
   *
   * Copies host -> device (blocking) only when the device copy is stale.
   */
  void SyncDevice(GPUAccess access) {
    if (DeviceCanAccess(access)) { return; }
    if (DeviceCanRead()) {
      device_access_ = access;
      return;
    }
    // data is on the host
    ResizeDevice(data_h_.size());
    SetDevice();
    qu_->memcpy(data_d_->Data(), data_h_.data(), data_d_->Size() * sizeof(T)).wait();
    device_access_ = access;
  }

  bool HostCanAccess(GPUAccess access) const { return device_access_ <= access; }
  bool HostCanRead() const { return HostCanAccess(GPUAccess::kRead); }
  bool HostCanWrite() const { return HostCanAccess(GPUAccess::kNone); }
  bool DeviceCanAccess(GPUAccess access) const { return device_access_ >= access; }
  bool DeviceCanRead() const { return DeviceCanAccess(GPUAccess::kRead); }
  bool DeviceCanWrite() const { return DeviceCanAccess(GPUAccess::kWrite); }
  GPUAccess Access() const { return device_access_; }

  /** \brief Number of elements, read from whichever copy is currently valid. */
  size_t Size() const {
    if (HostCanRead()) {
      return data_h_.size();
    }
    return data_d_ ? data_d_->Size() : 0;
  }

  DeviceOrd Device() const { return device_; }

  /** \brief Mutable device pointer; invalidates the host copy. */
  T* DevicePointer() {
    SyncDevice(GPUAccess::kWrite);
    return data_d_->Data();
  }

  /** \brief Read-only device pointer; the host copy stays readable. */
  const T* ConstDevicePointer() {
    SyncDevice(GPUAccess::kRead);
    return data_d_->DataConst();
  }

  /** \brief Mutable device span; invalidates the host copy. */
  common::Span<T> DeviceSpan() {
    SyncDevice(GPUAccess::kWrite);
    return {this->DevicePointer(), Size()};
  }

  /** \brief Read-only device span; the host copy stays readable. */
  common::Span<const T> ConstDeviceSpan() {
    SyncDevice(GPUAccess::kRead);
    return {this->ConstDevicePointer(), Size()};
  }

  /** \brief Set every element to `v`, on the host if it is writable there, else on the device. */
  void Fill(T v) {
    if (HostCanWrite()) {
      std::fill(data_h_.begin(), data_h_.end(), v);
    } else {
      device_access_ = GPUAccess::kWrite;
      SetDevice();
      qu_->fill(data_d_->Data(), v, data_d_->Size()).wait();
    }
  }

  /**
   * \brief Copy the contents of `other` into this vector; sizes must already match.
   *
   * This vector is first moved to `other`'s device. The copy is done host-to-host
   * when both sides are host-writable, otherwise the destination is the device copy.
   */
  void Copy(HostDeviceVectorImpl<T>* other) {
    CHECK_EQ(Size(), other->Size());
    SetDevice(other->device_);
    // Data is on host.
    if (HostCanWrite() && other->HostCanWrite()) {
      std::copy(other->data_h_.begin(), other->data_h_.end(), data_h_.begin());
      return;
    }
    SetDevice();
    CopyToDevice(other);
  }

  /** \brief Copy from a host vector of the same size. */
  void Copy(const std::vector<T>& other) {
    CHECK_EQ(Size(), other.size());
    if (HostCanWrite()) {
      std::copy(other.begin(), other.end(), data_h_.begin());
    } else {
      CopyToDevice(other.data());
    }
  }

  /** \brief Copy from an initializer list of the same size. */
  void Copy(std::initializer_list<T> other) {
    CHECK_EQ(Size(), other.size());
    if (HostCanWrite()) {
      std::copy(other.begin(), other.end(), data_h_.begin());
    } else {
      CopyToDevice(other.begin());
    }
  }

  /** \brief Append the contents of `other` to this vector. */
  void Extend(HostDeviceVectorImpl<T>* other) {
    auto ori_size = this->Size();
    this->Resize(ori_size + other->Size(), T{});
    if (HostCanWrite() && other->HostCanRead()) {
      auto& h_vec = this->HostVector();
      // `other` is only read here, so take the read-only view: it leaves `other`'s
      // device copy valid instead of revoking the device's read access.
      const auto& other_vec = other->ConstHostVector();
      CHECK_EQ(h_vec.size(), ori_size + other->Size());
      std::copy(other_vec.cbegin(), other_vec.cend(), h_vec.begin() + ori_size);
    } else {
      auto ptr = other->ConstDevicePointer();
      SetDevice();
      CHECK_EQ(this->Device(), other->Device());
      qu_->memcpy(this->DevicePointer() + ori_size, ptr, other->Size() * sizeof(T)).wait();
    }
  }

 private:
  /** \brief Resize the device storage, creating it first if needed. */
  void ResizeDevice(size_t new_size) {
    if (data_d_ && new_size == data_d_->Size()) { return; }
    SetDevice();
    data_d_->Resize(qu_, new_size);
  }

  /** \brief Lazily obtain the queue for `device_` and allocate the device storage object. */
  void SetDevice() {
    if (!qu_) {
      qu_ = device_manager_.GetQueue(device_);
    }
    if (!data_d_) {
      data_d_.reset(new DeviceStorage());
    }
  }

  /** \brief Copy `other` into the device storage, from its host or its device copy. */
  void CopyToDevice(HostDeviceVectorImpl* other) {
    if (other->HostCanWrite()) {
      CopyToDevice(other->data_h_.data());
    } else {
      ResizeDevice(Size());
      device_access_ = GPUAccess::kWrite;
      SetDevice();
      qu_->memcpy(data_d_->Data(), other->data_d_->Data(), data_d_->Size() * sizeof(T)).wait();
    }
  }

  /** \brief Copy Size() elements starting at host pointer `begin` into the device storage. */
  void CopyToDevice(const T* begin) {
    // The queue is not transferred by the move constructor, so make sure both the
    // queue and the device storage exist before they are dereferenced.
    SetDevice();
    data_d_->ResizeNoCopy(qu_, Size());
    qu_->memcpy(data_d_->Data(), begin, data_d_->Size() * sizeof(T)).wait();
    device_access_ = GPUAccess::kWrite;
  }

  sycl::DeviceManager device_manager_;
  ::sycl::queue* qu_ = nullptr;
  DeviceOrd device_{DeviceOrd::CPU()};
  std::vector<T> data_h_{};
  std::unique_ptr<DeviceStorage> data_d_{};
  GPUAccess device_access_{GPUAccess::kNone};
};

template <typename T>
HostDeviceVector<T>::HostDeviceVector(size_t size, T v, DeviceOrd device)
    : impl_(nullptr) {
  impl_ = new HostDeviceVectorImpl<T>(size, v, device);
}

template <typename T>
HostDeviceVector<T>::HostDeviceVector(std::initializer_list<T> init, DeviceOrd device)
    : impl_(nullptr) {
  impl_ = new HostDeviceVectorImpl<T>(init, device);
}

template <typename T>
HostDeviceVector<T>::HostDeviceVector(const std::vector<T>& init, DeviceOrd device)
    : impl_(nullptr) {
  impl_ = new HostDeviceVectorImpl<T>(init, device);
}

template <typename T>
HostDeviceVector<T>::HostDeviceVector(HostDeviceVector<T>&& that) {
  impl_ = new HostDeviceVectorImpl<T>(std::move(*that.impl_));
}

template <typename T>
HostDeviceVector<T>& HostDeviceVector<T>::operator=(HostDeviceVector<T>&& that) {
  if (this == &that) { return *this; }

  // Build the replacement first so that `impl_` is untouched if construction throws.
  std::unique_ptr<HostDeviceVectorImpl<T>> new_impl(
      new HostDeviceVectorImpl<T>(std::move(*that.impl_)));
  delete impl_;
  impl_ = new_impl.release();
  return *this;
}

template <typename T>
HostDeviceVector<T>::~HostDeviceVector() {
  delete impl_;
  impl_ = nullptr;
}

template <typename T>
size_t HostDeviceVector<T>::Size() const { return impl_->Size(); }

template <typename T>
DeviceOrd HostDeviceVector<T>::Device() const {
  return impl_->Device();
}

template <typename T>
T* HostDeviceVector<T>::DevicePointer() {
  return impl_->DevicePointer();
}

template <typename T>
const T* HostDeviceVector<T>::ConstDevicePointer() const {
  return impl_->ConstDevicePointer();
}

template <typename T>
common::Span<T> HostDeviceVector<T>::DeviceSpan() {
  return impl_->DeviceSpan();
}

template <typename T>
common::Span<const T> HostDeviceVector<T>::ConstDeviceSpan() const {
  return impl_->ConstDeviceSpan();
}

template <typename T>
std::vector<T>& HostDeviceVector<T>::HostVector() {
  return impl_->HostVector();
}

template <typename T>
const std::vector<T>& HostDeviceVector<T>::ConstHostVector() const {
  return impl_->ConstHostVector();
}

template <typename T>
void HostDeviceVector<T>::Resize(size_t new_size, T v) {
  impl_->Resize(new_size, v);
}

template <typename T>
void HostDeviceVector<T>::Resize(size_t new_size) {
  impl_->Resize(new_size);
}

template <typename T>
void HostDeviceVector<T>::Fill(T v) {
  impl_->Fill(v);
}

template <typename T>
void HostDeviceVector<T>::Copy(const HostDeviceVector<T>& other) {
  impl_->Copy(other.impl_);
}

template <typename T>
void HostDeviceVector<T>::Copy(const std::vector<T>& other) {
  impl_->Copy(other);
}

template <typename T>
void HostDeviceVector<T>::Copy(std::initializer_list<T> other) {
  impl_->Copy(other);
}

template <typename T>
void HostDeviceVector<T>::Extend(HostDeviceVector const& other) {
  impl_->Extend(other.impl_);
}

template <typename T>
bool HostDeviceVector<T>::HostCanRead() const {
  return impl_->HostCanRead();
}

template <typename T>
bool HostDeviceVector<T>::HostCanWrite() const {
  return impl_->HostCanWrite();
}

template <typename T>
bool HostDeviceVector<T>::DeviceCanRead() const {
  return impl_->DeviceCanRead();
}

template <typename T>
bool HostDeviceVector<T>::DeviceCanWrite() const {
  return impl_->DeviceCanWrite();
}

template <typename T>
GPUAccess HostDeviceVector<T>::DeviceAccess() const {
  return impl_->Access();
}

template <typename T>
void HostDeviceVector<T>::SetDevice(DeviceOrd device) const {
  impl_->SetDevice(device);
}

// explicit instantiations are required, as HostDeviceVector isn't header-only
// NOTE(review): the element types below were restored to mirror the instantiation
// list used by the other HostDeviceVector backends; confirm before merging.
template class HostDeviceVector<bst_float>;
template class HostDeviceVector<double>;
template class HostDeviceVector<GradientPair>;
template class HostDeviceVector<GradientPairPrecise>;
template class HostDeviceVector<int32_t>;   // bst_node_t
template class HostDeviceVector<uint8_t>;
template class HostDeviceVector<int8_t>;
template class HostDeviceVector<FeatureType>;
template class HostDeviceVector<Entry>;
template class HostDeviceVector<bst_idx_t>;
template class HostDeviceVector<uint32_t>;  // bst_feature_t

}  // namespace xgboost

#endif  // XGBOOST_USE_SYCL