/*! * Copyright 2017-2019 XGBoost contributors */ /** * @file host_device_vector.h * @brief A device-and-host vector abstraction layer. * * Why HostDeviceVector?
* With CUDA, one has to explicitly manage memory through 'cudaMemcpy' calls. * This wrapper class hides this management from the users, thereby making it * easy to integrate GPU/CPU usage under a single interface. * * Initialization/Allocation:
* One can choose to initialize the vector on CPU or GPU during construction * (use the 'device' argument), or use the 'Resize' method to * allocate/resize memory explicitly and the 'SetDevice' method * to specify the device. * * Accessing underlying data:
* Use the 'HostVector' method to explicitly query for the underlying std::vector. * If you need the raw device pointer, use the 'DevicePointer' method. For perf * implications of these calls, see below. * * Accessing underlying data and their perf implications:
* There are 4 scenarios to be considered here: * HostVector and data on CPU --> no problems, std::vector returned immediately * HostVector but data on GPU --> this causes a cudaMemcpy to be issued internally. * subsequent calls to HostVector, will NOT incur this penalty. * (assuming 'DevicePointer' is not called in between) * DevicePointer but data on CPU --> this causes a cudaMemcpy to be issued internally. * subsequent calls to DevicePointer, will NOT incur this penalty. * (assuming 'HostVector' is not called in between) * DevicePointer and data on GPU --> no problems, the device ptr * will be returned immediately. * * What if xgboost is compiled without CUDA?
* In that case, there's a special implementation which always falls-back to * working with std::vector. This logic can be found in host_device_vector.cc * * Why not consider CUDA unified memory?
* We did consider. However, it poses complications if we need to support both * compiling with and without CUDA toolkit. It was easier to have * 'HostDeviceVector' with a special-case implementation in host_device_vector.cc * * @note: Size and Devices methods are thread-safe. */ #ifndef XGBOOST_HOST_DEVICE_VECTOR_H_ #define XGBOOST_HOST_DEVICE_VECTOR_H_ #include #include #include "span.h" namespace xgboost { #ifdef __CUDACC__ // Sets a function to call instead of cudaSetDevice(); // only added for testing void SetCudaSetDeviceHandler(void (*handler)(int)); #endif // __CUDACC__ template struct HostDeviceVectorImpl; /*! * \brief Controls data access from the GPU. * * Since a `HostDeviceVector` can have data on both the host and device, access control needs to be * maintained to keep the data consistent. * * There are 3 scenarios supported: * - Data is being manipulated on device. GPU has write access, host doesn't have access. * - Data is read-only on both the host and device. * - Data is being manipulated on the host. Host has write access, device doesn't have access. 
*/ enum GPUAccess { kNone, kRead, // write implies read kWrite }; template class HostDeviceVector { public: explicit HostDeviceVector(size_t size = 0, T v = T(), int device = -1); HostDeviceVector(std::initializer_list init, int device = -1); explicit HostDeviceVector(const std::vector& init, int device = -1); ~HostDeviceVector(); HostDeviceVector(const HostDeviceVector&) = delete; HostDeviceVector(HostDeviceVector&&); HostDeviceVector& operator=(const HostDeviceVector&) = delete; HostDeviceVector& operator=(HostDeviceVector&&); size_t Size() const; int DeviceIdx() const; common::Span DeviceSpan(); common::Span ConstDeviceSpan() const; common::Span DeviceSpan() const { return ConstDeviceSpan(); } T* DevicePointer(); const T* ConstDevicePointer() const; const T* DevicePointer() const { return ConstDevicePointer(); } T* HostPointer() { return HostVector().data(); } common::Span HostSpan() { return common::Span{HostVector()}; } common::Span HostSpan() const { return common::Span{HostVector()}; } common::Span ConstHostSpan() const { return HostSpan(); } const T* ConstHostPointer() const { return ConstHostVector().data(); } const T* HostPointer() const { return ConstHostPointer(); } void Fill(T v); void Copy(const HostDeviceVector& other); void Copy(const std::vector& other); void Copy(std::initializer_list other); std::vector& HostVector(); const std::vector& ConstHostVector() const; const std::vector& HostVector() const {return ConstHostVector(); } bool HostCanRead() const; bool HostCanWrite() const; bool DeviceCanRead() const; bool DeviceCanWrite() const; GPUAccess DeviceAccess() const; void SetDevice(int device) const; void Resize(size_t new_size, T v = T()); using value_type = T; // NOLINT private: HostDeviceVectorImpl* impl_; }; } // namespace xgboost #endif // XGBOOST_HOST_DEVICE_VECTOR_H_