diff --git a/src/common/common.cc b/src/common/common.cc index fdada302d..c9899bc99 100644 --- a/src/common/common.cc +++ b/src/common/common.cc @@ -20,4 +20,11 @@ GlobalRandomEngine& GlobalRandom() { return RandomThreadLocalStore::Get()->engine; } } // namespace common + +#if !defined(XGBOOST_USE_CUDA) +int AllVisibleImpl::AllVisible() { + return 0; +} +#endif + } // namespace xgboost diff --git a/src/common/common.cu b/src/common/common.cu new file mode 100644 index 000000000..bffc69486 --- /dev/null +++ b/src/common/common.cu @@ -0,0 +1,18 @@ +/*! + * Copyright 2018 XGBoost contributors + */ +#include "common.h" + +namespace xgboost { + +int AllVisibleImpl::AllVisible() { + int n_visgpus = 0; + try { + dh::safe_cuda(cudaGetDeviceCount(&n_visgpus)); + } catch(const std::exception& e) { + return 0; + } + return n_visgpus; +} + +} // namespace xgboost diff --git a/src/common/common.h b/src/common/common.h index 0ead15c3f..ead260000 100644 --- a/src/common/common.h +++ b/src/common/common.h @@ -7,11 +7,39 @@ #define XGBOOST_COMMON_COMMON_H_ #include +#include +#include +#include +#include #include #include #include +#if defined(__CUDACC__) +#include +#include +#endif + +namespace dh { +#if defined(__CUDACC__) +/* + * Error handling functions + */ +#define safe_cuda(ans) ThrowOnCudaError((ans), __FILE__, __LINE__) + +inline cudaError_t ThrowOnCudaError(cudaError_t code, const char *file, + int line) { + if (code != cudaSuccess) { + throw thrust::system_error(code, thrust::cuda_category(), + std::string{file} + "(" + // NOLINT + std::to_string(line) + ")"); + } + return code; +} +#endif +} // namespace dh + namespace xgboost { namespace common { /*! @@ -103,5 +131,87 @@ class Range { }; } // namespace common +struct AllVisibleImpl { + static int AllVisible(); +}; +/* \brief set of devices across which HostDeviceVector can be distributed. + * + * Currently implemented as a range, but can be changed later to something else, + * e.g. a bitset + */ +class GPUSet { + public: + explicit GPUSet(int start = 0, int ndevices = 0) + : devices_(start, start + ndevices) {} + + static GPUSet Empty() { return GPUSet(); } + + static GPUSet Range(int start, int ndevices) { + return ndevices <= 0 ? Empty() : GPUSet{start, ndevices}; + } + /*! \brief ndevices and num_rows both are upper bounds. */ + static GPUSet All(int ndevices, int num_rows = std::numeric_limits::max()) { + int n_devices_visible = AllVisible().Size(); + if (ndevices < 0 || ndevices > n_devices_visible) { + ndevices = n_devices_visible; + } + // fix-up device number to be limited by number of rows + ndevices = ndevices > num_rows ? num_rows : ndevices; + return Range(0, ndevices); + } + static GPUSet AllVisible() { + int n = AllVisibleImpl::AllVisible(); + return Range(0, n); + } + /*! \brief Ensure gpu_id is correct, so not dependent upon user knowing details */ + static int GetDeviceIdx(int gpu_id) { + auto devices = AllVisible(); + CHECK(!devices.IsEmpty()) << "Empty device."; + return (std::abs(gpu_id) + 0) % devices.Size(); + } + /*! \brief Counting from gpu_id */ + GPUSet Normalised(int gpu_id) const { + return Range(gpu_id, Size()); + } + /*! \brief Counting from 0 */ + GPUSet Unnormalised() const { + return Range(0, Size()); + } + + int Size() const { + int res = *devices_.end() - *devices_.begin(); + return res < 0 ? 0 : res; + } + /*! \brief Get normalised device id. */ + int operator[](int index) const { + CHECK(index >= 0 && index < Size()); + return *devices_.begin() + index; + } + + bool IsEmpty() const { return Size() == 0; } + /*! \brief Get un-normalised index. */ + int Index(int device) const { + CHECK(Contains(device)); + return device - *devices_.begin(); + } + + bool Contains(int device) const { + return *devices_.begin() <= device && device < *devices_.end(); + } + + common::Range::Iterator begin() const { return devices_.begin(); } // NOLINT + common::Range::Iterator end() const { return devices_.end(); } // NOLINT + + friend bool operator==(const GPUSet& lhs, const GPUSet& rhs) { + return lhs.devices_ == rhs.devices_; + } + friend bool operator!=(const GPUSet& lhs, const GPUSet& rhs) { + return !(lhs == rhs); + } + + private: + common::Range devices_; +}; + } // namespace xgboost #endif // XGBOOST_COMMON_COMMON_H_ diff --git a/src/common/device_helpers.cuh b/src/common/device_helpers.cuh index 844edad59..c954fcac3 100644 --- a/src/common/device_helpers.cuh +++ b/src/common/device_helpers.cuh @@ -9,7 +9,6 @@ #include #include "common.h" -#include "gpu_set.h" #include #include diff --git a/src/common/gpu_set.h b/src/common/gpu_set.h deleted file mode 100644 index ed9b595a1..000000000 --- a/src/common/gpu_set.h +++ /dev/null @@ -1,122 +0,0 @@ -/*! - * Copyright 2018 XGBoost contributors - */ -#ifndef XGBOOST_COMMON_GPU_SET_H_ -#define XGBOOST_COMMON_GPU_SET_H_ - -#include -#include - -#include -#include - -#include "common.h" -#include "span.h" - -#if defined(__CUDACC__) -#include -#include -#endif - -namespace dh { -#if defined(__CUDACC__) -/* - * Error handling functions - */ -#define safe_cuda(ans) ThrowOnCudaError((ans), __FILE__, __LINE__) - -inline cudaError_t ThrowOnCudaError(cudaError_t code, const char *file, - int line) { - if (code != cudaSuccess) { - throw thrust::system_error(code, thrust::cuda_category(), - std::string{file} + "(" + // NOLINT - std::to_string(line) + ")"); - } - return code; -} -#endif -} // namespace dh - -namespace xgboost { - -/* \brief set of devices across which HostDeviceVector can be distributed. - * - * Currently implemented as a range, but can be changed later to something else, - * e.g. a bitset - */ -class GPUSet { - public: - explicit GPUSet(int start = 0, int ndevices = 0) - : devices_(start, start + ndevices) {} - - static GPUSet Empty() { return GPUSet(); } - - static GPUSet Range(int start, int ndevices) { - return ndevices <= 0 ? Empty() : GPUSet{start, ndevices}; - } - /* \brief ndevices and num_rows both are upper bounds. */ - static GPUSet All(int ndevices, int num_rows = std::numeric_limits::max()) { - int n_devices_visible = AllVisible().Size(); - ndevices = ndevices < 0 ? n_devices_visible : ndevices; - // fix-up device number to be limited by number of rows - ndevices = ndevices > num_rows ? num_rows : ndevices; - return Range(0, ndevices); - } - - static GPUSet AllVisible() { - int n_visgpus = 0; -#if defined(__CUDACC__) - dh::safe_cuda(cudaGetDeviceCount(&n_visgpus)); -#endif - return Range(0, n_visgpus); - } - /* \brief Ensure gpu_id is correct, so not dependent upon user knowing details */ - static int GetDeviceIdx(int gpu_id) { - return (std::abs(gpu_id) + 0) % AllVisible().Size(); - } - /* \brief Counting from gpu_id */ - GPUSet Normalised(int gpu_id) const { - return Range(gpu_id, *devices_.end() + gpu_id); - } - /* \brief Counting from 0 */ - GPUSet Unnormalised() const { - return Range(0, *devices_.end() - *devices_.begin()); - } - - int Size() const { - int res = *devices_.end() - *devices_.begin(); - return res < 0 ? 0 : res; - } - - int operator[](int index) const { - CHECK(index >= 0 && index < *(devices_.end())); - return *devices_.begin() + index; - } - - bool IsEmpty() const { return Size() == 0; } // NOLINT - - int Index(int device) const { - CHECK(Contains(device)); - return device - *devices_.begin(); - } - - bool Contains(int device) const { - return *devices_.begin() <= device && device < *devices_.end(); - } - - common::Range::Iterator begin() const { return devices_.begin(); } // NOLINT - common::Range::Iterator end() const { return devices_.end(); } // NOLINT - - friend bool operator==(const GPUSet& lhs, const GPUSet& rhs) { - return lhs.devices_ == rhs.devices_; - } - friend bool operator!=(const GPUSet& lhs, const GPUSet& rhs) { - return !(lhs == rhs); - } - - private: - common::Range devices_; -}; -} // namespace xgboost - -#endif // XGBOOST_COMMON_GPU_SET_H_ diff --git a/src/common/host_device_vector.cu b/src/common/host_device_vector.cu index 358e9ecdd..179b1ecbd 100644 --- a/src/common/host_device_vector.cu +++ b/src/common/host_device_vector.cu @@ -230,7 +230,7 @@ struct HostDeviceVectorImpl { CHECK(devices.Contains(device)); LazySyncDevice(device, GPUAccess::kWrite); return {shards_[devices.Index(device)].data_.data().get(), - static_cast::index_type>(Size())}; + static_cast::index_type>(DeviceSize(device))}; } common::Span ConstDeviceSpan(int device) { @@ -238,7 +238,7 @@ struct HostDeviceVectorImpl { CHECK(devices.Contains(device)); LazySyncDevice(device, GPUAccess::kRead); return {shards_[devices.Index(device)].data_.data().get(), - static_cast::index_type>(Size())}; + static_cast::index_type>(DeviceSize(device))}; } size_t DeviceSize(int device) { diff --git a/src/common/host_device_vector.h b/src/common/host_device_vector.h index a3ef3082b..3b62bda1b 100644 --- a/src/common/host_device_vector.h +++ b/src/common/host_device_vector.h @@ -14,7 +14,7 @@ * Initialization/Allocation:
* One can choose to initialize the vector on CPU or GPU during constructor. * (use the 'devices' argument) Or, can choose to use the 'Resize' method to - * allocate/resize memory explicitly, and use the 'Reshard' method + * allocate/resize memory explicitly, and use the 'Reshard' method * to specify the devices. * * Accessing underlying data:
@@ -31,7 +31,7 @@ * DevicePointer but data on CPU --> this causes a cudaMemcpy to be issued internally. * subsequent calls to DevicePointer, will NOT incur this penalty. * (assuming 'HostVector' is not called in between) - * DevicePointer and data on GPU --> no problems, the device ptr + * DevicePointer and data on GPU --> no problems, the device ptr * will be returned immediately. * * What if xgboost is compiled without CUDA?
@@ -40,13 +40,13 @@ * * Why not consider CUDA unified memory?
* We did consider. However, it poses complications if we need to support both - * compiling with and without CUDA toolkit. It was easier to have + * compiling with and without CUDA toolkit. It was easier to have * 'HostDeviceVector' with a special-case implementation in host_device_vector.cc * * @note: Size and Devices methods are thread-safe. - * DevicePointer, DeviceStart, DeviceSize, tbegin and tend methods are thread-safe + * DevicePointer, DeviceStart, DeviceSize, tbegin and tend methods are thread-safe * if different threads call these methods with different values of the device argument. - * All other methods are not thread safe. + * All other methods are not thread safe. */ #ifndef XGBOOST_COMMON_HOST_DEVICE_VECTOR_H_ @@ -59,7 +59,7 @@ #include #include -#include "gpu_set.h" +#include "common.h" #include "span.h" // only include thrust-related files if host_device_vector.h diff --git a/src/common/timer.h b/src/common/timer.h index 22c0cf31a..4a7f3cf87 100644 --- a/src/common/timer.h +++ b/src/common/timer.h @@ -8,7 +8,7 @@ #include #include -#include "gpu_set.h" +#include "common.h" namespace xgboost { namespace common { diff --git a/src/linear/updater_gpu_coordinate.cu b/src/linear/updater_gpu_coordinate.cu index fe1f5b5fc..a05ddcba2 100644 --- a/src/linear/updater_gpu_coordinate.cu +++ b/src/linear/updater_gpu_coordinate.cu @@ -6,7 +6,7 @@ #include #include #include -#include "../common/gpu_set.h" +#include "../common/common.h" #include "../common/device_helpers.cuh" #include "../common/timer.h" #include "coordinate_common.h" diff --git a/src/predictor/gpu_predictor.cu b/src/predictor/gpu_predictor.cu index b59564f96..4c11643c6 100644 --- a/src/predictor/gpu_predictor.cu +++ b/src/predictor/gpu_predictor.cu @@ -11,7 +11,7 @@ #include #include #include -#include "../common/gpu_set.h" +#include "../common/common.h" #include "../common/device_helpers.cuh" #include "../common/host_device_vector.h" diff --git a/src/tree/updater_gpu.cu b/src/tree/updater_gpu.cu index e92c3545e..05368a82b 100644 --- a/src/tree/updater_gpu.cu +++ b/src/tree/updater_gpu.cu @@ -4,7 +4,7 @@ #include #include #include -#include "../common/gpu_set.h" +#include "../common/common.h" #include "param.h" #include "updater_gpu_common.cuh" diff --git a/tests/cpp/common/test_gpuset.cc b/tests/cpp/common/test_common.cc similarity index 81% rename from tests/cpp/common/test_gpuset.cc rename to tests/cpp/common/test_common.cc index 3d74ba270..655e9a000 100644 --- a/tests/cpp/common/test_gpuset.cc +++ b/tests/cpp/common/test_common.cc @@ -1,8 +1,7 @@ -#include "../../../src/common/gpu_set.h" +#include "../../../src/common/common.h" #include namespace xgboost { - TEST(GPUSet, Basic) { GPUSet devices = GPUSet::Empty(); ASSERT_TRUE(devices.IsEmpty()); @@ -28,10 +27,16 @@ TEST(GPUSet, Basic) { devices = GPUSet::Range(2, 8); EXPECT_EQ(devices.Size(), 8); + EXPECT_ANY_THROW(devices[8]); + EXPECT_ANY_THROW(devices.Index(0)); + devices = devices.Unnormalised(); EXPECT_EQ(*devices.begin(), 0); EXPECT_EQ(*devices.end(), devices.Size()); +#ifndef XGBOOST_USE_CUDA + EXPECT_EQ(GPUSet::AllVisible(), GPUSet::Empty()); +#endif } - } // namespace xgboost + diff --git a/tests/cpp/common/test_common.cu b/tests/cpp/common/test_common.cu new file mode 100644 index 000000000..90ad56a14 --- /dev/null +++ b/tests/cpp/common/test_common.cu @@ -0,0 +1,44 @@ +#include "../../../src/common/common.h" +#include + +namespace xgboost { + +TEST(GPUSet, GPUBasic) { + GPUSet devices = GPUSet::Empty(); + ASSERT_TRUE(devices.IsEmpty()); + + devices = GPUSet{0, 1}; + ASSERT_TRUE(devices != GPUSet::Empty()); + EXPECT_EQ(devices.Size(), 1); + + EXPECT_ANY_THROW(devices.Index(1)); + EXPECT_ANY_THROW(devices.Index(-1)); + + devices = GPUSet::Range(1, 0); + EXPECT_EQ(devices, GPUSet::Empty()); + EXPECT_EQ(devices.Size(), 0); + EXPECT_TRUE(devices.IsEmpty()); + + EXPECT_FALSE(devices.Contains(1)); + + devices = GPUSet::Range(2, -1); + EXPECT_EQ(devices, GPUSet::Empty()); + EXPECT_EQ(devices.Size(), 0); + EXPECT_TRUE(devices.IsEmpty()); + + devices = GPUSet::Range(2, 8); + EXPECT_EQ(devices.Size(), 8); + devices = devices.Unnormalised(); + + EXPECT_EQ(*devices.begin(), 0); + EXPECT_EQ(*devices.end(), devices.Size()); + EXPECT_EQ(8, devices.Size()); + + ASSERT_NO_THROW(GPUSet::AllVisible()); + devices = GPUSet::AllVisible(); + if (devices.IsEmpty()) { + LOG(WARNING) << "Empty devices."; + } +} + +} // namespace xgboost