Fix gpu devices. (#3693)

* Fix gpu_set normalized and unnormalized.
* Fix DeviceSpan.
Authored by trivialfis, 2018-09-19 17:39:42 +12:00; committed by Rory Mitchell
parent 0f99cdfe0e
commit 9119f9e369
13 changed files with 199 additions and 138 deletions
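
For context, the first fix changes how GPUSet maps the user-facing gpu_id / n_gpus pair onto CUDA device ordinals. Below is a minimal sketch of the intended semantics against the GPUSet API introduced in this commit; the device counts and the call site are illustrative assumptions, not taken from the diff.

// Illustrative sketch only: assumes a machine with at least three visible GPUs
// and the GPUSet API added to src/common/common.h by this commit.
#include "common.h"

void DeviceSetSketch() {
  int gpu_id = 1;   // user-requested starting device
  int n_gpus = 2;   // user-requested number of devices

  // Clamp the request against the visible devices, counting from 0.
  xgboost::GPUSet devices = xgboost::GPUSet::All(n_gpus);      // {0, 1}

  // Shift the range so it starts at gpu_id; this is the fixed Normalised().
  devices = devices.Normalised(gpu_id);                        // {1, 2}

  // Map a CUDA ordinal back to a shard index, and query membership.
  int shard_idx = devices.Index(2);                            // 1
  bool has_zero = devices.Contains(0);                         // false

  // Unnormalised() shifts the same-sized range back to start at 0.
  xgboost::GPUSet shards = devices.Unnormalised();             // {0, 1}
}
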

src/common/common.cc

@@ -20,4 +20,11 @@ GlobalRandomEngine& GlobalRandom() {
return RandomThreadLocalStore::Get()->engine;
}
} // namespace common
#if !defined(XGBOOST_USE_CUDA)
int AllVisibleImpl::AllVisible() {
return 0;
}
#endif
} // namespace xgboost

src/common/common.cu (new file)

@@ -0,0 +1,18 @@
/*!
* Copyright 2018 XGBoost contributors
*/
#include "common.h"
namespace xgboost {
int AllVisibleImpl::AllVisible() {
int n_visgpus = 0;
try {
dh::safe_cuda(cudaGetDeviceCount(&n_visgpus));
} catch(const std::exception& e) {
return 0;
}
return n_visgpus;
}
} // namespace xgboost

src/common/common.h

@@ -7,11 +7,39 @@
#define XGBOOST_COMMON_COMMON_H_
#include <xgboost/base.h>
#include <xgboost/logging.h>
#include <exception>
#include <limits>
#include <type_traits>
#include <vector>
#include <string>
#include <sstream>
#if defined(__CUDACC__)
#include <thrust/system/cuda/error.h>
#include <thrust/system_error.h>
#endif
namespace dh {
#if defined(__CUDACC__)
/*
* Error handling functions
*/
#define safe_cuda(ans) ThrowOnCudaError((ans), __FILE__, __LINE__)
inline cudaError_t ThrowOnCudaError(cudaError_t code, const char *file,
int line) {
if (code != cudaSuccess) {
throw thrust::system_error(code, thrust::cuda_category(),
std::string{file} + "(" + // NOLINT
std::to_string(line) + ")");
}
return code;
}
#endif
} // namespace dh
namespace xgboost {
namespace common {
/*!
@@ -103,5 +131,87 @@ class Range {
};
} // namespace common
struct AllVisibleImpl {
static int AllVisible();
};
/* \brief set of devices across which HostDeviceVector can be distributed.
*
* Currently implemented as a range, but can be changed later to something else,
* e.g. a bitset
*/
class GPUSet {
public:
explicit GPUSet(int start = 0, int ndevices = 0)
: devices_(start, start + ndevices) {}
static GPUSet Empty() { return GPUSet(); }
static GPUSet Range(int start, int ndevices) {
return ndevices <= 0 ? Empty() : GPUSet{start, ndevices};
}
/*! \brief ndevices and num_rows both are upper bounds. */
static GPUSet All(int ndevices, int num_rows = std::numeric_limits<int>::max()) {
int n_devices_visible = AllVisible().Size();
if (ndevices < 0 || ndevices > n_devices_visible) {
ndevices = n_devices_visible;
}
// fix-up device number to be limited by number of rows
ndevices = ndevices > num_rows ? num_rows : ndevices;
return Range(0, ndevices);
}
static GPUSet AllVisible() {
int n = AllVisibleImpl::AllVisible();
return Range(0, n);
}
/*! \brief Ensure gpu_id is correct, so not dependent upon user knowing details */
static int GetDeviceIdx(int gpu_id) {
auto devices = AllVisible();
CHECK(!devices.IsEmpty()) << "Empty device.";
return (std::abs(gpu_id) + 0) % devices.Size();
}
/*! \brief Counting from gpu_id */
GPUSet Normalised(int gpu_id) const {
return Range(gpu_id, Size());
}
/*! \brief Counting from 0 */
GPUSet Unnormalised() const {
return Range(0, Size());
}
int Size() const {
int res = *devices_.end() - *devices_.begin();
return res < 0 ? 0 : res;
}
/*! \brief Get normalised device id. */
int operator[](int index) const {
CHECK(index >= 0 && index < Size());
return *devices_.begin() + index;
}
bool IsEmpty() const { return Size() == 0; }
/*! \brief Get un-normalised index. */
int Index(int device) const {
CHECK(Contains(device));
return device - *devices_.begin();
}
bool Contains(int device) const {
return *devices_.begin() <= device && device < *devices_.end();
}
common::Range::Iterator begin() const { return devices_.begin(); } // NOLINT
common::Range::Iterator end() const { return devices_.end(); } // NOLINT
friend bool operator==(const GPUSet& lhs, const GPUSet& rhs) {
return lhs.devices_ == rhs.devices_;
}
friend bool operator!=(const GPUSet& lhs, const GPUSet& rhs) {
return !(lhs == rhs);
}
private:
common::Range devices_;
};
} // namespace xgboost
#endif  // XGBOOST_COMMON_COMMON_H_
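
A short usage sketch for the relocated helpers: iterating a GPUSet and wrapping CUDA calls in dh::safe_cuda. This assumes compilation with nvcc so that the __CUDACC__ branch above is active; the loop body and function name are illustrative, not from the commit.

// Illustrative sketch only: requires nvcc so that dh::safe_cuda is defined.
#include <cuda_runtime.h>
#include "common.h"

void SetUpDevices(int gpu_id, int n_gpus) {
  // Resolve the user-facing (gpu_id, n_gpus) pair into concrete CUDA ordinals.
  xgboost::GPUSet devices = xgboost::GPUSet::All(n_gpus).Normalised(gpu_id);

  for (auto d : devices) {  // iterates gpu_id, gpu_id + 1, ..., gpu_id + Size() - 1
    // Throws thrust::system_error with file/line information on failure.
    dh::safe_cuda(cudaSetDevice(static_cast<int>(d)));
  }
}
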


@@ -9,7 +9,6 @@
#include <xgboost/logging.h>
#include "common.h"
-#include "gpu_set.h"
#include <algorithm>
#include <chrono>

src/common/gpu_set.h (deleted)

@@ -1,122 +0,0 @@
/*!
* Copyright 2018 XGBoost contributors
*/
#ifndef XGBOOST_COMMON_GPU_SET_H_
#define XGBOOST_COMMON_GPU_SET_H_
#include <xgboost/base.h>
#include <xgboost/logging.h>
#include <limits>
#include <string>
#include "common.h"
#include "span.h"
#if defined(__CUDACC__)
#include <thrust/system/cuda/error.h>
#include <thrust/system_error.h>
#endif
namespace dh {
#if defined(__CUDACC__)
/*
* Error handling functions
*/
#define safe_cuda(ans) ThrowOnCudaError((ans), __FILE__, __LINE__)
inline cudaError_t ThrowOnCudaError(cudaError_t code, const char *file,
int line) {
if (code != cudaSuccess) {
throw thrust::system_error(code, thrust::cuda_category(),
std::string{file} + "(" + // NOLINT
std::to_string(line) + ")");
}
return code;
}
#endif
} // namespace dh
namespace xgboost {
/* \brief set of devices across which HostDeviceVector can be distributed.
*
* Currently implemented as a range, but can be changed later to something else,
* e.g. a bitset
*/
class GPUSet {
public:
explicit GPUSet(int start = 0, int ndevices = 0)
: devices_(start, start + ndevices) {}
static GPUSet Empty() { return GPUSet(); }
static GPUSet Range(int start, int ndevices) {
return ndevices <= 0 ? Empty() : GPUSet{start, ndevices};
}
/* \brief ndevices and num_rows both are upper bounds. */
static GPUSet All(int ndevices, int num_rows = std::numeric_limits<int>::max()) {
int n_devices_visible = AllVisible().Size();
ndevices = ndevices < 0 ? n_devices_visible : ndevices;
// fix-up device number to be limited by number of rows
ndevices = ndevices > num_rows ? num_rows : ndevices;
return Range(0, ndevices);
}
static GPUSet AllVisible() {
int n_visgpus = 0;
#if defined(__CUDACC__)
dh::safe_cuda(cudaGetDeviceCount(&n_visgpus));
#endif
return Range(0, n_visgpus);
}
/* \brief Ensure gpu_id is correct, so not dependent upon user knowing details */
static int GetDeviceIdx(int gpu_id) {
return (std::abs(gpu_id) + 0) % AllVisible().Size();
}
/* \brief Counting from gpu_id */
GPUSet Normalised(int gpu_id) const {
return Range(gpu_id, *devices_.end() + gpu_id);
}
/* \brief Counting from 0 */
GPUSet Unnormalised() const {
return Range(0, *devices_.end() - *devices_.begin());
}
int Size() const {
int res = *devices_.end() - *devices_.begin();
return res < 0 ? 0 : res;
}
int operator[](int index) const {
CHECK(index >= 0 && index < *(devices_.end()));
return *devices_.begin() + index;
}
bool IsEmpty() const { return Size() == 0; } // NOLINT
int Index(int device) const {
CHECK(Contains(device));
return device - *devices_.begin();
}
bool Contains(int device) const {
return *devices_.begin() <= device && device < *devices_.end();
}
common::Range::Iterator begin() const { return devices_.begin(); } // NOLINT
common::Range::Iterator end() const { return devices_.end(); } // NOLINT
friend bool operator==(const GPUSet& lhs, const GPUSet& rhs) {
return lhs.devices_ == rhs.devices_;
}
friend bool operator!=(const GPUSet& lhs, const GPUSet& rhs) {
return !(lhs == rhs);
}
private:
common::Range devices_;
};
} // namespace xgboost
#endif // XGBOOST_COMMON_GPU_SET_H_

src/common/host_device_vector.cu

@@ -230,7 +230,7 @@ struct HostDeviceVectorImpl {
CHECK(devices.Contains(device));
LazySyncDevice(device, GPUAccess::kWrite);
return {shards_[devices.Index(device)].data_.data().get(),
-static_cast<typename common::Span<T>::index_type>(Size())};
+static_cast<typename common::Span<T>::index_type>(DeviceSize(device))};
}
common::Span<const T> ConstDeviceSpan(int device) {
@@ -238,7 +238,7 @@
CHECK(devices.Contains(device));
LazySyncDevice(device, GPUAccess::kRead);
return {shards_[devices.Index(device)].data_.data().get(),
-static_cast<typename common::Span<const T>::index_type>(Size())};
+static_cast<typename common::Span<const T>::index_type>(DeviceSize(device))};
}
size_t DeviceSize(int device) {
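
The second fix: when a HostDeviceVector is sharded across several GPUs, the span handed out for one device must report that device's shard length, not the length of the whole vector. A hedged sketch of what this means for callers, assuming the public HostDeviceVector wrapper forwards to the Impl methods shown above; the sharding setup and sizes are assumptions.

// Illustrative sketch only: vec is assumed to hold 100 elements distributed
// over two devices, roughly 50 elements per shard.
#include "host_device_vector.h"

void DeviceSpanSketch(xgboost::HostDeviceVector<float>* vec) {
  xgboost::common::Span<float> s0 = vec->DeviceSpan(0);
  // Before this commit s0.size() was vec->Size() (100 here), even though the
  // buffer on device 0 only holds the local shard; after the fix it matches
  // the per-device shard size.
  bool sizes_match = (static_cast<size_t>(s0.size()) == vec->DeviceSize(0));
  (void)sizes_match;  // true after this commit
}
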

src/common/host_device_vector.h

@@ -59,7 +59,7 @@
#include <initializer_list>
#include <vector>
-#include "gpu_set.h"
+#include "common.h"
#include "span.h"
// only include thrust-related files if host_device_vector.h


@@ -8,7 +8,7 @@
#include <map>
#include <string>
-#include "gpu_set.h"
+#include "common.h"
namespace xgboost {
namespace common {


@@ -6,7 +6,7 @@
#include <thrust/execution_policy.h>
#include <thrust/inner_product.h>
#include <xgboost/linear_updater.h>
-#include "../common/gpu_set.h"
+#include "../common/common.h"
#include "../common/device_helpers.cuh"
#include "../common/timer.h"
#include "coordinate_common.h"


@@ -11,7 +11,7 @@
#include <xgboost/tree_model.h>
#include <xgboost/tree_updater.h>
#include <memory>
-#include "../common/gpu_set.h"
+#include "../common/common.h"
#include "../common/device_helpers.cuh"
#include "../common/host_device_vector.h"


@@ -4,7 +4,7 @@
#include <xgboost/tree_updater.h>
#include <utility>
#include <vector>
-#include "../common/gpu_set.h"
+#include "../common/common.h"
#include "param.h"
#include "updater_gpu_common.cuh"


@@ -1,8 +1,7 @@
-#include "../../../src/common/gpu_set.h"
+#include "../../../src/common/common.h"
#include <gtest/gtest.h>
namespace xgboost {
TEST(GPUSet, Basic) {
GPUSet devices = GPUSet::Empty();
ASSERT_TRUE(devices.IsEmpty());
@@ -28,10 +27,16 @@ TEST(GPUSet, Basic) {
devices = GPUSet::Range(2, 8);
EXPECT_EQ(devices.Size(), 8);
EXPECT_ANY_THROW(devices[8]);
EXPECT_ANY_THROW(devices.Index(0));
devices = devices.Unnormalised();
EXPECT_EQ(*devices.begin(), 0);
EXPECT_EQ(*devices.end(), devices.Size());
#ifndef XGBOOST_USE_CUDA
EXPECT_EQ(GPUSet::AllVisible(), GPUSet::Empty());
#endif
}
} // namespace xgboost


@@ -0,0 +1,44 @@
#include "../../../src/common/common.h"
#include <gtest/gtest.h>
namespace xgboost {
TEST(GPUSet, GPUBasic) {
GPUSet devices = GPUSet::Empty();
ASSERT_TRUE(devices.IsEmpty());
devices = GPUSet{0, 1};
ASSERT_TRUE(devices != GPUSet::Empty());
EXPECT_EQ(devices.Size(), 1);
EXPECT_ANY_THROW(devices.Index(1));
EXPECT_ANY_THROW(devices.Index(-1));
devices = GPUSet::Range(1, 0);
EXPECT_EQ(devices, GPUSet::Empty());
EXPECT_EQ(devices.Size(), 0);
EXPECT_TRUE(devices.IsEmpty());
EXPECT_FALSE(devices.Contains(1));
devices = GPUSet::Range(2, -1);
EXPECT_EQ(devices, GPUSet::Empty());
EXPECT_EQ(devices.Size(), 0);
EXPECT_TRUE(devices.IsEmpty());
devices = GPUSet::Range(2, 8);
EXPECT_EQ(devices.Size(), 8);
devices = devices.Unnormalised();
EXPECT_EQ(*devices.begin(), 0);
EXPECT_EQ(*devices.end(), devices.Size());
EXPECT_EQ(8, devices.Size());
ASSERT_NO_THROW(GPUSet::AllVisible());
devices = GPUSet::AllVisible();
if (devices.IsEmpty()) {
LOG(WARNING) << "Empty devices.";
}
}
} // namespace xgboost