Remove internal use of gpu_id. (#9568)

This commit is contained in:
Jiaming Yuan
2023-09-20 23:29:51 +08:00
committed by GitHub
parent 38ac52dd87
commit 8c676c889d
121 changed files with 1012 additions and 1044 deletions

View File

@@ -29,31 +29,37 @@ struct DeviceSym {
* viewing types like `linalg::TensorView`.
*/
struct DeviceOrd {
// Constant representing the device ID of CPU.
static bst_d_ordinal_t constexpr CPUOrdinal() { return -1; }
static bst_d_ordinal_t constexpr InvalidOrdinal() { return -2; }
enum Type : std::int16_t { kCPU = 0, kCUDA = 1 } device{kCPU};
// CUDA device ordinal.
bst_d_ordinal_t ordinal{-1};
bst_d_ordinal_t ordinal{CPUOrdinal()};
[[nodiscard]] bool IsCUDA() const { return device == kCUDA; }
[[nodiscard]] bool IsCPU() const { return device == kCPU; }
DeviceOrd() = default;
constexpr DeviceOrd() = default;
constexpr DeviceOrd(Type type, bst_d_ordinal_t ord) : device{type}, ordinal{ord} {}
DeviceOrd(DeviceOrd const& that) = default;
DeviceOrd& operator=(DeviceOrd const& that) = default;
DeviceOrd(DeviceOrd&& that) = default;
DeviceOrd& operator=(DeviceOrd&& that) = default;
constexpr DeviceOrd(DeviceOrd const& that) = default;
constexpr DeviceOrd& operator=(DeviceOrd const& that) = default;
constexpr DeviceOrd(DeviceOrd&& that) = default;
constexpr DeviceOrd& operator=(DeviceOrd&& that) = default;
/**
* @brief Constructor for CPU.
*/
[[nodiscard]] constexpr static auto CPU() { return DeviceOrd{kCPU, -1}; }
[[nodiscard]] constexpr static auto CPU() { return DeviceOrd{kCPU, CPUOrdinal()}; }
/**
* @brief Constructor for CUDA device.
*
* @param ordinal CUDA device ordinal.
*/
[[nodiscard]] static auto CUDA(bst_d_ordinal_t ordinal) { return DeviceOrd{kCUDA, ordinal}; }
[[nodiscard]] static constexpr auto CUDA(bst_d_ordinal_t ordinal) {
return DeviceOrd{kCUDA, ordinal};
}
[[nodiscard]] bool operator==(DeviceOrd const& that) const {
return device == that.device && ordinal == that.ordinal;
@@ -78,25 +84,26 @@ struct DeviceOrd {
static_assert(sizeof(DeviceOrd) == sizeof(std::int32_t));
std::ostream& operator<<(std::ostream& os, DeviceOrd ord);
/**
* @brief Runtime context for XGBoost. Contains information like threads and device.
*/
struct Context : public XGBoostParameter<Context> {
private:
// User interfacing parameter for device ordinal
std::string device{DeviceSym::CPU()}; // NOLINT
// The device object for the current context. We are in the middle of replacing the
// `gpu_id` with this device field.
// The device ordinal set by user
DeviceOrd device_{DeviceOrd::CPU()};
public:
// Constant representing the device ID of CPU.
static bst_d_ordinal_t constexpr kCpuId = -1;
static bst_d_ordinal_t constexpr InvalidOrdinal() { return -2; }
static std::int64_t constexpr kDefaultSeed = 0;
public:
Context();
void Init(Args const& kwargs);
template <typename Container>
Args UpdateAllowUnknown(Container const& kwargs) {
auto args = XGBoostParameter<Context>::UpdateAllowUnknown(kwargs);
@@ -104,7 +111,6 @@ struct Context : public XGBoostParameter<Context> {
return args;
}
std::int32_t gpu_id{kCpuId};
// The number of threads to use if OpenMP is enabled. If equals 0, use the system default.
std::int32_t nthread{0}; // NOLINT
// stored random seed
@@ -116,7 +122,8 @@ struct Context : public XGBoostParameter<Context> {
bool validate_parameters{false};
/**
* @brief Configure the parameter `gpu_id'.
* @brief Configure the parameter `device'. Deprecated, will remove once `gpu_id` is
* removed.
*
* @param require_gpu Whether GPU is explicitly required by the user through other
* configurations.
@@ -212,9 +219,7 @@ struct Context : public XGBoostParameter<Context> {
private:
void SetDeviceOrdinal(Args const& kwargs);
Context& SetDevice(DeviceOrd d) {
this->device_ = d;
this->gpu_id = d.ordinal; // this can be removed once we move away from `gpu_id`.
this->device = d.Name();
this->device = (this->device_ = d).Name();
return *this;
}

View File

@@ -106,10 +106,10 @@ class MetaInfo {
MetaInfo& operator=(MetaInfo&& that) = default;
MetaInfo& operator=(MetaInfo const& that) = delete;
/*!
* \brief Validate all metainfo.
/**
* @brief Validate all metainfo.
*/
void Validate(int32_t device) const;
void Validate(DeviceOrd device) const;
MetaInfo Slice(common::Span<int32_t const> ridxs) const;

View File

@@ -88,9 +88,9 @@ class HostDeviceVector {
static_assert(std::is_standard_layout<T>::value, "HostDeviceVector admits only POD types");
public:
explicit HostDeviceVector(size_t size = 0, T v = T(), int device = -1);
HostDeviceVector(std::initializer_list<T> init, int device = -1);
explicit HostDeviceVector(const std::vector<T>& init, int device = -1);
explicit HostDeviceVector(size_t size = 0, T v = T(), DeviceOrd device = DeviceOrd::CPU());
HostDeviceVector(std::initializer_list<T> init, DeviceOrd device = DeviceOrd::CPU());
explicit HostDeviceVector(const std::vector<T>& init, DeviceOrd device = DeviceOrd::CPU());
~HostDeviceVector();
HostDeviceVector(const HostDeviceVector<T>&) = delete;
@@ -99,17 +99,9 @@ class HostDeviceVector {
HostDeviceVector<T>& operator=(const HostDeviceVector<T>&) = delete;
HostDeviceVector<T>& operator=(HostDeviceVector<T>&&);
bool Empty() const { return Size() == 0; }
size_t Size() const;
int DeviceIdx() const;
DeviceOrd Device() const {
auto idx = this->DeviceIdx();
if (idx == DeviceOrd::CPU().ordinal) {
return DeviceOrd::CPU();
} else {
return DeviceOrd::CUDA(idx);
}
}
[[nodiscard]] bool Empty() const { return Size() == 0; }
[[nodiscard]] std::size_t Size() const;
[[nodiscard]] DeviceOrd Device() const;
common::Span<T> DeviceSpan();
common::Span<const T> ConstDeviceSpan() const;
common::Span<const T> DeviceSpan() const { return ConstDeviceSpan(); }
@@ -135,13 +127,12 @@ class HostDeviceVector {
const std::vector<T>& ConstHostVector() const;
const std::vector<T>& HostVector() const {return ConstHostVector(); }
bool HostCanRead() const;
bool HostCanWrite() const;
bool DeviceCanRead() const;
bool DeviceCanWrite() const;
GPUAccess DeviceAccess() const;
[[nodiscard]] bool HostCanRead() const;
[[nodiscard]] bool HostCanWrite() const;
[[nodiscard]] bool DeviceCanRead() const;
[[nodiscard]] bool DeviceCanWrite() const;
[[nodiscard]] GPUAccess DeviceAccess() const;
void SetDevice(int device) const;
void SetDevice(DeviceOrd device) const;
void Resize(size_t new_size, T v = T());

View File

@@ -659,13 +659,13 @@ auto MakeVec(T *ptr, size_t s, DeviceOrd device = DeviceOrd::CPU()) {
template <typename T>
auto MakeVec(HostDeviceVector<T> *data) {
return MakeVec(data->DeviceIdx() == -1 ? data->HostPointer() : data->DevicePointer(),
data->Size(), data->Device());
return MakeVec(data->Device().IsCPU() ? data->HostPointer() : data->DevicePointer(), data->Size(),
data->Device());
}
template <typename T>
auto MakeVec(HostDeviceVector<T> const *data) {
return MakeVec(data->DeviceIdx() == -1 ? data->ConstHostPointer() : data->ConstDevicePointer(),
return MakeVec(data->Device().IsCPU() ? data->ConstHostPointer() : data->ConstDevicePointer(),
data->Size(), data->Device());
}
@@ -757,13 +757,13 @@ class Tensor {
Order order_{Order::kC};
template <typename I, std::int32_t D>
void Initialize(I const (&shape)[D], std::int32_t device) {
void Initialize(I const (&shape)[D], DeviceOrd device) {
static_assert(D <= kDim, "Invalid shape.");
std::copy(shape, shape + D, shape_);
for (auto i = D; i < kDim; ++i) {
shape_[i] = 1;
}
if (device >= 0) {
if (device.IsCUDA()) {
data_.SetDevice(device);
data_.ConstDevicePointer(); // Pull to device;
}
@@ -780,14 +780,11 @@ class Tensor {
* See \ref TensorView for parameters of this constructor.
*/
template <typename I, int32_t D>
explicit Tensor(I const (&shape)[D], std::int32_t device, Order order = kC)
: Tensor{common::Span<I const, D>{shape}, device, order} {}
template <typename I, int32_t D>
explicit Tensor(I const (&shape)[D], DeviceOrd device, Order order = kC)
: Tensor{common::Span<I const, D>{shape}, device.ordinal, order} {}
: Tensor{common::Span<I const, D>{shape}, device, order} {}
template <typename I, size_t D>
explicit Tensor(common::Span<I const, D> shape, std::int32_t device, Order order = kC)
explicit Tensor(common::Span<I const, D> shape, DeviceOrd device, Order order = kC)
: order_{order} {
// No device unroll as this is a host only function.
std::copy(shape.data(), shape.data() + D, shape_);
@@ -795,11 +792,11 @@ class Tensor {
shape_[i] = 1;
}
auto size = detail::CalcSize(shape_);
if (device >= 0) {
if (device.IsCUDA()) {
data_.SetDevice(device);
}
data_.Resize(size);
if (device >= 0) {
if (device.IsCUDA()) {
data_.DevicePointer(); // Pull to device
}
}
@@ -807,7 +804,7 @@ class Tensor {
* Initialize from 2 host iterators.
*/
template <typename It, typename I, int32_t D>
explicit Tensor(It begin, It end, I const (&shape)[D], std::int32_t device, Order order = kC)
explicit Tensor(It begin, It end, I const (&shape)[D], DeviceOrd device, Order order = kC)
: order_{order} {
auto &h_vec = data_.HostVector();
h_vec.insert(h_vec.begin(), begin, end);
@@ -816,7 +813,7 @@ class Tensor {
}
template <typename I, int32_t D>
explicit Tensor(std::initializer_list<T> data, I const (&shape)[D], std::int32_t device,
explicit Tensor(std::initializer_list<T> data, I const (&shape)[D], DeviceOrd device,
Order order = kC)
: order_{order} {
auto &h_vec = data_.HostVector();
@@ -824,10 +821,6 @@ class Tensor {
// shape
this->Initialize(shape, device);
}
template <typename I, int32_t D>
explicit Tensor(std::initializer_list<T> data, I const (&shape)[D], DeviceOrd device,
Order order = kC)
: Tensor{data, shape, device.ordinal, order} {}
/**
* \brief Index operator. Not thread safe, should not be used in performance critical
* region. For more efficient indexing, consider getting a view first.
@@ -944,9 +937,7 @@ class Tensor {
/**
* \brief Set device ordinal for this tensor.
*/
void SetDevice(int32_t device) const { data_.SetDevice(device); }
void SetDevice(DeviceOrd device) const { data_.SetDevice(device); }
[[nodiscard]] int32_t DeviceIdx() const { return data_.DeviceIdx(); }
[[nodiscard]] DeviceOrd Device() const { return data_.Device(); }
};
@@ -962,7 +953,7 @@ using Vector = Tensor<T, 1>;
template <typename T, typename... Index>
auto Empty(Context const *ctx, Index &&...index) {
Tensor<T, sizeof...(Index)> t;
t.SetDevice(ctx->gpu_id);
t.SetDevice(ctx->Device());
t.Reshape(index...);
return t;
}
@@ -973,7 +964,7 @@ auto Empty(Context const *ctx, Index &&...index) {
template <typename T, typename... Index>
auto Constant(Context const *ctx, T v, Index &&...index) {
Tensor<T, sizeof...(Index)> t;
t.SetDevice(ctx->gpu_id);
t.SetDevice(ctx->Device());
t.Reshape(index...);
t.Data()->Fill(std::move(v));
return t;
@@ -990,8 +981,8 @@ auto Zeros(Context const *ctx, Index &&...index) {
// Only first axis is supported for now.
template <typename T, int32_t D>
void Stack(Tensor<T, D> *l, Tensor<T, D> const &r) {
if (r.DeviceIdx() >= 0) {
l->SetDevice(r.DeviceIdx());
if (r.Device().IsCUDA()) {
l->SetDevice(r.Device());
}
l->ModifyInplace([&](HostDeviceVector<T> *data, common::Span<size_t, D> shape) {
for (size_t i = 1; i < D; ++i) {

View File

@@ -52,9 +52,9 @@ class PredictionContainer : public DMatrixCache<PredictionCacheEntry> {
public:
PredictionContainer() : DMatrixCache<PredictionCacheEntry>{DefaultSize()} {}
PredictionCacheEntry& Cache(std::shared_ptr<DMatrix> m, std::int32_t device) {
PredictionCacheEntry& Cache(std::shared_ptr<DMatrix> m, DeviceOrd device) {
auto p_cache = this->CacheItem(m);
if (device != Context::kCpuId) {
if (device.IsCUDA()) {
p_cache->predictions.SetDevice(device);
}
return *p_cache;