De-duplicate GPU parameters. (#4454)
* Only define `gpu_id` and `n_gpus` in `LearnerTrainParam`. * Pass `LearnerTrainParam` through XGBoost via the factory method. * Disable all GPU usage when GPU-related parameters are not specified (fixes XGBoost choosing the GPU too aggressively). * Test learner train param IO. * Fix GPU pickling.
This commit is contained in:
@@ -1,9 +1,10 @@
|
||||
/*!
|
||||
* Copyright 2015-2018 by Contributors
|
||||
* Copyright 2015-2019 by Contributors
|
||||
* \file common.cc
|
||||
* \brief Enable all kinds of global variables in common.
|
||||
*/
|
||||
#include <dmlc/thread_local.h>
|
||||
#include <xgboost/logging.h>
|
||||
|
||||
#include "common.h"
|
||||
#include "./random.h"
|
||||
@@ -29,4 +30,39 @@ int AllVisibleImpl::AllVisible() {
|
||||
}
|
||||
#endif // !defined(XGBOOST_USE_CUDA)
|
||||
|
||||
constexpr GPUSet::GpuIdType GPUSet::kAll;
|
||||
|
||||
/*!
 * \brief Pick the set of devices to run on, starting at ordinal `gpu_id'.
 *
 * Both `n_gpus' and `n_rows' act as upper bounds on the number of devices
 * that end up in the returned set.
 *
 * \param gpu_id Ordinal of the first device; must be >= 0.
 * \param n_gpus Number of devices requested; must be >= -1.  0 selects no
 *               device, `kAll' selects every visible device from `gpu_id' on.
 * \param n_rows Row count; no more devices than rows are selected, and none
 *               at all when it is 0.
 * \return Empty() when no device is selected (CPU), otherwise
 *         Range(gpu_id, n_devices).
 */
GPUSet GPUSet::All(GpuIdType gpu_id, GpuIdType n_gpus, int32_t n_rows) {
  CHECK_GE(gpu_id, 0) << "gpu_id must be >= 0.";
  CHECK_GE(n_gpus, -1) << "n_gpus must be >= -1.";

  GpuIdType const n_devices_visible = AllVisible().Size();
  if (n_devices_visible == 0 || n_gpus == 0 || n_rows == 0) {
    LOG(DEBUG) << "Running on CPU.";
    return Empty();
  }

  // Devices left once everything before `gpu_id' is skipped.  This is <= 0
  // when `gpu_id' is past the last visible device; both checks below reject
  // that case.
  GpuIdType const n_available_devices = n_devices_visible - gpu_id;

  // Number of devices asked for, before capping by the row count.
  GpuIdType n_requested = n_available_devices;
  if (n_gpus == kAll) {  // Use all devices starting from `gpu_id'.
    CHECK(gpu_id < n_devices_visible)
        << "\ngpu_id should be less than number of visible devices.\ngpu_id: "
        << gpu_id
        << ", number of visible devices: "
        << n_devices_visible;
  } else {  // Use `n_gpus' devices starting from `gpu_id'.
    CHECK_LE(n_gpus, n_available_devices)
        << "Starting from gpu id: " << gpu_id << ", there are only "
        << n_available_devices << " available devices, while n_gpus is set to: "
        << n_gpus;
    n_requested = n_gpus;
  }

  // NOTE(review): a negative `n_rows' would yield a negative device count
  // here; presumably callers never pass one -- confirm at the call sites.
  GpuIdType const n_devices = n_requested < n_rows ? n_requested : n_rows;
  LOG(DEBUG) << "GPU ID: " << gpu_id << ", Number of GPUs: " << n_devices;
  return Range(gpu_id, n_devices);
}
|
||||
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -135,8 +135,8 @@ class Range {
|
||||
Iterator begin_;
|
||||
Iterator end_;
|
||||
};
|
||||
|
||||
} // namespace common
|
||||
|
||||
/*!
 * \brief Holder for the static AllVisible() query.
 *
 * Only the declaration lives here; GPUSet::AllVisible() calls it and stores
 * the returned int as a GpuIdType.
 * NOTE(review): presumably the value is the number of visible GPU devices,
 * with separate CUDA / non-CUDA definitions -- confirm in common.cc / .cu.
 */
struct AllVisibleImpl {
|
||||
static int AllVisible();
|
||||
};
|
||||
@@ -160,33 +160,7 @@ class GPUSet {
|
||||
}
|
||||
/*! \brief n_gpus and num_rows both are upper bounds. */
|
||||
static GPUSet All(GpuIdType gpu_id, GpuIdType n_gpus,
|
||||
GpuIdType num_rows = std::numeric_limits<GpuIdType>::max()) {
|
||||
CHECK_GE(gpu_id, 0) << "gpu_id must be >= 0.";
|
||||
CHECK_GE(n_gpus, -1) << "n_gpus must be >= -1.";
|
||||
|
||||
GpuIdType const n_devices_visible = AllVisible().Size();
|
||||
if (n_devices_visible == 0 || n_gpus == 0) { return Empty(); }
|
||||
|
||||
GpuIdType const n_available_devices = n_devices_visible - gpu_id;
|
||||
|
||||
if (n_gpus == kAll) { // Use all devices starting from `gpu_id'.
|
||||
CHECK(gpu_id < n_devices_visible)
|
||||
<< "\ngpu_id should be less than number of visible devices.\ngpu_id: "
|
||||
<< gpu_id
|
||||
<< ", number of visible devices: "
|
||||
<< n_devices_visible;
|
||||
GpuIdType n_devices =
|
||||
n_available_devices < num_rows ? n_available_devices : num_rows;
|
||||
return Range(gpu_id, n_devices);
|
||||
} else { // Use devices in ( gpu_id, gpu_id + n_gpus ).
|
||||
CHECK_LE(n_gpus, n_available_devices)
|
||||
<< "Starting from gpu id: " << gpu_id << ", there are only "
|
||||
<< n_available_devices << " available devices, while n_gpus is set to: "
|
||||
<< n_gpus;
|
||||
GpuIdType n_devices = n_gpus < num_rows ? n_gpus : num_rows;
|
||||
return Range(gpu_id, n_devices);
|
||||
}
|
||||
}
|
||||
GpuIdType num_rows = std::numeric_limits<GpuIdType>::max());
|
||||
|
||||
static GPUSet AllVisible() {
|
||||
GpuIdType n = AllVisibleImpl::AllVisible();
|
||||
|
||||
@@ -1,81 +0,0 @@
|
||||
/*!
|
||||
* Copyright 2018 by Contributors
|
||||
* \file enum_class_param.h
|
||||
* \brief macro for using C++11 enum class as DMLC parameter
|
||||
* \author Hyunsu Philip Cho
|
||||
*/
|
||||
|
||||
#ifndef XGBOOST_COMMON_ENUM_CLASS_PARAM_H_
|
||||
#define XGBOOST_COMMON_ENUM_CLASS_PARAM_H_
|
||||
|
||||
#include <dmlc/parameter.h>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
|
||||
/*!
|
||||
* \brief Specialization of FieldEntry for enum class (backed by int)
|
||||
*
|
||||
* Use this macro to use C++11 enum class as DMLC parameters
|
||||
*
|
||||
* Usage:
|
||||
*
|
||||
* \code{.cpp}
|
||||
*
|
||||
* // enum class must inherit from int type
|
||||
* enum class Foo : int {
|
||||
* kBar = 0, kFrog = 1, kCat = 2, kDog = 3
|
||||
* };
|
||||
*
|
||||
* // This line is needed to prevent compilation error
|
||||
* DECLARE_FIELD_ENUM_CLASS(Foo);
|
||||
*
|
||||
* // Now define DMLC parameter as usual;
|
||||
* // enum classes can now be members.
|
||||
* struct MyParam : dmlc::Parameter<MyParam> {
|
||||
* Foo foo;
|
||||
* DMLC_DECLARE_PARAMETER(MyParam) {
|
||||
* DMLC_DECLARE_FIELD(foo)
|
||||
* .set_default(Foo::kBar)
|
||||
* .add_enum("bar", Foo::kBar)
|
||||
* .add_enum("frog", Foo::kFrog)
|
||||
* .add_enum("cat", Foo::kCat)
|
||||
* .add_enum("dog", Foo::kDog);
|
||||
* }
|
||||
* };
|
||||
*
|
||||
* DMLC_REGISTER_PARAMETER(MyParam);
|
||||
* \endcode
|
||||
*/
|
||||
#define DECLARE_FIELD_ENUM_CLASS(EnumClass) \
|
||||
namespace dmlc { \
|
||||
namespace parameter { \
|
||||
template <> \
|
||||
class FieldEntry<EnumClass> : public FieldEntry<int> { \
|
||||
public: \
|
||||
FieldEntry<EnumClass>() { \
|
||||
static_assert( \
|
||||
std::is_same<int, typename std::underlying_type<EnumClass>::type>::value, \
|
||||
"enum class must be backed by int"); \
|
||||
is_enum_ = true; \
|
||||
} \
|
||||
using Super = FieldEntry<int>; \
|
||||
void Set(void *head, const std::string &value) const override { \
|
||||
Super::Set(head, value); \
|
||||
} \
|
||||
inline FieldEntry<EnumClass>& add_enum(const std::string &key, EnumClass value) { \
|
||||
Super::add_enum(key, static_cast<int>(value)); \
|
||||
return *this; \
|
||||
} \
|
||||
inline FieldEntry<EnumClass>& set_default(const EnumClass& default_value) { \
|
||||
default_value_ = static_cast<int>(default_value); \
|
||||
has_default_ = true; \
|
||||
return *this; \
|
||||
} \
|
||||
inline void Init(const std::string &key, void *head, EnumClass& ref) { /* NOLINT */ \
|
||||
Super::Init(key, head, *reinterpret_cast<int*>(&ref)); \
|
||||
} \
|
||||
}; \
|
||||
} /* namespace parameter */ \
|
||||
} /* namespace dmlc */
|
||||
|
||||
#endif // XGBOOST_COMMON_ENUM_CLASS_PARAM_H_
|
||||
@@ -383,8 +383,8 @@ struct GPUSketcher {
|
||||
hmat->Init(&sketches, param_.max_bin);
|
||||
}
|
||||
|
||||
GPUSketcher(tree::TrainParam param, size_t n_rows) : param_(std::move(param)) {
|
||||
dist_ = GPUDistribution::Block(GPUSet::All(param_.gpu_id, param_.n_gpus, n_rows));
|
||||
GPUSketcher(tree::TrainParam param, GPUSet const& devices) : param_(std::move(param)) {
|
||||
dist_ = GPUDistribution::Block(devices);
|
||||
}
|
||||
|
||||
private:
|
||||
@@ -395,8 +395,9 @@ struct GPUSketcher {
|
||||
|
||||
void DeviceSketch
|
||||
(const SparsePage& batch, const MetaInfo& info,
|
||||
const tree::TrainParam& param, HistCutMatrix* hmat, int gpu_batch_nrows) {
|
||||
GPUSketcher sketcher(param, info.num_row_);
|
||||
const tree::TrainParam& param, HistCutMatrix* hmat, int gpu_batch_nrows,
|
||||
GPUSet const& devices) {
|
||||
GPUSketcher sketcher(param, devices);
|
||||
sketcher.Sketch(batch, info, hmat, gpu_batch_nrows);
|
||||
}
|
||||
|
||||
|
||||
@@ -118,7 +118,8 @@ struct HistCutMatrix {
|
||||
/*! \brief Builds the cut matrix on the GPU */
|
||||
void DeviceSketch
|
||||
(const SparsePage& batch, const MetaInfo& info,
|
||||
const tree::TrainParam& param, HistCutMatrix* hmat, int gpu_batch_nrows);
|
||||
const tree::TrainParam& param, HistCutMatrix* hmat, int gpu_batch_nrows,
|
||||
GPUSet const& devices);
|
||||
|
||||
/*!
|
||||
* \brief A single row in global histogram index.
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2017 XGBoost contributors
|
||||
* Copyright 2017-2019 XGBoost contributors
|
||||
*/
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user