Optionaly fail when gpu_id is set to invalid value (#6342)

This commit is contained in:
Honza Sterba 2020-11-28 08:14:12 +01:00 committed by Hyunsu Cho
parent 38c80bcec4
commit 028ec5f028
4 changed files with 25 additions and 1 deletions

View File

@ -11,6 +11,7 @@
#include <string>
namespace xgboost {
struct GenericParameter : public XGBoostParameter<GenericParameter> {
// Constant representing the device ID of CPU.
static int32_t constexpr kCpuId = -1;
@ -26,6 +27,8 @@ struct GenericParameter : public XGBoostParameter<GenericParameter> {
int nthread;
// primary device, -1 means no gpu.
int gpu_id;
// fail when gpu_id is invalid
bool fail_on_invalid_gpu_id {false};
// gpu page size in external memory mode, 0 means using the default.
size_t gpu_page_size;
bool enable_experimental_json_serialization {true};
@ -64,6 +67,9 @@ struct GenericParameter : public XGBoostParameter<GenericParameter> {
.set_default(-1)
.set_lower_bound(-1)
.describe("The primary GPU device ordinal.");
DMLC_DECLARE_FIELD(fail_on_invalid_gpu_id)
.set_default(false)
.describe("Fail with error when gpu_id is invalid.");
DMLC_DECLARE_FIELD(gpu_page_size)
.set_default(0)
.set_lower_bound(0)

View File

@ -222,6 +222,10 @@ void GenericParameter::ConfigureGpuId(bool require_gpu) {
LOG(WARNING) << "No visible GPU is found, setting `gpu_id` to -1";
}
this->UpdateAllowUnknown(Args{{"gpu_id", std::to_string(kCpuId)}});
} else if (fail_on_invalid_gpu_id) {
CHECK(gpu_id == kCpuId || gpu_id < n_gpus)
<< "Only " << n_gpus << " GPUs are visible, gpu_id "
<< gpu_id << " is invalid.";
} else if (gpu_id != kCpuId && gpu_id >= n_gpus) {
LOG(WARNING) << "Only " << n_gpus
<< " GPUs are visible, setting `gpu_id` to " << gpu_id % n_gpus;

View File

@ -580,7 +580,7 @@ class GPUPredictor : public xgboost::Predictor {
Predictor::Predictor{generic_param} {}
~GPUPredictor() override {
if (generic_param_->gpu_id >= 0) {
if (generic_param_->gpu_id >= 0 && generic_param_->gpu_id < common::AllVisibleGPUs()) {
dh::safe_cuda(cudaSetDevice(generic_param_->gpu_id));
}
}

View File

@ -52,3 +52,17 @@ class TestGPUBasicModels:
model_0, model_1 = self.run_cls(X, y, False)
assert model_0 != model_1
def test_invalid_gpu_id(self):
X = np.random.randn(10, 5) * 1e4
y = np.random.randint(0, 2, size=10) * 1e4
# should pass with invalid gpu id
cls1 = xgb.XGBClassifier(tree_method='gpu_hist', gpu_id=9999)
cls1.fit(X, y)
# should throw error with fail_on_invalid_gpu_id enabled
cls2 = xgb.XGBClassifier(tree_method='gpu_hist', gpu_id=9999, fail_on_invalid_gpu_id=True)
try:
cls2.fit(X, y)
assert False, "Should have failed with with fail_on_invalid_gpu_id enabled"
except xgb.core.XGBoostError as err:
assert "gpu_id 9999 is invalid" in str(err)