Optionally fail when gpu_id is set to an invalid value (#6342)
This commit is contained in:
parent
38c80bcec4
commit
028ec5f028
@ -11,6 +11,7 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
|
|
||||||
struct GenericParameter : public XGBoostParameter<GenericParameter> {
|
struct GenericParameter : public XGBoostParameter<GenericParameter> {
|
||||||
// Constant representing the device ID of CPU.
|
// Constant representing the device ID of CPU.
|
||||||
static int32_t constexpr kCpuId = -1;
|
static int32_t constexpr kCpuId = -1;
|
||||||
@ -26,6 +27,8 @@ struct GenericParameter : public XGBoostParameter<GenericParameter> {
|
|||||||
int nthread;
|
int nthread;
|
||||||
// primary device, -1 means no gpu.
|
// primary device, -1 means no gpu.
|
||||||
int gpu_id;
|
int gpu_id;
|
||||||
|
// fail when gpu_id is invalid
|
||||||
|
bool fail_on_invalid_gpu_id {false};
|
||||||
// gpu page size in external memory mode, 0 means using the default.
|
// gpu page size in external memory mode, 0 means using the default.
|
||||||
size_t gpu_page_size;
|
size_t gpu_page_size;
|
||||||
bool enable_experimental_json_serialization {true};
|
bool enable_experimental_json_serialization {true};
|
||||||
@ -64,6 +67,9 @@ struct GenericParameter : public XGBoostParameter<GenericParameter> {
|
|||||||
.set_default(-1)
|
.set_default(-1)
|
||||||
.set_lower_bound(-1)
|
.set_lower_bound(-1)
|
||||||
.describe("The primary GPU device ordinal.");
|
.describe("The primary GPU device ordinal.");
|
||||||
|
DMLC_DECLARE_FIELD(fail_on_invalid_gpu_id)
|
||||||
|
.set_default(false)
|
||||||
|
.describe("Fail with error when gpu_id is invalid.");
|
||||||
DMLC_DECLARE_FIELD(gpu_page_size)
|
DMLC_DECLARE_FIELD(gpu_page_size)
|
||||||
.set_default(0)
|
.set_default(0)
|
||||||
.set_lower_bound(0)
|
.set_lower_bound(0)
|
||||||
|
|||||||
@ -222,6 +222,10 @@ void GenericParameter::ConfigureGpuId(bool require_gpu) {
|
|||||||
LOG(WARNING) << "No visible GPU is found, setting `gpu_id` to -1";
|
LOG(WARNING) << "No visible GPU is found, setting `gpu_id` to -1";
|
||||||
}
|
}
|
||||||
this->UpdateAllowUnknown(Args{{"gpu_id", std::to_string(kCpuId)}});
|
this->UpdateAllowUnknown(Args{{"gpu_id", std::to_string(kCpuId)}});
|
||||||
|
} else if (fail_on_invalid_gpu_id) {
|
||||||
|
CHECK(gpu_id == kCpuId || gpu_id < n_gpus)
|
||||||
|
<< "Only " << n_gpus << " GPUs are visible, gpu_id "
|
||||||
|
<< gpu_id << " is invalid.";
|
||||||
} else if (gpu_id != kCpuId && gpu_id >= n_gpus) {
|
} else if (gpu_id != kCpuId && gpu_id >= n_gpus) {
|
||||||
LOG(WARNING) << "Only " << n_gpus
|
LOG(WARNING) << "Only " << n_gpus
|
||||||
<< " GPUs are visible, setting `gpu_id` to " << gpu_id % n_gpus;
|
<< " GPUs are visible, setting `gpu_id` to " << gpu_id % n_gpus;
|
||||||
|
|||||||
@ -580,7 +580,7 @@ class GPUPredictor : public xgboost::Predictor {
|
|||||||
Predictor::Predictor{generic_param} {}
|
Predictor::Predictor{generic_param} {}
|
||||||
|
|
||||||
~GPUPredictor() override {
|
~GPUPredictor() override {
|
||||||
if (generic_param_->gpu_id >= 0) {
|
if (generic_param_->gpu_id >= 0 && generic_param_->gpu_id < common::AllVisibleGPUs()) {
|
||||||
dh::safe_cuda(cudaSetDevice(generic_param_->gpu_id));
|
dh::safe_cuda(cudaSetDevice(generic_param_->gpu_id));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -52,3 +52,17 @@ class TestGPUBasicModels:
|
|||||||
|
|
||||||
model_0, model_1 = self.run_cls(X, y, False)
|
model_0, model_1 = self.run_cls(X, y, False)
|
||||||
assert model_0 != model_1
|
assert model_0 != model_1
|
||||||
|
|
||||||
|
def test_invalid_gpu_id(self):
|
||||||
|
X = np.random.randn(10, 5) * 1e4
|
||||||
|
y = np.random.randint(0, 2, size=10) * 1e4
|
||||||
|
# should pass with invalid gpu id
|
||||||
|
cls1 = xgb.XGBClassifier(tree_method='gpu_hist', gpu_id=9999)
|
||||||
|
cls1.fit(X, y)
|
||||||
|
# should throw error with fail_on_invalid_gpu_id enabled
|
||||||
|
cls2 = xgb.XGBClassifier(tree_method='gpu_hist', gpu_id=9999, fail_on_invalid_gpu_id=True)
|
||||||
|
try:
|
||||||
|
cls2.fit(X, y)
|
||||||
|
assert False, "Should have failed with with fail_on_invalid_gpu_id enabled"
|
||||||
|
except xgb.core.XGBoostError as err:
|
||||||
|
assert "gpu_id 9999 is invalid" in str(err)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user