Optionaly fail when gpu_id is set to invalid value (#6342)
This commit is contained in:
parent
38c80bcec4
commit
028ec5f028
@ -11,6 +11,7 @@
|
||||
#include <string>
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
struct GenericParameter : public XGBoostParameter<GenericParameter> {
|
||||
// Constant representing the device ID of CPU.
|
||||
static int32_t constexpr kCpuId = -1;
|
||||
@ -26,6 +27,8 @@ struct GenericParameter : public XGBoostParameter<GenericParameter> {
|
||||
int nthread;
|
||||
// primary device, -1 means no gpu.
|
||||
int gpu_id;
|
||||
// fail when gpu_id is invalid
|
||||
bool fail_on_invalid_gpu_id {false};
|
||||
// gpu page size in external memory mode, 0 means using the default.
|
||||
size_t gpu_page_size;
|
||||
bool enable_experimental_json_serialization {true};
|
||||
@ -64,6 +67,9 @@ struct GenericParameter : public XGBoostParameter<GenericParameter> {
|
||||
.set_default(-1)
|
||||
.set_lower_bound(-1)
|
||||
.describe("The primary GPU device ordinal.");
|
||||
DMLC_DECLARE_FIELD(fail_on_invalid_gpu_id)
|
||||
.set_default(false)
|
||||
.describe("Fail with error when gpu_id is invalid.");
|
||||
DMLC_DECLARE_FIELD(gpu_page_size)
|
||||
.set_default(0)
|
||||
.set_lower_bound(0)
|
||||
|
||||
@ -222,6 +222,10 @@ void GenericParameter::ConfigureGpuId(bool require_gpu) {
|
||||
LOG(WARNING) << "No visible GPU is found, setting `gpu_id` to -1";
|
||||
}
|
||||
this->UpdateAllowUnknown(Args{{"gpu_id", std::to_string(kCpuId)}});
|
||||
} else if (fail_on_invalid_gpu_id) {
|
||||
CHECK(gpu_id == kCpuId || gpu_id < n_gpus)
|
||||
<< "Only " << n_gpus << " GPUs are visible, gpu_id "
|
||||
<< gpu_id << " is invalid.";
|
||||
} else if (gpu_id != kCpuId && gpu_id >= n_gpus) {
|
||||
LOG(WARNING) << "Only " << n_gpus
|
||||
<< " GPUs are visible, setting `gpu_id` to " << gpu_id % n_gpus;
|
||||
|
||||
@ -580,7 +580,7 @@ class GPUPredictor : public xgboost::Predictor {
|
||||
Predictor::Predictor{generic_param} {}
|
||||
|
||||
~GPUPredictor() override {
|
||||
if (generic_param_->gpu_id >= 0) {
|
||||
if (generic_param_->gpu_id >= 0 && generic_param_->gpu_id < common::AllVisibleGPUs()) {
|
||||
dh::safe_cuda(cudaSetDevice(generic_param_->gpu_id));
|
||||
}
|
||||
}
|
||||
|
||||
@ -52,3 +52,17 @@ class TestGPUBasicModels:
|
||||
|
||||
model_0, model_1 = self.run_cls(X, y, False)
|
||||
assert model_0 != model_1
|
||||
|
||||
def test_invalid_gpu_id(self):
|
||||
X = np.random.randn(10, 5) * 1e4
|
||||
y = np.random.randint(0, 2, size=10) * 1e4
|
||||
# should pass with invalid gpu id
|
||||
cls1 = xgb.XGBClassifier(tree_method='gpu_hist', gpu_id=9999)
|
||||
cls1.fit(X, y)
|
||||
# should throw error with fail_on_invalid_gpu_id enabled
|
||||
cls2 = xgb.XGBClassifier(tree_method='gpu_hist', gpu_id=9999, fail_on_invalid_gpu_id=True)
|
||||
try:
|
||||
cls2.fit(X, y)
|
||||
assert False, "Should have failed with with fail_on_invalid_gpu_id enabled"
|
||||
except xgb.core.XGBoostError as err:
|
||||
assert "gpu_id 9999 is invalid" in str(err)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user