Document for device ordinal. (#9398)

- Rewrite GPU demos. The notebook is converted to a script to avoid committing additional PNG plots.
- Add GPU demos into the sphinx gallery.
- Add RMM demos into the sphinx gallery.
- Test for firing threads with different device ordinals.
This commit is contained in:
Jiaming Yuan
2023-07-22 15:26:29 +08:00
committed by GitHub
parent 22b0a55a04
commit 275da176ba
32 changed files with 351 additions and 398 deletions

View File

@@ -234,7 +234,7 @@ Arrow specification.'''
cp.cuda.runtime.setDevice(0)
dtrain = dmatrix_from_cupy(np.float32, xgb.QuantileDMatrix, np.nan)
with pytest.raises(
xgb.core.XGBoostError, match="Data is resided on a different device"
xgb.core.XGBoostError, match="Invalid device ordinal"
):
xgb.train(
{'tree_method': 'gpu_hist', 'gpu_id': 1}, dtrain, num_boost_round=10

View File

@@ -2,6 +2,7 @@ import json
import os
import sys
import tempfile
from concurrent.futures import ThreadPoolExecutor
import numpy as np
import pytest
@@ -23,18 +24,19 @@ def test_gpu_binary_classification():
from sklearn.model_selection import KFold
digits = load_digits(n_class=2)
y = digits['target']
X = digits['data']
y = digits["target"]
X = digits["data"]
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
for cls in (xgb.XGBClassifier, xgb.XGBRFClassifier):
for train_index, test_index in kf.split(X, y):
xgb_model = cls(
random_state=42, tree_method='gpu_hist',
n_estimators=4, gpu_id='0').fit(X[train_index], y[train_index])
random_state=42, tree_method="gpu_hist", n_estimators=4, gpu_id="0"
).fit(X[train_index], y[train_index])
preds = xgb_model.predict(X[test_index])
labels = y[test_index]
err = sum(1 for i in range(len(preds))
if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
err = sum(
1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]
) / float(len(preds))
assert err < 0.1
@@ -133,7 +135,7 @@ def test_classififer():
X, y = load_digits(return_X_y=True)
y *= 10
clf = xgb.XGBClassifier(tree_method="gpu_hist", n_estimators=1)
clf = xgb.XGBClassifier(tree_method="hist", n_estimators=1, device="cuda")
# numpy
with pytest.raises(ValueError, match=r"Invalid classes.*"):
@@ -161,3 +163,46 @@ def test_ranking_qid_df():
import cudf
run_ranking_qid_df(cudf, "gpu_hist")
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.mgpu
def test_device_ordinal() -> None:
    """Stress-test ``device="cuda:<ordinal>"`` from many concurrent threads.

    Each worker thread binds its current cupy device either to the ordinal
    it trains on or deliberately to a different one, then checks that
    training succeeds in the first case and raises an ordinal error in the
    second.  Marked ``mgpu``: needs multiple visible GPUs.
    """
    import cupy as cp

    # NOTE(review): assumes exactly two visible GPUs — confirm this matches
    # the multi-GPU CI configuration.
    n_devices = 2

    def worker(ordinal: int, correct_ordinal: bool) -> None:
        # Set the thread-local cupy device: either the ordinal we will
        # train on, or the *other* device to provoke the error path.
        if correct_ordinal:
            cp.cuda.runtime.setDevice(ordinal)
        else:
            cp.cuda.runtime.setDevice((ordinal + 1) % n_devices)

        # Data is allocated on the thread's current device (cupy arrays).
        X, y, w = tm.make_regression(4096, 12, use_cupy=True)
        reg = xgb.XGBRegressor(device=f"cuda:{ordinal}", tree_method="hist")
        if correct_ordinal:
            reg.fit(
                X, y, sample_weight=w, eval_set=[(X, y)], sample_weight_eval_set=[w]
            )
            # Training on the matching device should actually learn.
            assert tm.non_increasing(reg.evals_result()["validation_0"]["rmse"])
            return

        # Mismatched case: the requested ordinal differs from where the
        # data lives, so fit must fail with an ordinal error.
        with pytest.raises(ValueError, match="Invalid device ordinal"):
            reg.fit(
                X, y, sample_weight=w, eval_set=[(X, y)], sample_weight_eval_set=[w]
            )

    with ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
        futures = []
        n_trials = 32
        for i in range(n_trials):
            # Alternate device ordinals; every third trial mismatches.
            fut = executor.submit(
                worker, ordinal=i % n_devices, correct_ordinal=i % 3 != 0
            )
            futures.append(fut)
        for fut in futures:
            # Re-raise any worker exception in the main thread.
            fut.result()

    # Restore the default device so later tests start from ordinal 0.
    cp.cuda.runtime.setDevice(0)