Document for device ordinal. (#9398)

- Rewrite GPU demos. The notebook is converted to a script to avoid committing additional PNG plots.
- Add GPU demos into the sphinx gallery.
- Add RMM demos into the sphinx gallery.
- Test for firing threads with different device ordinals.
This commit is contained in:
Jiaming Yuan
2023-07-22 15:26:29 +08:00
committed by GitHub
parent 22b0a55a04
commit 275da176ba
32 changed files with 351 additions and 398 deletions

View File

@@ -234,7 +234,7 @@ Arrow specification.'''
cp.cuda.runtime.setDevice(0)
dtrain = dmatrix_from_cupy(np.float32, xgb.QuantileDMatrix, np.nan)
with pytest.raises(
xgb.core.XGBoostError, match="Data is resided on a different device"
xgb.core.XGBoostError, match="Invalid device ordinal"
):
xgb.train(
{'tree_method': 'gpu_hist', 'gpu_id': 1}, dtrain, num_boost_round=10

View File

@@ -2,6 +2,7 @@ import json
import os
import sys
import tempfile
from concurrent.futures import ThreadPoolExecutor
import numpy as np
import pytest
@@ -23,18 +24,19 @@ def test_gpu_binary_classification():
from sklearn.model_selection import KFold
digits = load_digits(n_class=2)
y = digits['target']
X = digits['data']
y = digits["target"]
X = digits["data"]
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
for cls in (xgb.XGBClassifier, xgb.XGBRFClassifier):
for train_index, test_index in kf.split(X, y):
xgb_model = cls(
random_state=42, tree_method='gpu_hist',
n_estimators=4, gpu_id='0').fit(X[train_index], y[train_index])
random_state=42, tree_method="gpu_hist", n_estimators=4, gpu_id="0"
).fit(X[train_index], y[train_index])
preds = xgb_model.predict(X[test_index])
labels = y[test_index]
err = sum(1 for i in range(len(preds))
if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
err = sum(
1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]
) / float(len(preds))
assert err < 0.1
@@ -133,7 +135,7 @@ def test_classififer():
X, y = load_digits(return_X_y=True)
y *= 10
clf = xgb.XGBClassifier(tree_method="gpu_hist", n_estimators=1)
clf = xgb.XGBClassifier(tree_method="hist", n_estimators=1, device="cuda")
# numpy
with pytest.raises(ValueError, match=r"Invalid classes.*"):
@@ -161,3 +163,46 @@ def test_ranking_qid_df():
import cudf
run_ranking_qid_df(cudf, "gpu_hist")
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.mgpu
def test_device_ordinal() -> None:
    """Stress-test ``device="cuda:<ordinal>"`` from many concurrent threads.

    Each worker thread binds its current cupy device either to the ordinal
    it trains on or deliberately to a different one, then checks that
    training succeeds in the first case and raises an ordinal error in the
    second.  Marked ``mgpu``: needs multiple visible GPUs.
    """
    import cupy as cp

    # NOTE(review): assumes exactly two visible GPUs — confirm this matches
    # the multi-GPU CI configuration.
    n_devices = 2

    def worker(ordinal: int, correct_ordinal: bool) -> None:
        # Set the thread-local cupy device: either the ordinal we will
        # train on, or the *other* device to provoke the error path.
        if correct_ordinal:
            cp.cuda.runtime.setDevice(ordinal)
        else:
            cp.cuda.runtime.setDevice((ordinal + 1) % n_devices)

        # Data is allocated on the thread's current device (cupy arrays).
        X, y, w = tm.make_regression(4096, 12, use_cupy=True)
        reg = xgb.XGBRegressor(device=f"cuda:{ordinal}", tree_method="hist")
        if correct_ordinal:
            reg.fit(
                X, y, sample_weight=w, eval_set=[(X, y)], sample_weight_eval_set=[w]
            )
            # Training on the matching device should actually learn.
            assert tm.non_increasing(reg.evals_result()["validation_0"]["rmse"])
            return

        # Mismatched case: the requested ordinal differs from where the
        # data lives, so fit must fail with an ordinal error.
        with pytest.raises(ValueError, match="Invalid device ordinal"):
            reg.fit(
                X, y, sample_weight=w, eval_set=[(X, y)], sample_weight_eval_set=[w]
            )

    with ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
        futures = []
        n_trials = 32
        for i in range(n_trials):
            # Alternate device ordinals; every third trial mismatches.
            fut = executor.submit(
                worker, ordinal=i % n_devices, correct_ordinal=i % 3 != 0
            )
            futures.append(fut)
        for fut in futures:
            # Re-raise any worker exception in the main thread.
            fut.result()

    # Restore the default device so later tests start from ordinal 0.
    cp.cuda.runtime.setDevice(0)