[dask] Fix nthread config with dask sklearn wrapper. (#7633)

This commit is contained in:
Jiaming Yuan 2022-02-08 06:38:32 +08:00 committed by GitHub
parent d152c59a9c
commit 3e693e4f97
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 8 additions and 4 deletions

View File

@@ -944,7 +944,7 @@ async def _train_async(
LOGGER.info("Overriding `nthreads` defined in dask worker.")
n_threads = local_param[p]
break
if n_threads == 0:
if n_threads == 0 or n_threads is None:
n_threads = worker.nthreads
local_param.update({"nthread": n_threads, "n_jobs": n_threads})

View File

@@ -563,10 +563,13 @@ def run_dask_classifier(
assert list(history.keys())[0] == "validation_0"
assert list(history["validation_0"].keys())[0] == metric
assert len(list(history["validation_0"])) == 1
config = json.loads(classifier.get_booster().save_config())
n_threads = int(config["learner"]["generic_param"]["nthread"])
assert n_threads != 0 and n_threads != os.cpu_count()
forest = int(
json.loads(classifier.get_booster().save_config())["learner"][
"gradient_booster"
]["gbtree_train_param"]["num_parallel_tree"]
config["learner"]["gradient_booster"]["gbtree_train_param"]["num_parallel_tree"]
)
if model == "boosting":
assert len(history["validation_0"][metric]) == 2
@@ -591,6 +594,7 @@ def run_dask_classifier(
np.testing.assert_allclose(single_node_proba, probas)
else:
import cupy
cupy.testing.assert_allclose(single_node_proba, probas)
# Test with dataframe, not shared with GPU as cupy doesn't work well with da.unique.