[dask] Fix nthread config with dask sklearn wrapper. (#7633)

This commit is contained in:
Jiaming Yuan 2022-02-08 06:38:32 +08:00 committed by GitHub
parent d152c59a9c
commit 3e693e4f97
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 8 additions and 4 deletions

View File

@@ -944,7 +944,7 @@ async def _train_async(
LOGGER.info("Overriding `nthreads` defined in dask worker.")
n_threads = local_param[p]
break
if n_threads == 0:
if n_threads == 0 or n_threads is None:
n_threads = worker.nthreads
local_param.update({"nthread": n_threads, "n_jobs": n_threads})

View File

@@ -563,10 +563,13 @@ def run_dask_classifier(
assert list(history.keys())[0] == "validation_0"
assert list(history["validation_0"].keys())[0] == metric
assert len(list(history["validation_0"])) == 1
config = json.loads(classifier.get_booster().save_config())
n_threads = int(config["learner"]["generic_param"]["nthread"])
assert n_threads != 0 and n_threads != os.cpu_count()
forest = int(
json.loads(classifier.get_booster().save_config())["learner"][
"gradient_booster"
]["gbtree_train_param"]["num_parallel_tree"]
config["learner"]["gradient_booster"]["gbtree_train_param"]["num_parallel_tree"]
)
if model == "boosting":
assert len(history["validation_0"][metric]) == 2
@@ -591,6 +594,7 @@ def run_dask_classifier(
np.testing.assert_allclose(single_node_proba, probas)
else:
import cupy
cupy.testing.assert_allclose(single_node_proba, probas)
# Test with dataframe, not shared with GPU as cupy doesn't work well with da.unique.