Improve doc and demo for dask. (#4907)

* Add a readme with link to doc.
* Add more comments in the demonstrations code.
* Workaround for https://github.com/dask/distributed/issues/3081.
This commit is contained in:
Jiaming Yuan
2019-09-30 23:59:37 -04:00
committed by GitHub
parent d30e63a0a5
commit 7e24a8d245
5 changed files with 54 additions and 17 deletions

View File

@@ -8,18 +8,18 @@ from dask_cuda import LocalCUDACluster
from dask import array as da
import xgboost
if __name__ == '__main__':
cluster = LocalCUDACluster()
client = Client(cluster)
def main(client):
# generate some random data for demonstration
n = 100
m = 1000000
partition_size = 10000
X = da.random.random((m, n), partition_size)
y = da.random.random(m, partition_size)
regressor = xgboost.dask.DaskXGBRegressor(verbosity=2)
regressor = xgboost.dask.DaskXGBRegressor(verbosity=1)
regressor.set_params(tree_method='gpu_hist')
# assigning client here is optional
regressor.client = client
regressor.fit(X, y, eval_set=[(X, y)])
@@ -29,3 +29,14 @@ if __name__ == '__main__':
history = regressor.evals_result()
print('Evaluation history:', history)
# returned prediction is always a dask array.
assert isinstance(prediction, da.Array)
return bst # returning the trained model
if __name__ == '__main__':
    # With dask-cuda, one can scale XGBoost up to arbitrary GPU clusters.
    # The `LocalCUDACluster` used here is only for demonstration purposes.
with LocalCUDACluster() as cluster:
with Client(cluster) as client:
main(client)