Improve doc and demo for dask. (#4907)

* Add a readme with link to doc.
* Add more comments in the demonstrations code.
* Workaround for https://github.com/dask/distributed/issues/3081.
This commit is contained in:
Jiaming Yuan
2019-09-30 23:59:37 -04:00
committed by GitHub
parent d30e63a0a5
commit 7e24a8d245
5 changed files with 54 additions and 17 deletions

View File

@@ -8,18 +8,18 @@ from dask_cuda import LocalCUDACluster
from dask import array as da
import xgboost
if __name__ == '__main__':
cluster = LocalCUDACluster()
client = Client(cluster)
def main(client):
# generate some random data for demonstration
n = 100
m = 1000000
partition_size = 10000
X = da.random.random((m, n), partition_size)
y = da.random.random(m, partition_size)
regressor = xgboost.dask.DaskXGBRegressor(verbosity=2)
regressor = xgboost.dask.DaskXGBRegressor(verbosity=1)
regressor.set_params(tree_method='gpu_hist')
# assigning client here is optional
regressor.client = client
regressor.fit(X, y, eval_set=[(X, y)])
@@ -29,3 +29,14 @@ if __name__ == '__main__':
history = regressor.evals_result()
print('Evaluation history:', history)
# returned prediction is always a dask array.
assert isinstance(prediction, da.Array)
return bst # returning the trained model
if __name__ == '__main__':
    # With dask-cuda, one can scale XGBoost up to arbitrary GPU clusters.
    # The `LocalCUDACluster` used here is only for demonstration purposes.
with LocalCUDACluster() as cluster:
with Client(cluster) as client:
main(client)