Fix dask prediction. (#4941)

* Fix dask prediction.
* Add better error messages for wrong partition.
2019-10-14 23:19:34 -04:00
parent b61d534472
commit 2ebdec8aa6
5 changed files with 51 additions and 24 deletions
--- a/demo/dask/cpu_training.py
+++ b/demo/dask/cpu_training.py
@@ -7,11 +7,10 @@ from dask import array as da

 def main(client):
    # generate some random data for demonstration
-    n = 100
    m = 100000
-    partition_size = 1000
-    X = da.random.random((m, n), partition_size)
-    y = da.random.random(m, partition_size)
+    n = 100
+    X = da.random.random(size=(m, n), chunks=100)
+    y = da.random.random(size=(m, ), chunks=100)

    # DaskDMatrix acts like normal DMatrix, works as a proxy for local
    # DMatrix scatter around workers.
@@ -38,6 +37,6 @@ def main(client):

 if __name__ == '__main__':
    # or use other clusters for scaling
-    with LocalCluster(n_workers=4, threads_per_worker=1) as cluster:
+    with LocalCluster(n_workers=7, threads_per_worker=1) as cluster:
        with Client(cluster) as client:
            main(client)
--- a/demo/dask/gpu_training.py
+++ b/demo/dask/gpu_training.py
@@ -6,11 +6,11 @@ from xgboost.dask import DaskDMatrix


 def main(client):
-    n = 100
+    # generate some random data for demonstration
    m = 100000
-    partition_size = 1000
-    X = da.random.random((m, n), partition_size)
-    y = da.random.random(m, partition_size)
+    n = 100
+    X = da.random.random(size=(m, n), chunks=100)
+    y = da.random.random(size=(m, ), chunks=100)

    # DaskDMatrix acts like normal DMatrix, works as a proxy for local
    # DMatrix scatter around workers.
@@ -23,6 +23,7 @@ def main(client):
    output = xgb.dask.train(client,
                            {'verbosity': 2,
                             'nthread': 1,
+                             # Golden line for GPU training
                             'tree_method': 'gpu_hist'},
                            dtrain,
                            num_boost_round=4, evals=[(dtrain, 'train')])