Don't set_params at the end of set_state. (#4947)

* Don't set_params at the end of set_state.
* Also fix another issue found in dask prediction.
* Add note about prediction: other prediction modes are not supported at the moment.
2019-10-15 10:08:26 -04:00 · 2019-10-15 10:08:26 -04:00 · 7e72a12871
commit 7e72a12871
parent 2ebdec8aa6
6 changed files with 70 additions and 8 deletions
--- a/demo/dask/gpu_training.py
+++ b/demo/dask/gpu_training.py
@@ -32,6 +32,7 @@ def main(client):

    # you can pass output directly into `predict` too.
    prediction = xgb.dask.predict(client, bst, dtrain)
+    prediction = prediction.compute()
    print('Evaluation history:', history)
    return prediction

--- a/python-package/xgboost/core.py
+++ b/python-package/xgboost/core.py
@@ -1125,7 +1125,6 @@ class Booster(object):
            _check_call(_LIB.XGBoosterLoadModelFromBuffer(handle, ptr, length))
            state['handle'] = handle
        self.__dict__.update(state)
-        self.set_param({'seed': 0})

    def __copy__(self):
        return self.__deepcopy__(None)
--- a/python-package/xgboost/dask.py
+++ b/python-package/xgboost/dask.py
@@ -395,6 +395,10 @@ def train(client, params, dtrain, *args, evals=(), **kwargs):
 def predict(client, model, data, *args):
    '''Run prediction with a trained booster.

+      .. note::
+
+          Only default prediction mode is supported right now.
+
    Parameters
    ----------
    client: dask.distributed.Client
@@ -445,8 +449,8 @@ def predict(client, model, data, *args):
        '''Get shape of data in each worker.'''
        logging.info('Trying to get data shape on %d', worker_id)
        worker = distributed_get_worker()
-        rows, cols = data.get_worker_data_shape(worker)
-        return rows, cols
+        rows, _ = data.get_worker_data_shape(worker)
+        return rows, 1          # default is 1

    # Constructing a dask array from list of numpy arrays
    # See https://docs.dask.org/en/latest/array-creation.html
@@ -457,7 +461,7 @@ def predict(client, model, data, *args):
    shapes = client.gather(futures_shape)
    arrays = []
    for i in range(len(futures_shape)):
-        arrays.append(da.from_delayed(futures[i], shape=shapes[i],
+        arrays.append(da.from_delayed(futures[i], shape=(shapes[i][0], ),
                                      dtype=numpy.float32))
    predictions = da.concatenate(arrays, axis=0)
    return predictions
--- a/tests/python-gpu/test_gpu_with_dask.py
+++ b/tests/python-gpu/test_gpu_with_dask.py
@@ -40,3 +40,6 @@ def test_dask_dataframe(client):

    assert isinstance(out['booster'], dxgb.Booster)
    assert len(out['history']['X']['rmse']) == 2
+
+    predictions = dxgb.predict(out, dtrain)
+    predictions = predictions.compute()
--- a/tests/python/test_pickling.py
+++ b/tests/python/test_pickling.py
@@ -0,0 +1,48 @@
+import pickle
+import numpy as np
+import xgboost as xgb
+import os
+
+
+kRows = 100
+kCols = 10
+
+
+def generate_data():
+    X = np.random.randn(kRows, kCols)
+    y = np.random.randn(kRows)
+    return X, y
+
+
+def test_model_pickling():
+    xgb_params = {
+        'verbosity': 0,
+        'nthread': 1,
+        'tree_method': 'hist'
+    }
+
+    X, y = generate_data()
+    dtrain = xgb.DMatrix(X, y)
+    bst = xgb.train(xgb_params, dtrain)
+
+    dump_0 = bst.get_dump(dump_format='json')
+    assert dump_0
+
+    filename = 'model.pkl'
+
+    with open(filename, 'wb') as fd:
+        pickle.dump(bst, fd)
+
+    with open(filename, 'rb') as fd:
+        bst = pickle.load(fd)
+
+    with open(filename, 'wb') as fd:
+        pickle.dump(bst, fd)
+
+    with open(filename, 'rb') as fd:
+        bst = pickle.load(fd)
+
+    assert bst.get_dump(dump_format='json') == dump_0
+
+    if os.path.exists(filename):
+        os.remove(filename)
--- a/tests/python/test_with_dask.py
+++ b/tests/python/test_with_dask.py
@@ -43,14 +43,17 @@ def test_from_dask_dataframe(client):

    prediction = xgb.dask.predict(client, model=booster, data=dtrain)

+    assert prediction.ndim == 1
    assert isinstance(prediction, da.Array)
-    assert prediction.shape[0] == kRows and prediction.shape[1] == kCols
+    assert prediction.shape[0] == kRows

    with pytest.raises(ValueError):
        # evals_result is not supported in dask interface.
        xgb.dask.train(
            client, {}, dtrain, num_boost_round=2, evals_result={})

+    prediction = prediction.compute()  # force prediction to be computed
+

 def test_from_dask_array(client):
    X, y = generate_array()
@@ -59,10 +62,12 @@ def test_from_dask_array(client):
    result = xgb.dask.train(client, {}, dtrain)

    prediction = xgb.dask.predict(client, result, dtrain)
-    assert prediction.shape[0] == kRows and prediction.shape[1] == kCols
+    assert prediction.shape[0] == kRows

    assert isinstance(prediction, da.Array)

+    prediction = prediction.compute()  # force prediction to be computed
+

 def test_regressor(client):
    X, y = generate_array()
@@ -72,7 +77,8 @@ def test_regressor(client):
    regressor.fit(X, y, eval_set=[(X, y)])
    prediction = regressor.predict(X)

-    assert prediction.shape[0] == kRows and prediction.shape[1] == kCols
+    assert prediction.ndim == 1
+    assert prediction.shape[0] == kRows

    history = regressor.evals_result()

@@ -91,7 +97,8 @@ def test_classifier(client):
    classifier.fit(X, y,  eval_set=[(X, y)])
    prediction = classifier.predict(X)

-    assert prediction.shape[0] == kRows and prediction.shape[1] == kCols
+    assert prediction.ndim == 1
+    assert prediction.shape[0] == kRows

    history = classifier.evals_result()