diff --git a/demo/dask/gpu_training.py b/demo/dask/gpu_training.py
index c6530d886..853710e31 100644
--- a/demo/dask/gpu_training.py
+++ b/demo/dask/gpu_training.py
@@ -32,6 +32,7 @@ def main(client):

     # you can pass output directly into `predict` too.
     prediction = xgb.dask.predict(client, bst, dtrain)
+    prediction = prediction.compute()
     print('Evaluation history:', history)
     return prediction

diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py
index 6a74e0733..016344062 100644
--- a/python-package/xgboost/core.py
+++ b/python-package/xgboost/core.py
@@ -1125,7 +1125,6 @@ class Booster(object):
             _check_call(_LIB.XGBoosterLoadModelFromBuffer(handle, ptr, length))
             state['handle'] = handle
         self.__dict__.update(state)
-        self.set_param({'seed': 0})

     def __copy__(self):
         return self.__deepcopy__(None)
diff --git a/python-package/xgboost/dask.py b/python-package/xgboost/dask.py
index 80cb47c07..5a465412f 100644
--- a/python-package/xgboost/dask.py
+++ b/python-package/xgboost/dask.py
@@ -395,6 +395,10 @@ def train(client, params, dtrain, *args, evals=(), **kwargs):
 def predict(client, model, data, *args):
     '''Run prediction with a trained booster.

+    .. note::
+
+        Only default prediction mode is supported right now.
+
     Parameters
     ----------
     client: dask.distributed.Client
@@ -445,8 +449,8 @@
         '''Get shape of data in each worker.'''
         logging.info('Trying to get data shape on %d', worker_id)
         worker = distributed_get_worker()
-        rows, cols = data.get_worker_data_shape(worker)
-        return rows, cols
+        rows, _ = data.get_worker_data_shape(worker)
+        return rows, 1  # default is 1

     # Constructing a dask array from list of numpy arrays
     # See https://docs.dask.org/en/latest/array-creation.html
@@ -457,7 +461,7 @@
     shapes = client.gather(futures_shape)
     arrays = []
     for i in range(len(futures_shape)):
-        arrays.append(da.from_delayed(futures[i], shape=shapes[i],
+        arrays.append(da.from_delayed(futures[i], shape=(shapes[i][0], ),
                                       dtype=numpy.float32))
     predictions = da.concatenate(arrays, axis=0)
     return predictions
diff --git a/tests/python-gpu/test_gpu_with_dask.py b/tests/python-gpu/test_gpu_with_dask.py
index ce0beb119..c6a3f8407 100644
--- a/tests/python-gpu/test_gpu_with_dask.py
+++ b/tests/python-gpu/test_gpu_with_dask.py
@@ -40,3 +40,6 @@

     assert isinstance(out['booster'], dxgb.Booster)
     assert len(out['history']['X']['rmse']) == 2
+
+    predictions = dxgb.predict(client, out, dtrain)
+    predictions = predictions.compute()
diff --git a/tests/python/test_pickling.py b/tests/python/test_pickling.py
new file mode 100644
index 000000000..1497688d2
--- /dev/null
+++ b/tests/python/test_pickling.py
@@ -0,0 +1,48 @@
+import pickle
+import numpy as np
+import xgboost as xgb
+import os
+
+
+kRows = 100
+kCols = 10
+
+
+def generate_data():
+    X = np.random.randn(kRows, kCols)
+    y = np.random.randn(kRows)
+    return X, y
+
+
+def test_model_pickling():
+    xgb_params = {
+        'verbosity': 0,
+        'nthread': 1,
+        'tree_method': 'hist'
+    }
+
+    X, y = generate_data()
+    dtrain = xgb.DMatrix(X, y)
+    bst = xgb.train(xgb_params, dtrain)
+
+    dump_0 = bst.get_dump(dump_format='json')
+    assert dump_0
+
+    filename = 'model.pkl'
+
+    with open(filename, 'wb') as fd:
+        pickle.dump(bst, fd)
+
+    with open(filename, 'rb') as fd:
+        bst = pickle.load(fd)
+
+    with open(filename, 'wb') as fd:
+        pickle.dump(bst, fd)
+
+    with open(filename, 'rb') as fd:
+        bst = pickle.load(fd)
+
+    assert bst.get_dump(dump_format='json') == dump_0
+
+    if os.path.exists(filename):
+        os.remove(filename)
diff --git a/tests/python/test_with_dask.py b/tests/python/test_with_dask.py
index 01b8c3d3f..f2ad44080 100644
--- a/tests/python/test_with_dask.py
+++ b/tests/python/test_with_dask.py
@@ -43,14 +43,17 @@ def test_from_dask_dataframe(client):

     prediction = xgb.dask.predict(client, model=booster, data=dtrain)

+    assert prediction.ndim == 1
     assert isinstance(prediction, da.Array)
-    assert prediction.shape[0] == kRows and prediction.shape[1] == kCols
+    assert prediction.shape[0] == kRows

     with pytest.raises(ValueError):
         # evals_result is not supported in dask interface.
         xgb.dask.train(
             client, {}, dtrain, num_boost_round=2, evals_result={})

+    prediction = prediction.compute()  # force prediction to be computed
+

 def test_from_dask_array(client):
     X, y = generate_array()
@@ -59,10 +62,12 @@
     result = xgb.dask.train(client, {}, dtrain)

     prediction = xgb.dask.predict(client, result, dtrain)
-    assert prediction.shape[0] == kRows and prediction.shape[1] == kCols
+    assert prediction.shape[0] == kRows

     assert isinstance(prediction, da.Array)

+    prediction = prediction.compute()  # force prediction to be computed
+

 def test_regressor(client):
     X, y = generate_array()
@@ -72,7 +77,8 @@
     regressor.fit(X, y, eval_set=[(X, y)])
     prediction = regressor.predict(X)

-    assert prediction.shape[0] == kRows and prediction.shape[1] == kCols
+    assert prediction.ndim == 1
+    assert prediction.shape[0] == kRows

     history = regressor.evals_result()

@@ -91,7 +97,8 @@
     classifier.fit(X, y, eval_set=[(X, y)])
     prediction = classifier.predict(X)

-    assert prediction.shape[0] == kRows and prediction.shape[1] == kCols
+    assert prediction.ndim == 1
+    assert prediction.shape[0] == kRows

     history = classifier.evals_result()
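As a quick illustration of the behaviour this patch introduces and tests: xgb.dask.predict now returns a 1-dimensional dask array (one value per row, since only default prediction mode is supported), and the result stays lazy until .compute() is called. The sketch below is illustrative only and not part of the patch; it assumes a local dask.distributed cluster, and the data sizes and training parameters are arbitrary.

import numpy as np
import xgboost as xgb
from dask import array as da
from dask.distributed import Client, LocalCluster


def main(client):
    # small random dataset, mirroring the tests above
    X = da.random.random(size=(100, 10), chunks=50)
    y = da.random.random(size=(100, ), chunks=50)
    dtrain = xgb.dask.DaskDMatrix(client, X, y)

    output = xgb.dask.train(client, {'tree_method': 'hist'}, dtrain,
                            num_boost_round=2, evals=[(dtrain, 'train')])
    booster = output['booster']

    prediction = xgb.dask.predict(client, booster, dtrain)
    assert prediction.ndim == 1        # no trailing kCols dimension any more
    assert prediction.shape[0] == 100

    result = prediction.compute()      # force the lazy dask array into numpy
    assert isinstance(result, np.ndarray)


if __name__ == '__main__':
    with LocalCluster(n_workers=2, threads_per_worker=1) as cluster:
        with Client(cluster) as client:
            main(client)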