diff --git a/python-package/xgboost/dask.py b/python-package/xgboost/dask.py index ee0895055..8184d19ea 100644 --- a/python-package/xgboost/dask.py +++ b/python-package/xgboost/dask.py @@ -1062,6 +1062,9 @@ def _maybe_dataframe( prediction, columns=columns, dtype=numpy.float32, index=index ) else: + if prediction.size == 0: + return DataFrame({}, columns=columns, dtype=numpy.float32, index=index) + prediction = DataFrame( prediction, columns=columns, dtype=numpy.float32, index=index ) diff --git a/src/predictor/predictor.cc b/src/predictor/predictor.cc index c4eb5d9db..10d006a83 100644 --- a/src/predictor/predictor.cc +++ b/src/predictor/predictor.cc @@ -85,12 +85,9 @@ void Predictor::InitOutPredictions(const MetaInfo& info, HostDeviceVectorCopy(*base_margin); } else { - if (out_preds->Empty()) { - out_preds->Resize(n, model.learner_model_param->base_score); - } else { - out_preds->Resize(n); - out_preds->Fill(model.learner_model_param->base_score); - } + out_preds->Resize(n); + // cannot rely on the Resize to fill as it might skip if the size is already correct. + out_preds->Fill(model.learner_model_param->base_score); } } } // namespace xgboost diff --git a/tests/python-gpu/test_gpu_with_dask.py b/tests/python-gpu/test_gpu_with_dask.py index c1795c940..cb8b6a8e7 100644 --- a/tests/python-gpu/test_gpu_with_dask.py +++ b/tests/python-gpu/test_gpu_with_dask.py @@ -7,8 +7,6 @@ import numpy as np import asyncio import xgboost import subprocess -import tempfile -import json from collections import OrderedDict from inspect import signature from hypothesis import given, strategies, settings, note @@ -321,9 +319,9 @@ class TestDistributedGPU: mult = 100 df = cudf.DataFrame( { - "a": [1,2,3,4,5.1] * mult, - "b": [10,15,29.3,30,31] * mult, - "y": [10,20,30,40.,50] * mult, + "a": [1, 2, 3, 4, 5.1] * mult, + "b": [10, 15, 29.3, 30, 31] * mult, + "y": [10, 20, 30, 40., 50] * mult, } ) parameters = {"tree_method": "gpu_hist", "debug_synchronize": True} @@ -350,10 +348,34 @@ class TestDistributedGPU: y = ddf[["y"]] dtrain = dxgb.DaskDeviceQuantileDMatrix(client, X, y) bst = xgb.dask.train(client, parameters, dtrain, evals=[(dtrain, "train")]) - predt = dxgb.predict(client, bst, X).compute().values + predt = dxgb.predict(client, bst, X).compute().values cupy.testing.assert_allclose(predt, predt_empty) + predt = dxgb.predict(client, bst, dtrain).compute() + cupy.testing.assert_allclose(predt, predt_empty) + + predt = dxgb.inplace_predict(client, bst, X).compute().values + cupy.testing.assert_allclose(predt, predt_empty) + + df = df.to_pandas() + empty = df.iloc[:0] + ddf = dd.concat( + [dd.from_pandas(empty, npartitions=1)] + + [dd.from_pandas(df, npartitions=3)] + + [dd.from_pandas(df, npartitions=3)] + ) + X = ddf[ddf.columns.difference(["y"])] + y = ddf[["y"]] + + predt_empty = cupy.asnumpy(predt_empty) + + predt = dxgb.predict(client, bst_empty, X).compute().values + np.testing.assert_allclose(predt, predt_empty) + + in_predt = dxgb.inplace_predict(client, bst_empty, X).compute().values + np.testing.assert_allclose(predt, in_predt) + def test_empty_dmatrix_auc(self, local_cuda_cluster: LocalCUDACluster) -> None: with Client(local_cuda_cluster) as client: n_workers = len(_get_client_workers(client))