diff --git a/doc/prediction.rst b/doc/prediction.rst index 853254bba..39ca3f739 100644 --- a/doc/prediction.rst +++ b/doc/prediction.rst @@ -54,8 +54,8 @@ After 1.4 release, we added a new parameter called ``strict_shape``, one can set Output is a 4-dim array with ``(n_samples, n_iterations, n_classes, n_trees_in_forest)`` as shape. ``n_trees_in_forest`` is specified by the ``numb_parallel_tree`` during training. When strict shape is set to False, output is a 2-dim array with last 3 dims - concatenated into 1. When using ``apply`` method in scikit learn interface, this is set - to False by default. + concatenated into 1. Also the last dimension is dropped if it equals 1. When using + ``apply`` method in scikit learn interface, this is set to False by default. Other than these prediction types, there's also a parameter called ``iteration_range``, diff --git a/src/c_api/c_api_utils.h b/src/c_api/c_api_utils.h index ecf5b1f34..918830945 100644 --- a/src/c_api/c_api_utils.h +++ b/src/c_api/c_api_utils.h @@ -96,6 +96,10 @@ inline void CalcPredictShape(bool strict_shape, PredictionType type, size_t rows forest = std::max(static_cast(1), forest); shape[3] = forest; *out_dim = shape.size(); + } else if (chunksize == 1) { + *out_dim = 1; + shape.resize(*out_dim); + shape.front() = rows; } else { *out_dim = 2; shape.resize(*out_dim); diff --git a/tests/python/test_predict.py b/tests/python/test_predict.py index 7562fc609..d451cd831 100644 --- a/tests/python/test_predict.py +++ b/tests/python/test_predict.py @@ -75,6 +75,11 @@ def run_predict_leaf(predictor): first = sliced[0, ...] assert np.prod(first.shape) == classes * num_parallel_tree * ntree_limit + + # When there's only 1 tree, the output is a 1 dim vector + booster = xgb.train({"tree_method": "hist"}, num_boost_round=1, dtrain=m) + assert booster.predict(m, pred_leaf=True).shape == (rows, ) + return leaf