Ensure predict leaf output 1-dim vector where there's only 1 tree. (#6889)

This commit is contained in:
Jiaming Yuan 2021-04-23 15:07:48 +08:00 committed by GitHub
parent 54afa3ac7a
commit 8760ec4827
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 11 additions and 2 deletions

View File

@ -54,8 +54,8 @@ After 1.4 release, we added a new parameter called ``strict_shape``, one can set
Output is a 4-dim array with ``(n_samples, n_iterations, n_classes, n_trees_in_forest)``
as shape. ``n_trees_in_forest`` is specified by the ``num_parallel_tree`` during
training. When strict shape is set to False, output is a 2-dim array with last 3 dims
concatenated into 1. When using ``apply`` method in scikit learn interface, this is set
to False by default.
concatenated into 1. Also, the last dimension is dropped if it equals 1. When using
``apply`` method in scikit learn interface, this is set to False by default.
Other than these prediction types, there's also a parameter called ``iteration_range``,

View File

@ -96,6 +96,10 @@ inline void CalcPredictShape(bool strict_shape, PredictionType type, size_t rows
forest = std::max(static_cast<decltype(forest)>(1), forest);
shape[3] = forest;
*out_dim = shape.size();
} else if (chunksize == 1) {
*out_dim = 1;
shape.resize(*out_dim);
shape.front() = rows;
} else {
*out_dim = 2;
shape.resize(*out_dim);

View File

@ -75,6 +75,11 @@ def run_predict_leaf(predictor):
first = sliced[0, ...]
assert np.prod(first.shape) == classes * num_parallel_tree * ntree_limit
# When there's only 1 tree, the output is a 1 dim vector
booster = xgb.train({"tree_method": "hist"}, num_boost_round=1, dtrain=m)
assert booster.predict(m, pred_leaf=True).shape == (rows, )
return leaf