From 3ab8f0b13d78c96d6b77ba1dd1bb0fb86a6105d2 Mon Sep 17 00:00:00 2001 From: Titouan Lorieul Date: Wed, 4 May 2016 12:14:11 +0200 Subject: [PATCH] [py] added apply function in sklearn API to return the predicted leaves --- python-package/xgboost/sklearn.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index 2fe4e2ee3..524758ae0 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -268,6 +268,29 @@ class XGBModel(XGBModelBase): output_margin=output_margin, ntree_limit=ntree_limit) + def apply(self, X, ntree_limit=0): + """Return the predicted leaf of every tree for each sample. + + Parameters + ---------- + X : array_like, shape=[n_samples, n_features] + Input features matrix. + + ntree_limit : int + Limit number of trees in the prediction; defaults to 0 (use all trees). + + Returns + ------- + X_leaves : array_like, shape=[n_samples, n_trees] + For each datapoint x in X and for each tree, return the index of the + leaf x ends up in. Leaves are numbered within + ``[0; 2**(self.max_depth+1))``, possibly with gaps in the numbering. + """ + test_dmatrix = DMatrix(X, missing=self.missing) + return self.booster().predict(test_dmatrix, + pred_leaf=True, + ntree_limit=ntree_limit) + def evals_result(self): """Return the evaluation results.