Added the `max_features` parameter to the `plot_importance` function. (#1963)

* Added the `max_features` parameter to the `plot_importance` function.

* Renamed the `max_features` parameter to `max_num_features` for clarity.

* Removed an unwanted character from the docstring.
This commit is contained in:
Félix MIKAELIAN 2017-01-16 23:49:47 +01:00 committed by Tianqi Chen
parent 49ff7c1649
commit a7d2833766

View File

@ -14,7 +14,7 @@ from .sklearn import XGBModel
def plot_importance(booster, ax=None, height=0.2,
xlim=None, ylim=None, title='Feature importance',
xlabel='F score', ylabel='Features',
importance_type='weight',
importance_type='weight', max_num_features=None,
grid=True, **kwargs):
"""Plot importance based on fitted trees.
@ -31,6 +31,8 @@ def plot_importance(booster, ax=None, height=0.2,
"gain" is the average gain of splits which use the feature
"cover" is the average coverage of splits which use the feature
where coverage is defined as the number of samples affected by the split
max_num_features : int, default None
Maximum number of top features displayed on plot. If None, all features will be displayed.
height : float, default 0.2
Bar height, passed to ax.barh()
xlim : tuple, default None
@ -69,7 +71,11 @@ def plot_importance(booster, ax=None, height=0.2,
raise ValueError('Booster.get_score() results in empty')
tuples = [(k, importance[k]) for k in importance]
tuples = sorted(tuples, key=lambda x: x[1])
if max_num_features is not None:
tuples = sorted(tuples, key=lambda x: x[1])[-max_num_features:]
ylim = (-1, max_num_features)
else:
tuples = sorted(tuples, key=lambda x: x[1])
labels, values = zip(*tuples)
if ax is None: