Separate dependencies and lightweight test env for Python

2016-02-28 20:09:09 -06:00 · 2016-02-28 20:09:09 -06:00 · 803a6fe474
commit 803a6fe474
parent 5f70b4df7a
11 changed files with 301 additions and 286 deletions
--- a/.travis.yml
+++ b/.travis.yml
@ -15,6 +15,7 @@ env:
    - TASK=r_test
    # python package test
    - TASK=python_test
    - TASK=python_lightweight_test
    # java package test
    - TASK=java_test
--- a/python-package/xgboost/plotting.py
+++ b/python-package/xgboost/plotting.py
@ -42,7 +42,7 @@ def plot_importance(booster, ax=None, height=0.2,
    -------
    ax : matplotlib Axes
    """
-
+    # TODO: move this to compat.py
    try:
        import matplotlib.pyplot as plt
    except ImportError:
--- a/tests/python/test_basic.py
+++ b/tests/python/test_basic.py
@ -3,10 +3,6 @@ import numpy as np
 import xgboost as xgb
 import unittest
 import matplotlib
 matplotlib.use('Agg')
 dpath = 'demo/data/'
 rng = np.random.RandomState(1994)
@ -102,86 +98,6 @@ class TestBasic(unittest.TestCase):
            dm = xgb.DMatrix(dummy, feature_names=list('abcde'))
            self.assertRaises(ValueError, bst.predict, dm)
    def test_pandas(self):
        import pandas as pd
        df = pd.DataFrame([[1, 2., True], [2, 3., False]], columns=['a', 'b', 'c'])
        dm = xgb.DMatrix(df, label=pd.Series([1, 2]))
        assert dm.feature_names == ['a', 'b', 'c']
        assert dm.feature_types == ['int', 'float', 'i']
        assert dm.num_row() == 2
        assert dm.num_col() == 3
        # overwrite feature_names and feature_types
        dm = xgb.DMatrix(df, label=pd.Series([1, 2]),
                         feature_names=['x', 'y', 'z'], feature_types=['q', 'q', 'q'])
        assert dm.feature_names == ['x', 'y', 'z']
        assert dm.feature_types == ['q', 'q', 'q']
        assert dm.num_row() == 2
        assert dm.num_col() == 3
        # incorrect dtypes
        df = pd.DataFrame([[1, 2., 'x'], [2, 3., 'y']], columns=['a', 'b', 'c'])
        self.assertRaises(ValueError, xgb.DMatrix, df)
        # numeric columns
        df = pd.DataFrame([[1, 2., True], [2, 3., False]])
        dm = xgb.DMatrix(df, label=pd.Series([1, 2]))
        assert dm.feature_names == ['0', '1', '2']
        assert dm.feature_types == ['int', 'float', 'i']
        assert dm.num_row() == 2
        assert dm.num_col() == 3
        df = pd.DataFrame([[1, 2., 1], [2, 3., 1]], columns=[4, 5, 6])
        dm = xgb.DMatrix(df, label=pd.Series([1, 2]))
        assert dm.feature_names == ['4', '5', '6']
        assert dm.feature_types == ['int', 'float', 'int']
        assert dm.num_row() == 2
        assert dm.num_col() == 3
        df = pd.DataFrame({'A': ['X', 'Y', 'Z'], 'B': [1, 2, 3]})
        dummies = pd.get_dummies(df)
        #    B  A_X  A_Y  A_Z
        # 0  1    1    0    0
        # 1  2    0    1    0
        # 2  3    0    0    1
        result, _, _ = xgb.core._maybe_pandas_data(dummies, None, None)
        exp = np.array([[1., 1., 0., 0.],
                        [2., 0., 1., 0.],
                        [3., 0., 0., 1.]])
        np.testing.assert_array_equal(result, exp)
        dm = xgb.DMatrix(dummies)
        assert dm.feature_names == ['B', 'A_X', 'A_Y', 'A_Z']
        assert dm.feature_types == ['int', 'float', 'float', 'float']
        assert dm.num_row() == 3
        assert dm.num_col() == 4
        df = pd.DataFrame({'A=1': [1, 2, 3], 'A=2': [4, 5, 6]})
        dm = xgb.DMatrix(df)
        assert dm.feature_names == ['A=1', 'A=2']
        assert dm.feature_types == ['int', 'int']
        assert dm.num_row() == 3
        assert dm.num_col() == 2
    def test_pandas_label(self):
        import pandas as pd
        # label must be a single column
        df = pd.DataFrame({'A': ['X', 'Y', 'Z'], 'B': [1, 2, 3]})
        self.assertRaises(ValueError, xgb.core._maybe_pandas_label, df)
        # label must be supported dtype
        df = pd.DataFrame({'A': np.array(['a', 'b', 'c'], dtype=object)})
        self.assertRaises(ValueError, xgb.core._maybe_pandas_label, df)
        df = pd.DataFrame({'A': np.array([1, 2, 3], dtype=int)})
        result = xgb.core._maybe_pandas_label(df)
        np.testing.assert_array_equal(result, np.array([[1.], [2.], [3.]], dtype=float))
        dm = xgb.DMatrix(np.random.randn(3, 2), label=df)
        assert dm.num_row() == 3
        assert dm.num_col() == 2
    def test_load_file_invalid(self):
        self.assertRaises(xgb.core.XGBoostError, xgb.Booster,
                          model_file='incorrect_path')
@ -215,168 +131,8 @@ class TestBasic(unittest.TestCase):
        dm = xgb.DMatrix(dpath + 'agaricus.txt.train')
        params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
        import pandas as pd
        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10)
        assert isinstance(cv, pd.DataFrame)
        exp = pd.Index([u'test-error-mean', u'test-error-std',
                        u'train-error-mean', u'train-error-std'])
        assert cv.columns.equals(exp)
        # show progress log (result is the same as above)
        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
                    verbose_eval=True)
        assert isinstance(cv, pd.DataFrame)
        exp = pd.Index([u'test-error-mean', u'test-error-std',
                        u'train-error-mean', u'train-error-std'])
        assert cv.columns.equals(exp)
        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
                    verbose_eval=True, show_stdv=False)
        assert isinstance(cv, pd.DataFrame)
        exp = pd.Index([u'test-error-mean', u'test-error-std',
                        u'train-error-mean', u'train-error-std'])
        assert cv.columns.equals(exp)
        # return np.ndarray
        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=False)
        assert isinstance(cv, np.ndarray)
        assert cv.shape == (10, 4)
        params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic', 'eval_metric': 'auc'}
        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True)
        assert 'eval_metric' in params
        assert 'auc' in cv.columns[0]
        params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic', 'eval_metric': ['auc']}
        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True)
        assert 'eval_metric' in params
        assert 'auc' in cv.columns[0]
        params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic', 'eval_metric': ['auc']}
        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True, early_stopping_rounds=1)
        assert 'eval_metric' in params
        assert 'auc' in cv.columns[0]
        assert cv.shape[0] < 10
        params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics='auc')
        assert 'auc' in cv.columns[0]
        params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics=['auc'])
        assert 'auc' in cv.columns[0]
        params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic', 'eval_metric': ['auc']}
        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics='error')
        assert 'eval_metric' in params
        assert 'auc' not in cv.columns[0]
        assert 'error' in cv.columns[0]
        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics=['error'])
        assert 'eval_metric' in params
        assert 'auc' not in cv.columns[0]
        assert 'error' in cv.columns[0]
        params = list(params.items())
        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics=['error'])
        assert isinstance(params, list)
        assert 'auc' not in cv.columns[0]
        assert 'error' in cv.columns[0]
    def test_plotting(self):
        bst2 = xgb.Booster(model_file='xgb.model')
        # plotting
        from matplotlib.axes import Axes
        from graphviz import Digraph
        ax = xgb.plot_importance(bst2)
        assert isinstance(ax, Axes)
        assert ax.get_title() == 'Feature importance'
        assert ax.get_xlabel() == 'F score'
        assert ax.get_ylabel() == 'Features'
        assert len(ax.patches) == 4
        ax = xgb.plot_importance(bst2, color='r',
                                 title='t', xlabel='x', ylabel='y')
        assert isinstance(ax, Axes)
        assert ax.get_title() == 't'
        assert ax.get_xlabel() == 'x'
        assert ax.get_ylabel() == 'y'
        assert len(ax.patches) == 4
        for p in ax.patches:
            assert p.get_facecolor() == (1.0, 0, 0, 1.0)  # red
        ax = xgb.plot_importance(bst2, color=['r', 'r', 'b', 'b'],
                                 title=None, xlabel=None, ylabel=None)
        assert isinstance(ax, Axes)
        assert ax.get_title() == ''
        assert ax.get_xlabel() == ''
        assert ax.get_ylabel() == ''
        assert len(ax.patches) == 4
        assert ax.patches[0].get_facecolor() == (1.0, 0, 0, 1.0)  # red
        assert ax.patches[1].get_facecolor() == (1.0, 0, 0, 1.0)  # red
        assert ax.patches[2].get_facecolor() == (0, 0, 1.0, 1.0)  # blue
        assert ax.patches[3].get_facecolor() == (0, 0, 1.0, 1.0)  # blue
        g = xgb.to_graphviz(bst2, num_trees=0)
        assert isinstance(g, Digraph)
        ax = xgb.plot_tree(bst2, num_trees=0)
        assert isinstance(ax, Axes)
    def test_importance_plot_lim(self):
        np.random.seed(1)
        dm = xgb.DMatrix(np.random.randn(100, 100), label=[0, 1] * 50)
        bst = xgb.train({}, dm)
        assert len(bst.get_fscore()) == 71
        ax = xgb.plot_importance(bst)
        assert ax.get_xlim() == (0., 11.)
        assert ax.get_ylim() == (-1., 71.)
        ax = xgb.plot_importance(bst, xlim=(0, 5), ylim=(10, 71))
        assert ax.get_xlim() == (0., 5.)
        assert ax.get_ylim() == (10., 71.)
    def test_sklearn_api(self):
        from sklearn import datasets
        from sklearn.cross_validation import train_test_split
        np.random.seed(1)
        iris = datasets.load_iris()
        tr_d, te_d, tr_l, te_l = train_test_split(iris.data, iris.target, train_size=120)
        classifier = xgb.XGBClassifier()
        classifier.fit(tr_d, tr_l)
        preds = classifier.predict(te_d)
        labels = te_l
        err = sum([1 for p, l in zip(preds, labels) if p != l]) / len(te_l)
        # error must be smaller than 10%
        assert err < 0.1
    def test_sklearn_plotting(self):
        from sklearn import datasets
        iris = datasets.load_iris()
        classifier = xgb.XGBClassifier()
        classifier.fit(iris.data, iris.target)
        import matplotlib
        matplotlib.use('Agg')
        from matplotlib.axes import Axes
        from graphviz import Digraph
        ax = xgb.plot_importance(classifier)
        assert isinstance(ax, Axes)
        assert ax.get_title() == 'Feature importance'
        assert ax.get_xlabel() == 'F score'
        assert ax.get_ylabel() == 'Features'
        assert len(ax.patches) == 4
        g = xgb.to_graphviz(classifier, num_trees=0)
        assert isinstance(g, Digraph)
        ax = xgb.plot_tree(classifier, num_trees=0)
        assert isinstance(ax, Axes)
--- a/tests/python/test_basic_models.py
+++ b/tests/python/test_basic_models.py
--- a/tests/python/test_cv.py
+++ b/tests/python/test_cv.py
@ -1,37 +0,0 @@
 import xgboost as xgb
 import numpy as np
 from sklearn.datasets import load_digits
 from sklearn.cross_validation import KFold, StratifiedKFold, train_test_split
 from sklearn.metrics import mean_squared_error
 import unittest
 rng = np.random.RandomState(1994)
 class TestCrossValidation(unittest.TestCase):
    def test_cv(self):
        digits = load_digits(3)
        X = digits['data']
        y = digits['target']
        dm = xgb.DMatrix(X, label=y)
        params = {
            'max_depth': 2,
            'eta': 1,
            'silent': 1,
            'objective':
            'multi:softprob',
            'num_class': 3
        }
        seed = 2016
        nfolds = 5
        skf = StratifiedKFold(y, n_folds=nfolds, shuffle=True, random_state=seed)
        import pandas as pd
        cv1 = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds, seed=seed)
        cv2 = xgb.cv(params, dm, num_boost_round=10, folds=skf, seed=seed)
        cv3 = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds, stratified=True, seed=seed)
        assert cv1.shape[0] == cv2.shape[0] and cv2.shape[0] == cv3.shape[0]
        assert cv2.iloc[-1,0] == cv3.iloc[-1,0]
--- a/tests/python/test_early_stopping.py
+++ b/tests/python/test_early_stopping.py
@ -29,9 +29,6 @@ class TestEarlyStopping(unittest.TestCase):
                 eval_set=[(X_test, y_test)])
        assert clf3.best_score == 1
        # TODO: parallel test for early stopping
        # TODO: comment out for now. Will re-visit later
    def evalerror(self, preds, dtrain):
        labels = dtrain.get_label()
        return 'rmse', mean_squared_error(labels, preds)
--- a/tests/python/test_plotting.py
+++ b/tests/python/test_plotting.py
@ -0,0 +1,65 @@
 # -*- coding: utf-8 -*-
 import numpy as np
 import xgboost as xgb
 import unittest
 import matplotlib
 from matplotlib.axes import Axes
 from graphviz import Digraph
 matplotlib.use('Agg')
 dpath = 'demo/data/'
 rng = np.random.RandomState(1994)
 class TestPlotting(unittest.TestCase):
    def test_plotting(self):
        bst2 = xgb.Booster(model_file='xgb.model')
        ax = xgb.plot_importance(bst2)
        assert isinstance(ax, Axes)
        assert ax.get_title() == 'Feature importance'
        assert ax.get_xlabel() == 'F score'
        assert ax.get_ylabel() == 'Features'
        assert len(ax.patches) == 4
        ax = xgb.plot_importance(bst2, color='r',
                                 title='t', xlabel='x', ylabel='y')
        assert isinstance(ax, Axes)
        assert ax.get_title() == 't'
        assert ax.get_xlabel() == 'x'
        assert ax.get_ylabel() == 'y'
        assert len(ax.patches) == 4
        for p in ax.patches:
            assert p.get_facecolor() == (1.0, 0, 0, 1.0)  # red
        ax = xgb.plot_importance(bst2, color=['r', 'r', 'b', 'b'],
                                 title=None, xlabel=None, ylabel=None)
        assert isinstance(ax, Axes)
        assert ax.get_title() == ''
        assert ax.get_xlabel() == ''
        assert ax.get_ylabel() == ''
        assert len(ax.patches) == 4
        assert ax.patches[0].get_facecolor() == (1.0, 0, 0, 1.0)  # red
        assert ax.patches[1].get_facecolor() == (1.0, 0, 0, 1.0)  # red
        assert ax.patches[2].get_facecolor() == (0, 0, 1.0, 1.0)  # blue
        assert ax.patches[3].get_facecolor() == (0, 0, 1.0, 1.0)  # blue
        g = xgb.to_graphviz(bst2, num_trees=0)
        assert isinstance(g, Digraph)
        ax = xgb.plot_tree(bst2, num_trees=0)
        assert isinstance(ax, Axes)
    def test_importance_plot_lim(self):
        np.random.seed(1)
        dm = xgb.DMatrix(np.random.randn(100, 100), label=[0, 1] * 50)
        bst = xgb.train({}, dm)
        assert len(bst.get_fscore()) == 71
        ax = xgb.plot_importance(bst)
        assert ax.get_xlim() == (0., 11.)
        assert ax.get_ylim() == (-1., 71.)
        ax = xgb.plot_importance(bst, xlim=(0, 5), ylim=(10, 71))
        assert ax.get_xlim() == (0., 5.)
        assert ax.get_ylim() == (10., 71.)
--- a/tests/python/test_with_pandas.py
+++ b/tests/python/test_with_pandas.py
@ -0,0 +1,153 @@
 # -*- coding: utf-8 -*-
 import numpy as np
 import xgboost as xgb
 import unittest
 import pandas as pd
 dpath = 'demo/data/'
 rng = np.random.RandomState(1994)
 class TestPandas(unittest.TestCase):
    def test_pandas(self):
        df = pd.DataFrame([[1, 2., True], [2, 3., False]], columns=['a', 'b', 'c'])
        dm = xgb.DMatrix(df, label=pd.Series([1, 2]))
        assert dm.feature_names == ['a', 'b', 'c']
        assert dm.feature_types == ['int', 'float', 'i']
        assert dm.num_row() == 2
        assert dm.num_col() == 3
        # overwrite feature_names and feature_types
        dm = xgb.DMatrix(df, label=pd.Series([1, 2]),
                         feature_names=['x', 'y', 'z'], feature_types=['q', 'q', 'q'])
        assert dm.feature_names == ['x', 'y', 'z']
        assert dm.feature_types == ['q', 'q', 'q']
        assert dm.num_row() == 2
        assert dm.num_col() == 3
        # incorrect dtypes
        df = pd.DataFrame([[1, 2., 'x'], [2, 3., 'y']], columns=['a', 'b', 'c'])
        self.assertRaises(ValueError, xgb.DMatrix, df)
        # numeric columns
        df = pd.DataFrame([[1, 2., True], [2, 3., False]])
        dm = xgb.DMatrix(df, label=pd.Series([1, 2]))
        assert dm.feature_names == ['0', '1', '2']
        assert dm.feature_types == ['int', 'float', 'i']
        assert dm.num_row() == 2
        assert dm.num_col() == 3
        df = pd.DataFrame([[1, 2., 1], [2, 3., 1]], columns=[4, 5, 6])
        dm = xgb.DMatrix(df, label=pd.Series([1, 2]))
        assert dm.feature_names == ['4', '5', '6']
        assert dm.feature_types == ['int', 'float', 'int']
        assert dm.num_row() == 2
        assert dm.num_col() == 3
        df = pd.DataFrame({'A': ['X', 'Y', 'Z'], 'B': [1, 2, 3]})
        dummies = pd.get_dummies(df)
        #    B  A_X  A_Y  A_Z
        # 0  1    1    0    0
        # 1  2    0    1    0
        # 2  3    0    0    1
        result, _, _ = xgb.core._maybe_pandas_data(dummies, None, None)
        exp = np.array([[1., 1., 0., 0.],
                        [2., 0., 1., 0.],
                        [3., 0., 0., 1.]])
        np.testing.assert_array_equal(result, exp)
        dm = xgb.DMatrix(dummies)
        assert dm.feature_names == ['B', 'A_X', 'A_Y', 'A_Z']
        assert dm.feature_types == ['int', 'float', 'float', 'float']
        assert dm.num_row() == 3
        assert dm.num_col() == 4
        df = pd.DataFrame({'A=1': [1, 2, 3], 'A=2': [4, 5, 6]})
        dm = xgb.DMatrix(df)
        assert dm.feature_names == ['A=1', 'A=2']
        assert dm.feature_types == ['int', 'int']
        assert dm.num_row() == 3
        assert dm.num_col() == 2
    def test_pandas_label(self):
        # label must be a single column
        df = pd.DataFrame({'A': ['X', 'Y', 'Z'], 'B': [1, 2, 3]})
        self.assertRaises(ValueError, xgb.core._maybe_pandas_label, df)
        # label must be supported dtype
        df = pd.DataFrame({'A': np.array(['a', 'b', 'c'], dtype=object)})
        self.assertRaises(ValueError, xgb.core._maybe_pandas_label, df)
        df = pd.DataFrame({'A': np.array([1, 2, 3], dtype=int)})
        result = xgb.core._maybe_pandas_label(df)
        np.testing.assert_array_equal(result, np.array([[1.], [2.], [3.]], dtype=float))
        dm = xgb.DMatrix(np.random.randn(3, 2), label=df)
        assert dm.num_row() == 3
        assert dm.num_col() == 2
    def test_cv_as_pandas(self):
        dm = xgb.DMatrix(dpath + 'agaricus.txt.train')
        params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
        import pandas as pd
        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10)
        assert isinstance(cv, pd.DataFrame)
        exp = pd.Index([u'test-error-mean', u'test-error-std',
                        u'train-error-mean', u'train-error-std'])
        assert cv.columns.equals(exp)
        # show progress log (result is the same as above)
        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
                    verbose_eval=True)
        assert isinstance(cv, pd.DataFrame)
        exp = pd.Index([u'test-error-mean', u'test-error-std',
                        u'train-error-mean', u'train-error-std'])
        assert cv.columns.equals(exp)
        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
                    verbose_eval=True, show_stdv=False)
        assert isinstance(cv, pd.DataFrame)
        exp = pd.Index([u'test-error-mean', u'test-error-std',
                        u'train-error-mean', u'train-error-std'])
        assert cv.columns.equals(exp)
        params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic', 'eval_metric': 'auc'}
        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True)
        assert 'eval_metric' in params
        assert 'auc' in cv.columns[0]
        params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic', 'eval_metric': ['auc']}
        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True)
        assert 'eval_metric' in params
        assert 'auc' in cv.columns[0]
        params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic', 'eval_metric': ['auc']}
        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True, early_stopping_rounds=1)
        assert 'eval_metric' in params
        assert 'auc' in cv.columns[0]
        assert cv.shape[0] < 10
        params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics='auc')
        assert 'auc' in cv.columns[0]
        params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics=['auc'])
        assert 'auc' in cv.columns[0]
        params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic', 'eval_metric': ['auc']}
        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics='error')
        assert 'eval_metric' in params
        assert 'auc' not in cv.columns[0]
        assert 'error' in cv.columns[0]
        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics=['error'])
        assert 'eval_metric' in params
        assert 'auc' not in cv.columns[0]
        assert 'error' in cv.columns[0]
        params = list(params.items())
        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics=['error'])
        assert isinstance(params, list)
        assert 'auc' not in cv.columns[0]
        assert 'error' in cv.columns[0]
--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@ -4,6 +4,7 @@ from sklearn.cross_validation import KFold
 from sklearn.metrics import mean_squared_error
 from sklearn.grid_search import GridSearchCV
 from sklearn.datasets import load_iris, load_digits, load_boston
 from sklearn.cross_validation import KFold, StratifiedKFold, train_test_split
 rng = np.random.RandomState(1994)
@ -130,3 +131,65 @@ def test_classification_with_custom_objective():
        X, y
    )
 def test_sklearn_api():
    iris = load_iris()
    tr_d, te_d, tr_l, te_l = train_test_split(iris.data, iris.target, train_size=120)
    classifier = xgb.XGBClassifier()
    classifier.fit(tr_d, tr_l)
    preds = classifier.predict(te_d)
    labels = te_l
    err = sum([1 for p, l in zip(preds, labels) if p != l]) / len(te_l)
    assert err < 0.2
 def test_sklearn_plotting():
    iris = load_iris()
    classifier = xgb.XGBClassifier()
    classifier.fit(iris.data, iris.target)
    import matplotlib
    matplotlib.use('Agg')
    from matplotlib.axes import Axes
    from graphviz import Digraph
    ax = xgb.plot_importance(classifier)
    assert isinstance(ax, Axes)
    assert ax.get_title() == 'Feature importance'
    assert ax.get_xlabel() == 'F score'
    assert ax.get_ylabel() == 'Features'
    assert len(ax.patches) == 4
    g = xgb.to_graphviz(classifier, num_trees=0)
    assert isinstance(g, Digraph)
    ax = xgb.plot_tree(classifier, num_trees=0)
    assert isinstance(ax, Axes)
 def test_sklearn_nfolds_cv():
    digits = load_digits(3)
    X = digits['data']
    y = digits['target']
    dm = xgb.DMatrix(X, label=y)
    params = {
        'max_depth': 2,
        'eta': 1,
        'silent': 1,
        'objective':
        'multi:softprob',
        'num_class': 3
    }
    seed = 2016
    nfolds = 5
    skf = StratifiedKFold(y, n_folds=nfolds, shuffle=True, random_state=seed)
    import pandas as pd
    cv1 = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds, seed=seed)
    cv2 = xgb.cv(params, dm, num_boost_round=10, folds=skf, seed=seed)
    cv3 = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds, stratified=True, seed=seed)
    assert cv1.shape[0] == cv2.shape[0] and cv2.shape[0] == cv3.shape[0]
    assert cv2.iloc[-1,0] == cv3.iloc[-1,0]
--- a/tests/travis/run_test.sh
+++ b/tests/travis/run_test.sh
@ -38,6 +38,23 @@ if [ ${TASK} == "python_test" ]; then
    exit 0
 fi
 if [ ${TASK} == "python_lightweight_test" ]; then
    make all || exit -1
    echo "-------------------------------"
    source activate python3
    python --version
    conda install numpy scipy nose
    python -m pip install graphviz
    python -m nose tests/python/test_basic*.py || exit -1
    source activate python2
    echo "-------------------------------"
    python --version
    conda install numpy scipy nose
    python -m pip install graphviz
    python -m nose tests/python/test_basic*.py || exit -1
    exit 0
 fi
 if [ ${TASK} == "r_test" ]; then
    set -e
    export _R_CHECK_TIMINGS_=0
--- a/tests/travis/setup.sh
+++ b/tests/travis/setup.sh
@ -10,7 +10,7 @@ if [ ${TASK} == "lint" ]; then
 fi
-if [ ${TASK} == "python_test" ]; then
+if [ ${TASK} == "python_test" ] || [ ${TASK} == "python_lightweight_test" ]; then
    # python2
    if [ ${TRAVIS_OS_NAME} == "osx" ]; then
        wget -O conda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh