Enable flake8

2016-04-24 16:34:46 +09:00
parent b3c9e6a0db
commit 8fc2456c87
19 changed files with 282 additions and 199 deletions
--- a/tests/python/test_basic.py
+++ b/tests/python/test_basic.py
@@ -8,6 +8,7 @@ rng = np.random.RandomState(1994)


 class TestBasic(unittest.TestCase):
+
    def test_basic(self):
        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
        dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
@@ -37,7 +38,7 @@ class TestBasic(unittest.TestCase):
    def test_multiclass(self):
        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
        dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
-        param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'num_class' : 2}
+        param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'num_class': 2}
        # specify validations set to watch performance
        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
        num_round = 2
@@ -60,7 +61,6 @@ class TestBasic(unittest.TestCase):
        # assert they are the same
        assert np.sum(np.abs(preds2 - preds)) == 0

-
    def test_dmatrix_init(self):
        data = np.random.randn(5, 5)

--- a/tests/python/test_basic_models.py
+++ b/tests/python/test_basic_models.py
@@ -8,82 +8,94 @@ dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')

 rng = np.random.RandomState(1994)

+
 class TestModels(unittest.TestCase):

-	def test_glm(self):
-		param = {'silent':1, 'objective':'binary:logistic', 'booster':'gblinear', 'alpha': 0.0001, 'lambda': 1 }
-		watchlist  = [(dtest,'eval'), (dtrain,'train')]
-		num_round = 4
-		bst = xgb.train(param, dtrain, num_round, watchlist)
-		assert isinstance(bst, xgb.core.Booster)
-		preds = bst.predict(dtest)
-		labels = dtest.get_label()
-		err = sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) / float(len(preds))
-		assert err < 0.1
+    def test_glm(self):
+        param = {'silent': 1, 'objective': 'binary:logistic',
+                 'booster': 'gblinear', 'alpha': 0.0001, 'lambda': 1}
+        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
+        num_round = 4
+        bst = xgb.train(param, dtrain, num_round, watchlist)
+        assert isinstance(bst, xgb.core.Booster)
+        preds = bst.predict(dtest)
+        labels = dtest.get_label()
+        err = sum(1 for i in range(len(preds))
+                  if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
+        assert err < 0.1

-	def test_eta_decay(self):
-		param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' }
-		watchlist  = [(dtest,'eval'), (dtrain,'train')]
-		num_round = 2
-		# learning_rates as a list
-		bst = xgb.train(param, dtrain, num_round, watchlist, learning_rates=[0.4, 0.3])
-		assert isinstance(bst, xgb.core.Booster)
+    def test_eta_decay(self):
+        param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
+        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
+        num_round = 2
+        # learning_rates as a list
+        bst = xgb.train(param, dtrain, num_round, watchlist, learning_rates=[0.4, 0.3])
+        assert isinstance(bst, xgb.core.Booster)

-		# learning_rates as a customized decay function
-		def eta_decay(ithround, num_boost_round):
-			return num_boost_round / (ithround + 1)
-		bst = xgb.train(param, dtrain, num_round, watchlist, learning_rates=eta_decay)
-		assert isinstance(bst, xgb.core.Booster)
+        # learning_rates as a customized decay function
+        def eta_decay(ithround, num_boost_round):
+            return num_boost_round / (ithround + 1)

+        bst = xgb.train(param, dtrain, num_round, watchlist, learning_rates=eta_decay)
+        assert isinstance(bst, xgb.core.Booster)

-	def test_custom_objective(self):
-		param = {'max_depth':2, 'eta':1, 'silent':1 }
-		watchlist  = [(dtest,'eval'), (dtrain,'train')]
-		num_round = 2
-		def logregobj(preds, dtrain):
-			labels = dtrain.get_label()
-			preds = 1.0 / (1.0 + np.exp(-preds))
-			grad = preds - labels
-			hess = preds * (1.0-preds)
-			return grad, hess
-		def evalerror(preds, dtrain):
-			labels = dtrain.get_label()
-			return 'error', float(sum(labels != (preds > 0.0))) / len(labels)
+    def test_custom_objective(self):
+        param = {'max_depth': 2, 'eta': 1, 'silent': 1}
+        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
+        num_round = 2

-		# test custom_objective in training
-		bst = xgb.train(param, dtrain, num_round, watchlist, logregobj, evalerror)
-		assert isinstance(bst, xgb.core.Booster)
-		preds = bst.predict(dtest)
-		labels = dtest.get_label()
-		err = sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) / float(len(preds))
-		assert err < 0.1
+        def logregobj(preds, dtrain):
+            labels = dtrain.get_label()
+            preds = 1.0 / (1.0 + np.exp(-preds))
+            grad = preds - labels
+            hess = preds * (1.0 - preds)
+            return grad, hess

-		# test custom_objective in cross-validation
-		xgb.cv(param, dtrain, num_round, nfold = 5, seed = 0,
-	       obj = logregobj, feval=evalerror)
+        def evalerror(preds, dtrain):
+            labels = dtrain.get_label()
+            return 'error', float(sum(labels != (preds > 0.0))) / len(labels)

-		# test maximize parameter
-		def neg_evalerror(preds, dtrain):
-			labels = dtrain.get_label()
-			return 'error', float(sum(labels == (preds > 0.0))) / len(labels)
-		bst2 = xgb.train(param, dtrain, num_round, watchlist, logregobj, neg_evalerror, maximize=True)
-		preds2 = bst2.predict(dtest)
-		err2 = sum(1 for i in range(len(preds2)) if int(preds2[i]>0.5)!=labels[i]) / float(len(preds2))
-		assert err == err2
+        # test custom_objective in training
+        bst = xgb.train(param, dtrain, num_round, watchlist, logregobj, evalerror)
+        assert isinstance(bst, xgb.core.Booster)
+        preds = bst.predict(dtest)
+        labels = dtest.get_label()
+        err = sum(1 for i in range(len(preds))
+                  if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
+        assert err < 0.1

-	def test_fpreproc(self):
-		param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
-		num_round = 2
-		def fpreproc(dtrain, dtest, param):
-			label = dtrain.get_label()
-			ratio = float(np.sum(label == 0)) / np.sum(label==1)
-			param['scale_pos_weight'] = ratio
-			return (dtrain, dtest, param)
-		xgb.cv(param, dtrain, num_round, nfold=5,
-	       metrics={'auc'}, seed = 0, fpreproc = fpreproc)
+        # test custom_objective in cross-validation
+        xgb.cv(param, dtrain, num_round, nfold=5, seed=0,
+               obj=logregobj, feval=evalerror)

-	def test_show_stdv(self):
-		param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
-		num_round = 2
-		xgb.cv(param, dtrain, num_round, nfold=5,
-	       metrics={'error'}, seed = 0, show_stdv = False)
+        # test maximize parameter
+        def neg_evalerror(preds, dtrain):
+            labels = dtrain.get_label()
+            return 'error', float(sum(labels == (preds > 0.0))) / len(labels)
+
+        bst2 = xgb.train(param, dtrain, num_round, watchlist, logregobj, neg_evalerror, maximize=True)
+        preds2 = bst2.predict(dtest)
+        err2 = sum(1 for i in range(len(preds2))
+                   if int(preds2[i] > 0.5) != labels[i]) / float(len(preds2))
+        assert err == err2
+
+    def test_fpreproc(self):
+        param = {'max_depth': 2, 'eta': 1, 'silent': 1,
+                 'objective': 'binary:logistic'}
+        num_round = 2
+
+        def fpreproc(dtrain, dtest, param):
+            label = dtrain.get_label()
+            ratio = float(np.sum(label == 0)) / np.sum(label == 1)
+            param['scale_pos_weight'] = ratio
+            return (dtrain, dtest, param)
+
+        xgb.cv(param, dtrain, num_round, nfold=5,
+               metrics={'auc'}, seed=0, fpreproc=fpreproc)
+
+    def test_show_stdv(self):
+        param = {'max_depth': 2, 'eta': 1, 'silent': 1,
+                 'objective': 'binary:logistic'}
+        num_round = 2
+        xgb.cv(param, dtrain, num_round, nfold=5,
+               metrics={'error'}, seed=0, show_stdv=False)
--- a/tests/python/test_early_stopping.py
+++ b/tests/python/test_early_stopping.py
@@ -1,7 +1,7 @@
 import xgboost as xgb
 import numpy as np
 from sklearn.datasets import load_digits
-from sklearn.cross_validation import KFold, train_test_split
+from sklearn.cross_validation import train_test_split
 from sklearn.metrics import mean_squared_error
 import unittest

@@ -40,7 +40,6 @@ class TestEarlyStopping(unittest.TestCase):
        dm = xgb.DMatrix(X, label=y)
        params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}

-        import pandas as pd
        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, early_stopping_rounds=10)
        assert cv.shape[0] == 10
        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, early_stopping_rounds=5)
--- a/tests/python/test_eval_metrics.py
+++ b/tests/python/test_eval_metrics.py
@@ -1,9 +1,8 @@
 import xgboost as xgb
 import numpy as np
-from sklearn.cross_validation import KFold, train_test_split
+from sklearn.cross_validation import train_test_split
 from sklearn.metrics import mean_squared_error
-from sklearn.grid_search import GridSearchCV
-from sklearn.datasets import load_iris, load_digits, load_boston
+from sklearn.datasets import load_digits
 import unittest

 rng = np.random.RandomState(1337)
--- a/tests/python/test_plotting.py
+++ b/tests/python/test_plotting.py
@@ -12,6 +12,7 @@ matplotlib.use('Agg')
 dpath = 'demo/data/'
 rng = np.random.RandomState(1994)

+
 class TestPlotting(unittest.TestCase):
    def test_plotting(self):
        bst2 = xgb.Booster(model_file='xgb.model')
--- a/tests/python/test_training_continuation.py
+++ b/tests/python/test_training_continuation.py
@@ -1,10 +1,7 @@
 import xgboost as xgb
 import numpy as np
-from sklearn.preprocessing import MultiLabelBinarizer
-from sklearn.cross_validation import KFold, train_test_split
 from sklearn.metrics import mean_squared_error
-from sklearn.grid_search import GridSearchCV
-from sklearn.datasets import load_iris, load_digits, load_boston
+from sklearn.datasets import load_digits
 import unittest

 rng = np.random.RandomState(1337)
@@ -57,10 +54,14 @@ class TestTrainingContinuation(unittest.TestCase):
        ntrees_02b = len(gbdt_02b.get_dump())
        assert ntrees_02a == 10
        assert ntrees_02b == 10
-        assert mean_squared_error(y_2class, gbdt_01.predict(dtrain_2class)) == \
-               mean_squared_error(y_2class, gbdt_02a.predict(dtrain_2class))
-        assert mean_squared_error(y_2class, gbdt_01.predict(dtrain_2class)) == \
-               mean_squared_error(y_2class, gbdt_02b.predict(dtrain_2class))
+
+        res1 = mean_squared_error(y_2class, gbdt_01.predict(dtrain_2class))
+        res2 = mean_squared_error(y_2class, gbdt_02a.predict(dtrain_2class))
+        assert res1 == res2
+
+        res1 = mean_squared_error(y_2class, gbdt_01.predict(dtrain_2class))
+        res2 = mean_squared_error(y_2class, gbdt_02b.predict(dtrain_2class))
+        assert res1 == res2

        gbdt_03 = xgb.train(self.xgb_params_01, dtrain_2class, num_boost_round=3)
        gbdt_03.save_model('xgb_tc.model')
@@ -71,22 +72,30 @@ class TestTrainingContinuation(unittest.TestCase):
        ntrees_03b = len(gbdt_03b.get_dump())
        assert ntrees_03a == 10
        assert ntrees_03b == 10
-        assert mean_squared_error(y_2class, gbdt_03a.predict(dtrain_2class)) == \
-               mean_squared_error(y_2class, gbdt_03b.predict(dtrain_2class))
+
+        res1 = mean_squared_error(y_2class, gbdt_03a.predict(dtrain_2class))
+        res2 = mean_squared_error(y_2class, gbdt_03b.predict(dtrain_2class))
+        assert res1 == res2

        gbdt_04 = xgb.train(self.xgb_params_02, dtrain_2class, num_boost_round=3)
        assert gbdt_04.best_ntree_limit == (gbdt_04.best_iteration + 1) * self.num_parallel_tree
-        assert mean_squared_error(y_2class, gbdt_04.predict(dtrain_2class)) == \
-               mean_squared_error(y_2class, gbdt_04.predict(dtrain_2class, ntree_limit=gbdt_04.best_ntree_limit))
+
+        res1 = mean_squared_error(y_2class, gbdt_04.predict(dtrain_2class))
+        res2 = mean_squared_error(y_2class, gbdt_04.predict(dtrain_2class, ntree_limit=gbdt_04.best_ntree_limit))
+        assert res1 == res2

        gbdt_04 = xgb.train(self.xgb_params_02, dtrain_2class, num_boost_round=7, xgb_model=gbdt_04)
        assert gbdt_04.best_ntree_limit == (gbdt_04.best_iteration + 1) * self.num_parallel_tree
-        assert mean_squared_error(y_2class, gbdt_04.predict(dtrain_2class)) == \
-               mean_squared_error(y_2class, gbdt_04.predict(dtrain_2class, ntree_limit=gbdt_04.best_ntree_limit))
+
+        res1 = mean_squared_error(y_2class, gbdt_04.predict(dtrain_2class))
+        res2 = mean_squared_error(y_2class, gbdt_04.predict(dtrain_2class, ntree_limit=gbdt_04.best_ntree_limit))
+        assert res1 == res2

        gbdt_05 = xgb.train(self.xgb_params_03, dtrain_5class, num_boost_round=7)
        assert gbdt_05.best_ntree_limit == (gbdt_05.best_iteration + 1) * self.num_parallel_tree
        gbdt_05 = xgb.train(self.xgb_params_03, dtrain_5class, num_boost_round=3, xgb_model=gbdt_05)
        assert gbdt_05.best_ntree_limit == (gbdt_05.best_iteration + 1) * self.num_parallel_tree
-        assert np.any(gbdt_05.predict(dtrain_5class) !=
-                      gbdt_05.predict(dtrain_5class, ntree_limit=gbdt_05.best_ntree_limit)) == False
+
+        res1 = gbdt_05.predict(dtrain_5class)
+        res2 = gbdt_05.predict(dtrain_5class, ntree_limit=gbdt_05.best_ntree_limit)
+        np.testing.assert_almost_equal(res1, res2)
--- a/tests/python/test_with_pandas.py
+++ b/tests/python/test_with_pandas.py
@@ -111,43 +111,55 @@ class TestPandas(unittest.TestCase):
                        u'train-error-mean', u'train-error-std'])
        assert cv.columns.equals(exp)

-        params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic', 'eval_metric': 'auc'}
+        params = {'max_depth': 2, 'eta': 1, 'silent': 1,
+                  'objective': 'binary:logistic', 'eval_metric': 'auc'}
        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True)
        assert 'eval_metric' in params
        assert 'auc' in cv.columns[0]

-        params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic', 'eval_metric': ['auc']}
+        params = {'max_depth': 2, 'eta': 1, 'silent': 1,
+                  'objective': 'binary:logistic', 'eval_metric': ['auc']}
        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True)
        assert 'eval_metric' in params
        assert 'auc' in cv.columns[0]

-        params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic', 'eval_metric': ['auc']}
-        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True, early_stopping_rounds=1)
+        params = {'max_depth': 2, 'eta': 1, 'silent': 1,
+                  'objective': 'binary:logistic', 'eval_metric': ['auc']}
+        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
+                    as_pandas=True, early_stopping_rounds=1)
        assert 'eval_metric' in params
        assert 'auc' in cv.columns[0]
        assert cv.shape[0] < 10

-        params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
-        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics='auc')
+        params = {'max_depth': 2, 'eta': 1, 'silent': 1,
+                  'objective': 'binary:logistic'}
+        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
+                    as_pandas=True, metrics='auc')
        assert 'auc' in cv.columns[0]

-        params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
-        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics=['auc'])
+        params = {'max_depth': 2, 'eta': 1, 'silent': 1,
+                  'objective': 'binary:logistic'}
+        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
+                    as_pandas=True, metrics=['auc'])
        assert 'auc' in cv.columns[0]

-        params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic', 'eval_metric': ['auc']}
-        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics='error')
+        params = {'max_depth': 2, 'eta': 1, 'silent': 1,
+                  'objective': 'binary:logistic', 'eval_metric': ['auc']}
+        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
+                    as_pandas=True, metrics='error')
        assert 'eval_metric' in params
        assert 'auc' not in cv.columns[0]
        assert 'error' in cv.columns[0]

-        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics=['error'])
+        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
+                    as_pandas=True, metrics=['error'])
        assert 'eval_metric' in params
        assert 'auc' not in cv.columns[0]
        assert 'error' in cv.columns[0]

        params = list(params.items())
-        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics=['error'])
+        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
+                    as_pandas=True, metrics=['error'])
        assert isinstance(params, list)
        assert 'auc' not in cv.columns[0]
-        assert 'error' in cv.columns[0]
+        assert 'error' in cv.columns[0]
--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@@ -1,6 +1,5 @@
 import xgboost as xgb
 import numpy as np
-from sklearn.cross_validation import KFold
 from sklearn.metrics import mean_squared_error
 from sklearn.grid_search import GridSearchCV
 from sklearn.datasets import load_iris, load_digits, load_boston
@@ -8,33 +7,46 @@ from sklearn.cross_validation import KFold, StratifiedKFold, train_test_split

 rng = np.random.RandomState(1994)

+
 def test_binary_classification():
    digits = load_digits(2)
    y = digits['target']
    X = digits['data']
    kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
    for train_index, test_index in kf:
-        xgb_model = xgb.XGBClassifier().fit(X[train_index],y[train_index])
+        xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
        preds = xgb_model.predict(X[test_index])
        labels = y[test_index]
-        err = sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) / float(len(preds))
-    assert err < 0.1
+        err = sum(1 for i in range(len(preds))
+                  if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
+        assert err < 0.1
+

 def test_multiclass_classification():
+
+    def check_pred(preds, labels):
+        err = sum(1 for i in range(len(preds))
+                  if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
+        assert err < 0.4
+
    iris = load_iris()
    y = iris['target']
    X = iris['data']
    kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
    for train_index, test_index in kf:
-        xgb_model = xgb.XGBClassifier().fit(X[train_index],y[train_index])
+        xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
        preds = xgb_model.predict(X[test_index])
        # test other params in XGBClassifier().fit
        preds2 = xgb_model.predict(X[test_index], output_margin=True, ntree_limit=3)
        preds3 = xgb_model.predict(X[test_index], output_margin=True, ntree_limit=0)
        preds4 = xgb_model.predict(X[test_index], output_margin=False, ntree_limit=3)
        labels = y[test_index]
-        err = sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) / float(len(preds))
-    assert err < 0.4
+
+        check_pred(preds, labels)
+        check_pred(preds2, labels)
+        check_pred(preds3, labels)
+        check_pred(preds4, labels)
+

 def test_boston_housing_regression():
    boston = load_boston()
@@ -42,27 +54,33 @@ def test_boston_housing_regression():
    X = boston['data']
    kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
    for train_index, test_index in kf:
-        xgb_model = xgb.XGBRegressor().fit(X[train_index],y[train_index])
+        xgb_model = xgb.XGBRegressor().fit(X[train_index], y[train_index])
+
        preds = xgb_model.predict(X[test_index])
        # test other params in XGBRegressor().fit
        preds2 = xgb_model.predict(X[test_index], output_margin=True, ntree_limit=3)
        preds3 = xgb_model.predict(X[test_index], output_margin=True, ntree_limit=0)
        preds4 = xgb_model.predict(X[test_index], output_margin=False, ntree_limit=3)
        labels = y[test_index]
-    assert mean_squared_error(preds, labels) < 25
+
+        assert mean_squared_error(preds, labels) < 25
+        assert mean_squared_error(preds2, labels) < 350
+        assert mean_squared_error(preds3, labels) < 25
+        assert mean_squared_error(preds4, labels) < 350
+

 def test_parameter_tuning():
    boston = load_boston()
    y = boston['target']
    X = boston['data']
    xgb_model = xgb.XGBRegressor()
-    clf = GridSearchCV(xgb_model,
-                       {'max_depth': [2,4,6],
-                        'n_estimators': [50,100,200]}, verbose=1)
-    clf.fit(X,y)
+    clf = GridSearchCV(xgb_model, {'max_depth': [2, 4, 6],
+                                   'n_estimators': [50, 100, 200]}, verbose=1)
+    clf.fit(X, y)
    assert clf.best_score_ < 0.7
    assert clf.best_params_ == {'n_estimators': 100, 'max_depth': 4}

+
 def test_regression_with_custom_objective():
    def objective_ls(y_true, y_pred):
        grad = (y_pred - y_true)
@@ -86,20 +104,17 @@ def test_regression_with_custom_objective():
        pass

    def dummy_objective(y_true, y_pred):
-        raise  XGBCustomObjectiveException()
+        raise XGBCustomObjectiveException()

    xgb_model = xgb.XGBRegressor(objective=dummy_objective)
-    np.testing.assert_raises(
-        XGBCustomObjectiveException,
-        xgb_model.fit,
-        X, y
-    )
+    np.testing.assert_raises(XGBCustomObjectiveException, xgb_model.fit, X, y)
+

 def test_classification_with_custom_objective():
    def logregobj(y_true, y_pred):
        y_pred = 1.0 / (1.0 + np.exp(-y_pred))
        grad = y_pred - y_true
-        hess = y_pred * (1.0-y_pred)
+        hess = y_pred * (1.0 - y_pred)
        return grad, hess

    digits = load_digits(2)
@@ -107,22 +122,20 @@ def test_classification_with_custom_objective():
    X = digits['data']
    kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
    for train_index, test_index in kf:
-        xgb_model = xgb.XGBClassifier(objective=logregobj).fit(
-            X[train_index],y[train_index]
-        )
+        xgb_model = xgb.XGBClassifier(objective=logregobj)
+        xgb_model.fit(X[train_index], y[train_index])
        preds = xgb_model.predict(X[test_index])
        labels = y[test_index]
        err = sum(1 for i in range(len(preds))
-                  if int(preds[i]>0.5)!=labels[i]) / float(len(preds))
-    assert err < 0.1
-
+                  if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
+        assert err < 0.1

    # Test that the custom objective function is actually used
    class XGBCustomObjectiveException(Exception):
        pass

    def dummy_objective(y_true, y_preds):
-        raise  XGBCustomObjectiveException()
+        raise XGBCustomObjectiveException()

    xgb_model = xgb.XGBClassifier(objective=dummy_objective)
    np.testing.assert_raises(
@@ -131,6 +144,7 @@ def test_classification_with_custom_objective():
        X, y
    )

+
 def test_sklearn_api():
    iris = load_iris()
    tr_d, te_d, tr_l, te_l = train_test_split(iris.data, iris.target, train_size=120)
@@ -143,6 +157,7 @@ def test_sklearn_api():
    err = sum([1 for p, l in zip(preds, labels) if p != l]) / len(te_l)
    assert err < 0.2

+
 def test_sklearn_plotting():
    iris = load_iris()

@@ -168,12 +183,13 @@ def test_sklearn_plotting():
    ax = xgb.plot_tree(classifier, num_trees=0)
    assert isinstance(ax, Axes)

+
 def test_sklearn_nfolds_cv():
    digits = load_digits(3)
    X = digits['data']
    y = digits['target']
    dm = xgb.DMatrix(X, label=y)
-    
+
    params = {
        'max_depth': 2,
        'eta': 1,
@@ -187,9 +203,8 @@ def test_sklearn_nfolds_cv():
    nfolds = 5
    skf = StratifiedKFold(y, n_folds=nfolds, shuffle=True, random_state=seed)

-    import pandas as pd
    cv1 = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds, seed=seed)
    cv2 = xgb.cv(params, dm, num_boost_round=10, folds=skf, seed=seed)
    cv3 = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds, stratified=True, seed=seed)
    assert cv1.shape[0] == cv2.shape[0] and cv2.shape[0] == cv3.shape[0]
-    assert cv2.iloc[-1,0] == cv3.iloc[-1,0]
+    assert cv2.iloc[-1, 0] == cv3.iloc[-1, 0]