From 2ea0f887c182f37b0dc88ff8daca6663416b76be Mon Sep 17 00:00:00 2001
From: Jiaming Yuan
Date: Thu, 15 Nov 2018 13:56:33 +1300
Subject: [PATCH] Refactor Python tests. (#3897)

* Deprecate nose tests.
* Format python tests.
---
 Jenkinsfile                                   |   4 +-
 appveyor.yml                                  |   4 +-
 tests/ci_build/Dockerfile.gpu                 |   2 +-
 tests/ci_build/Dockerfile.release             |   4 +-
 tests/ci_build/test_gpu.sh                    |   3 +-
 tests/ci_build/test_mgpu.sh                   |   2 +-
 tests/python-gpu/test_gpu_linear.py           |   7 +-
 tests/python-gpu/test_gpu_prediction.py       |  35 +++--
 tests/python-gpu/test_gpu_updaters.py         |   7 +-
 tests/python-gpu/test_large_sizes.py          |  19 +--
 .../python-gpu/test_monotonic_constraints.py  |  10 +-
 tests/python/test_basic.py                    |  36 +++--
 tests/python/test_dt.py                       |  12 +-
 tests/python/test_early_stopping.py           |  43 +++---
 tests/python/test_eval_metrics.py             |   9 +-
 tests/python/test_linear.py                   |  38 +++--
 tests/python/test_plotting.py                 |   4 +-
 tests/python/test_training_continuation.py    |  63 +++++---
 tests/python/test_updaters.py                 |  20 ++-
 tests/python/test_with_pandas.py              |  19 ++-
 tests/python/test_with_sklearn.py             | 139 +++++++++----------
 tests/python/testing.py                       |  29 ++--
 tests/travis/run_test.sh                      |  18 +--
 23 files changed, 302 insertions(+), 225 deletions(-)
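The translation applied across every file below is mechanical. As a rough sketch (not part of the patch itself; `TestBig` and its body are made-up names), the old nose idioms map onto pytest like so, with `tm` being tests/python/testing.py as rewritten at the end of this patch:

    # Before (nose): attributes and imperative skips.
    #
    #     from nose.plugins.attrib import attr
    #
    #     @attr('slow')
    #     class TestBig(unittest.TestCase):
    #         def test_foo(self):
    #             tm._skip_if_no_sklearn()  # raises nose.SkipTest
    #
    # After (pytest): markers and a declarative skipif, selected with -m.
    import unittest

    import pytest
    import testing as tm  # tests/python/testing.py


    @pytest.mark.slow
    class TestBig(unittest.TestCase):
        @pytest.mark.skipif(**tm.no_sklearn())
        def test_foo(self):
            self.assertTrue(True)  # placeholder body
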
diff --git a/Jenkinsfile b/Jenkinsfile
index 6ef117581..8cd876534 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -96,11 +96,11 @@ def buildPlatformCmake(buildName, conf, nodeReq, dockerTarget) {
       # Test the wheel for compatibility on a barebones CPU container
       ${dockerRun} release ${dockerArgs} bash -c " \
         pip install --user python-package/dist/xgboost-*-none-any.whl && \
-        python -m nose -v tests/python"
+        pytest -v --fulltrace -s tests/python"
       # Test the wheel for compatibility on CUDA 10.0 container
       ${dockerRun} gpu --build-arg CUDA_VERSION=10.0 bash -c " \
         pip install --user python-package/dist/xgboost-*-none-any.whl && \
-        python -m nose -v --eval-attr='(not slow) and (not mgpu)' tests/python-gpu"
+        pytest -v -s --fulltrace -m '(not mgpu) and (not slow)' tests/python-gpu"
       """
     }
 }
diff --git a/appveyor.yml b/appveyor.yml
index f44118537..82c036593 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -44,7 +44,7 @@ install:
   - set DO_PYTHON=off
   - if /i "%target%" == "mingw" set DO_PYTHON=on
   - if /i "%target%_%ver%_%configuration%" == "msvc_2015_Release" set DO_PYTHON=on
-  - if /i "%DO_PYTHON%" == "on" conda install -y numpy scipy pandas matplotlib nose scikit-learn graphviz python-graphviz
+  - if /i "%DO_PYTHON%" == "on" conda install -y numpy scipy pandas matplotlib pytest scikit-learn graphviz python-graphviz
   # R: based on https://github.com/krlmlr/r-appveyor
   - ps: |
       if($env:target -eq 'rmingw' -or $env:target -eq 'rmsvc') {
@@ -96,7 +96,7 @@ build_script:
 
 test_script:
   - cd %APPVEYOR_BUILD_FOLDER%
-  - if /i "%DO_PYTHON%" == "on" python -m nose tests/python
+  - if /i "%DO_PYTHON%" == "on" python -m pytest tests/python
   # mingw R package: run the R check (which includes unit tests), and also keep the built binary package
   - if /i "%target%" == "rmingw" (
      set _R_CHECK_CRAN_INCOMING_=FALSE&&
diff --git a/tests/ci_build/Dockerfile.gpu b/tests/ci_build/Dockerfile.gpu
index c64acba0e..c9e576a0e 100644
--- a/tests/ci_build/Dockerfile.gpu
+++ b/tests/ci_build/Dockerfile.gpu
@@ -37,7 +37,7 @@ ENV CPP=/opt/rh/devtoolset-2/root/usr/bin/cpp
 
 # Install Python packages
 RUN \
-    pip install numpy nose scipy scikit-learn wheel
+    pip install numpy pytest scipy scikit-learn wheel
 
 ENV GOSU_VERSION 1.10
diff --git a/tests/ci_build/Dockerfile.release b/tests/ci_build/Dockerfile.release
index 1d8228330..baf4d3d94 100644
--- a/tests/ci_build/Dockerfile.release
+++ b/tests/ci_build/Dockerfile.release
@@ -15,8 +15,8 @@ ENV PATH=/opt/python/bin:$PATH
 
 # Install Python packages
 RUN \
-    conda install numpy scipy pandas matplotlib nose scikit-learn && \
-    pip install nose wheel auditwheel graphviz
+    conda install numpy scipy pandas matplotlib pytest scikit-learn && \
+    pip install pytest wheel auditwheel graphviz
 
 ENV GOSU_VERSION 1.10
diff --git a/tests/ci_build/test_gpu.sh b/tests/ci_build/test_gpu.sh
index 48d5c7a61..adb275481 100755
--- a/tests/ci_build/test_gpu.sh
+++ b/tests/ci_build/test_gpu.sh
@@ -4,6 +4,5 @@ set -e
 cd python-package
 python setup.py install --user
 cd ..
-python -m nose -v --eval-attr='(not slow) and (not mgpu)' tests/python-gpu/
+pytest -v -s --fulltrace -m "(not mgpu) and (not slow)" tests/python-gpu
 ./testxgboost --gtest_filter=-*.MGPU_*
-
diff --git a/tests/ci_build/test_mgpu.sh b/tests/ci_build/test_mgpu.sh
index d803da58d..5eef3e708 100755
--- a/tests/ci_build/test_mgpu.sh
+++ b/tests/ci_build/test_mgpu.sh
@@ -4,5 +4,5 @@ set -e
 cd python-package
 python setup.py install --user
 cd ..
-python -m nose -v --eval-attr='(not slow) and mgpu' tests/python-gpu/
+pytest -v -s --fulltrace -m "(not slow) and mgpu" tests/python-gpu
 ./testxgboost --gtest_filter=*.MGPU_*
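The CI scripts above now select tests with pytest marker expressions (`-m "(not mgpu) and (not slow)"`) instead of nose's `--eval-attr`; the markers themselves are attached with `@pytest.mark.slow`, `@pytest.mark.mgpu`, and `@pytest.mark.gpu` in the test files that follow. One hedge: pytest of this era accepts ad-hoc markers silently, but later releases warn unless markers are registered, so a project may eventually want a hook like the hypothetical conftest.py below (not part of this patch):

    # Hypothetical tests/python-gpu/conftest.py. pytest_configure is a
    # standard pytest hook; addinivalue_line registers each marker so that
    # -m expressions over them do not trigger unknown-marker warnings.
    def pytest_configure(config):
        config.addinivalue_line("markers", "slow: long-running tests")
        config.addinivalue_line("markers", "mgpu: tests requiring multiple GPUs")
        config.addinivalue_line("markers", "gpu: tests requiring a GPU")
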
assert self.non_decreasing(res["train"]["auc"]) gpu_pred_train = bst.predict(dtrain, output_margin=True) gpu_pred_test = bst.predict(dtest, output_margin=True) @@ -39,21 +42,26 @@ class TestGPUPredict(unittest.TestCase): cpu_pred_train = bst_cpu.predict(dtrain, output_margin=True) cpu_pred_test = bst_cpu.predict(dtest, output_margin=True) cpu_pred_val = bst_cpu.predict(dval, output_margin=True) - np.testing.assert_allclose(cpu_pred_train, gpu_pred_train, rtol=1e-5) - np.testing.assert_allclose(cpu_pred_val, gpu_pred_val, rtol=1e-5) - np.testing.assert_allclose(cpu_pred_test, gpu_pred_test, rtol=1e-5) + np.testing.assert_allclose(cpu_pred_train, gpu_pred_train, + rtol=1e-5) + np.testing.assert_allclose(cpu_pred_val, gpu_pred_val, + rtol=1e-5) + np.testing.assert_allclose(cpu_pred_test, gpu_pred_test, + rtol=1e-5) def non_decreasing(self, L): return all((x - y) < 0.001 for x, y in zip(L, L[1:])) - # Test case for a bug where multiple batch predictions made on a test set produce incorrect results + # Test case for a bug where multiple batch predictions made on a + # test set produce incorrect results def test_multi_predict(self): from sklearn.datasets import make_regression from sklearn.model_selection import train_test_split n = 1000 X, y = make_regression(n, random_state=rng) - X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123) + X_train, X_test, y_train, y_test = train_test_split(X, y, + random_state=123) dtrain = xgb.DMatrix(X_train, label=y_train) dtest = xgb.DMatrix(X_test) @@ -85,8 +93,7 @@ class TestGPUPredict(unittest.TestCase): params = {'tree_method': 'gpu_hist', 'predictor': 'cpu_predictor', 'n_jobs': -1, - 'seed': 123 - } + 'seed': 123} m = xgb.XGBRegressor(**params).fit(X_train, y_train) cpu_train_score = m.score(X_train, y_train) cpu_test_score = m.score(X_test, y_test) diff --git a/tests/python-gpu/test_gpu_updaters.py b/tests/python-gpu/test_gpu_updaters.py index 4f8ac58b5..8c4cb1cda 100644 --- a/tests/python-gpu/test_gpu_updaters.py +++ b/tests/python-gpu/test_gpu_updaters.py @@ -1,10 +1,9 @@ import numpy as np import sys import unittest -from nose.plugins.attrib import attr +import pytest sys.path.append("tests/python") -import xgboost as xgb from regression_test_utilities import run_suite, parameter_combinations, \ assert_results_non_increasing @@ -45,7 +44,7 @@ class TestGPU(unittest.TestCase): cpu_results = run_suite(param, select_datasets=datasets) assert_gpu_results(cpu_results, gpu_results) - @attr('mgpu') + @pytest.mark.mgpu def test_gpu_hist_mgpu(self): variable_param = {'n_gpus': [-1], 'max_depth': [2, 10], 'max_leaves': [255, 4], @@ -56,7 +55,7 @@ class TestGPU(unittest.TestCase): gpu_results = run_suite(param, select_datasets=datasets) assert_results_non_increasing(gpu_results, 1e-2) - @attr('mgpu') + @pytest.mark.mgpu def test_specified_gpu_id_gpu_update(self): variable_param = {'n_gpus': [1], 'gpu_id': [1], diff --git a/tests/python-gpu/test_large_sizes.py b/tests/python-gpu/test_large_sizes.py index 3c1470d05..4c6dc3234 100644 --- a/tests/python-gpu/test_large_sizes.py +++ b/tests/python-gpu/test_large_sizes.py @@ -2,12 +2,12 @@ from __future__ import print_function import sys import time +import pytest sys.path.append("../../tests/python") import xgboost as xgb import numpy as np import unittest -from nose.plugins.attrib import attr def eprint(*args, **kwargs): @@ -16,9 +16,11 @@ def eprint(*args, **kwargs): print(*args, file=sys.stdout, **kwargs) sys.stdout.flush() + rng = np.random.RandomState(1994) -# "realistic" size based upon 
diff --git a/tests/python-gpu/test_gpu_updaters.py b/tests/python-gpu/test_gpu_updaters.py
index 4f8ac58b5..8c4cb1cda 100644
--- a/tests/python-gpu/test_gpu_updaters.py
+++ b/tests/python-gpu/test_gpu_updaters.py
@@ -1,10 +1,9 @@
 import numpy as np
 import sys
 import unittest
-from nose.plugins.attrib import attr
+import pytest
 
 sys.path.append("tests/python")
-import xgboost as xgb
 from regression_test_utilities import run_suite, parameter_combinations, \
     assert_results_non_increasing
 
@@ -45,7 +44,7 @@ class TestGPU(unittest.TestCase):
             cpu_results = run_suite(param, select_datasets=datasets)
             assert_gpu_results(cpu_results, gpu_results)
 
-    @attr('mgpu')
+    @pytest.mark.mgpu
     def test_gpu_hist_mgpu(self):
         variable_param = {'n_gpus': [-1],
                          'max_depth': [2, 10],
                          'max_leaves': [255, 4],
@@ -56,7 +55,7 @@ class TestGPU(unittest.TestCase):
         gpu_results = run_suite(param, select_datasets=datasets)
         assert_results_non_increasing(gpu_results, 1e-2)
 
-    @attr('mgpu')
+    @pytest.mark.mgpu
     def test_specified_gpu_id_gpu_update(self):
         variable_param = {'n_gpus': [1],
                          'gpu_id': [1],
diff --git a/tests/python-gpu/test_large_sizes.py b/tests/python-gpu/test_large_sizes.py
index 3c1470d05..4c6dc3234 100644
--- a/tests/python-gpu/test_large_sizes.py
+++ b/tests/python-gpu/test_large_sizes.py
@@ -2,12 +2,12 @@ from __future__ import print_function
 
 import sys
 import time
+import pytest
 
 sys.path.append("../../tests/python")
 import xgboost as xgb
 import numpy as np
 import unittest
-from nose.plugins.attrib import attr
 
 
 def eprint(*args, **kwargs):
@@ -16,9 +16,11 @@ def eprint(*args, **kwargs):
     print(*args, file=sys.stdout, **kwargs)
     sys.stdout.flush()
 
+
 rng = np.random.RandomState(1994)
 
-# "realistic" size based upon http://stat-computing.org/dataexpo/2009/ , which has been processed to one-hot encode categoricalsxsy
+# "realistic" size based upon http://stat-computing.org/dataexpo/2009/
+# , which has been processed to one-hot encode categoricalsxsy
 cols = 31  # reduced to fit onto 1 gpu but still be large
 
 rows3 = 5000  # small
@@ -28,7 +30,7 @@ rows1 = 42360032  # large
 rowslist = [rows1, rows2, rows3]
 
 
-@attr('slow')
+@pytest.mark.slow
 class TestGPU(unittest.TestCase):
     def test_large(self):
         for rows in rowslist:
@@ -47,15 +49,8 @@ class TestGPU(unittest.TestCase):
             max_depth = 6
             max_bin = 1024
 
-            # regression test --- hist must be same as exact on all-categorial data
-            ag_param = {'max_depth': max_depth,
-                        'tree_method': 'exact',
-                        'nthread': 0,
-                        'eta': 1,
-                        'silent': 0,
-                        'debug_verbose': 5,
-                        'objective': 'binary:logistic',
-                        'eval_metric': 'auc'}
+            # regression test --- hist must be same as exact on
+            # all-categorial data
             ag_paramb = {'max_depth': max_depth,
                          'tree_method': 'hist',
                          'nthread': 0,
diff --git a/tests/python-gpu/test_monotonic_constraints.py b/tests/python-gpu/test_monotonic_constraints.py
index d052bf899..69063f11b 100644
--- a/tests/python-gpu/test_monotonic_constraints.py
+++ b/tests/python-gpu/test_monotonic_constraints.py
@@ -1,11 +1,13 @@
 from __future__ import print_function
 
 import numpy as np
-import unittest
-import xgboost as xgb
-from nose.plugins.attrib import attr
 from sklearn.datasets import make_regression
 
+import unittest
+import pytest
+
+import xgboost as xgb
+
 rng = np.random.RandomState(1994)
 
 
@@ -33,7 +35,7 @@ def assert_constraint(constraint, tree_method):
     assert non_increasing(pred)
 
 
-@attr('gpu')
+@pytest.mark.gpu
 class TestMonotonicConstraints(unittest.TestCase):
     def test_exact(self):
         assert_constraint(1, 'exact')
""" new_out, new_err = StringIO(), StringIO() old_out, old_err = sys.stdout, sys.stderr @@ -36,7 +38,8 @@ class TestBasic(unittest.TestCase): def test_basic(self): dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') dtest = xgb.DMatrix(dpath + 'agaricus.txt.test') - param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'} + param = {'max_depth': 2, 'eta': 1, 'silent': 1, + 'objective': 'binary:logistic'} # specify validations set to watch performance watchlist = [(dtest, 'eval'), (dtrain, 'train')] num_round = 2 @@ -44,7 +47,8 @@ class TestBasic(unittest.TestCase): # this is prediction preds = bst.predict(dtest) labels = dtest.get_label() - err = sum(1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]) / float(len(preds)) + err = sum(1 for i in range(len(preds)) + if int(preds[i] > 0.5) != labels[i]) / float(len(preds)) # error must be smaller than 10% assert err < 0.1 @@ -62,7 +66,8 @@ class TestBasic(unittest.TestCase): def test_record_results(self): dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') dtest = xgb.DMatrix(dpath + 'agaricus.txt.test') - param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'} + param = {'max_depth': 2, 'eta': 1, 'silent': 1, + 'objective': 'binary:logistic'} # specify validations set to watch performance watchlist = [(dtest, 'eval'), (dtrain, 'train')] num_round = 2 @@ -86,7 +91,8 @@ class TestBasic(unittest.TestCase): # this is prediction preds = bst.predict(dtest) labels = dtest.get_label() - err = sum(1 for i in range(len(preds)) if preds[i] != labels[i]) / float(len(preds)) + err = sum(1 for i in range(len(preds)) + if preds[i] != labels[i]) / float(len(preds)) # error must be smaller than 10% assert err < 0.1 @@ -248,7 +254,8 @@ class TestBasic(unittest.TestCase): def test_cv(self): dm = xgb.DMatrix(dpath + 'agaricus.txt.train') - params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'} + params = {'max_depth': 2, 'eta': 1, 'silent': 1, + 'objective': 'binary:logistic'} # return np.ndarray cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=False) @@ -257,16 +264,19 @@ class TestBasic(unittest.TestCase): def test_cv_no_shuffle(self): dm = xgb.DMatrix(dpath + 'agaricus.txt.train') - params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'} + params = {'max_depth': 2, 'eta': 1, 'silent': 1, + 'objective': 'binary:logistic'} # return np.ndarray - cv = xgb.cv(params, dm, num_boost_round=10, shuffle=False, nfold=10, as_pandas=False) + cv = xgb.cv(params, dm, num_boost_round=10, shuffle=False, nfold=10, + as_pandas=False) assert isinstance(cv, dict) assert len(cv) == (4) def test_cv_explicit_fold_indices(self): dm = xgb.DMatrix(dpath + 'agaricus.txt.train') - params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'} + params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': + 'binary:logistic'} folds = [ # Train Test ([1, 3], [5, 8]), @@ -274,12 +284,14 @@ class TestBasic(unittest.TestCase): ] # return np.ndarray - cv = xgb.cv(params, dm, num_boost_round=10, folds=folds, as_pandas=False) + cv = xgb.cv(params, dm, num_boost_round=10, folds=folds, + as_pandas=False) assert isinstance(cv, dict) assert len(cv) == (4) def test_cv_explicit_fold_indices_labels(self): - params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'reg:linear'} + params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': + 'reg:linear'} N = 100 F = 3 dm = xgb.DMatrix(data=np.random.randn(N, F), label=np.arange(N)) @@ -300,7 +312,9 @@ 
diff --git a/tests/python/test_dt.py b/tests/python/test_dt.py
index b9f318b09..cd138fa53 100644
--- a/tests/python/test_dt.py
+++ b/tests/python/test_dt.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 import unittest
+import pytest
 import testing as tm
 import xgboost as xgb
 
@@ -10,14 +11,16 @@ try:
 except ImportError:
     pass
 
-tm._skip_if_no_dt()
-tm._skip_if_no_pandas()
+pytestmark = pytest.mark.skipif(
+    tm.no_dt()['condition'] or tm.no_pandas()['condition'],
+    reason=tm.no_dt()['reason'] + ' or ' + tm.no_pandas()['reason'])
 
 
 class TestDataTable(unittest.TestCase):
 
     def test_dt(self):
-        df = pd.DataFrame([[1, 2., True], [2, 3., False]], columns=['a', 'b', 'c'])
+        df = pd.DataFrame([[1, 2., True], [2, 3., False]],
+                          columns=['a', 'b', 'c'])
         dtable = dt.Frame(df)
         labels = dt.Frame([1, 2])
         dm = xgb.DMatrix(dtable, label=labels)
@@ -34,7 +37,8 @@ class TestDataTable(unittest.TestCase):
         assert dm.num_col() == 3
 
         # incorrect dtypes
-        df = pd.DataFrame([[1, 2., 'x'], [2, 3., 'y']], columns=['a', 'b', 'c'])
+        df = pd.DataFrame([[1, 2., 'x'], [2, 3., 'y']],
+                          columns=['a', 'b', 'c'])
         dtable = dt.Frame(df)
         self.assertRaises(ValueError, xgb.DMatrix, dtable)
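test_dt.py above also introduces the module-level variant of the new skip style: pytest applies a global named `pytestmark` to every test collected from the module, replacing the old import-time `tm._skip_if_no_dt()` calls that aborted collection. A minimal sketch of the pattern, assuming the same testing.py helpers (the test body is hypothetical):

    import pytest
    import testing as tm  # tests/python/testing.py

    # Skip the whole module unless both datatable and pandas are available;
    # the two helper dicts are merged by hand into a single skipif.
    pytestmark = pytest.mark.skipif(
        tm.no_dt()['condition'] or tm.no_pandas()['condition'],
        reason=tm.no_dt()['reason'] + ' or ' + tm.no_pandas()['reason'])


    def test_anything():  # hypothetical; never runs without both libraries
        import datatable as dt
        import pandas as pd
        assert dt.Frame(pd.DataFrame({'a': [1]})).shape == (1, 1)
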
diff --git a/tests/python/test_early_stopping.py b/tests/python/test_early_stopping.py
index 7553aed66..13b0cec23 100644
--- a/tests/python/test_early_stopping.py
+++ b/tests/python/test_early_stopping.py
@@ -2,24 +2,26 @@ import xgboost as xgb
 import testing as tm
 import numpy as np
 import unittest
+import pytest
 
 rng = np.random.RandomState(1994)
 
 
 class TestEarlyStopping(unittest.TestCase):
+    @pytest.mark.skipif(**tm.no_sklearn())
     def test_early_stopping_nonparallel(self):
-        tm._skip_if_no_sklearn()
         from sklearn.datasets import load_digits
         try:
             from sklearn.model_selection import train_test_split
-        except:
+        except ImportError:
             from sklearn.cross_validation import train_test_split
 
         digits = load_digits(2)
         X = digits['data']
         y = digits['target']
-        X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
+        X_train, X_test, y_train, y_test = train_test_split(X, y,
+                                                            random_state=0)
         clf1 = xgb.XGBClassifier()
         clf1.fit(X_train, y_train, early_stopping_rounds=5, eval_metric="auc",
                  eval_set=[(X_test, y_test)])
@@ -35,36 +37,41 @@ class TestEarlyStopping(unittest.TestCase):
                  eval_set=[(X_test, y_test)])
         assert clf3.best_score == 1
 
+    @pytest.mark.skipif(**tm.no_sklearn())
     def evalerror(self, preds, dtrain):
-        tm._skip_if_no_sklearn()
         from sklearn.metrics import mean_squared_error
 
         labels = dtrain.get_label()
         return 'rmse', mean_squared_error(labels, preds)
 
+    @pytest.mark.skipif(**tm.no_sklearn())
     def test_cv_early_stopping(self):
-        tm._skip_if_no_sklearn()
         from sklearn.datasets import load_digits
 
         digits = load_digits(2)
         X = digits['data']
         y = digits['target']
         dm = xgb.DMatrix(X, label=y)
-        params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
+        params = {'max_depth': 2, 'eta': 1, 'silent': 1,
+                  'objective': 'binary:logistic'}
 
-        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, early_stopping_rounds=10)
-        assert cv.shape[0] == 10
-        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, early_stopping_rounds=5)
-        assert cv.shape[0] == 3
-        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, early_stopping_rounds=1)
-        assert cv.shape[0] == 1
-
-        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, feval=self.evalerror,
+        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
                     early_stopping_rounds=10)
         assert cv.shape[0] == 10
-        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, feval=self.evalerror,
+        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
+                    early_stopping_rounds=5)
+        assert cv.shape[0] == 3
+        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
+                    early_stopping_rounds=1)
+        assert cv.shape[0] == 1
+
+        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
+                    feval=self.evalerror,
                     early_stopping_rounds=10)
+        assert cv.shape[0] == 10
+        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
+                    feval=self.evalerror, early_stopping_rounds=1)
+        assert cv.shape[0] == 5
+        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
+                    feval=self.evalerror, maximize=True,
                     early_stopping_rounds=1)
-        assert cv.shape[0] == 5
-        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, feval=self.evalerror,
-                    maximize=True, early_stopping_rounds=1)
         assert cv.shape[0] == 1
diff --git a/tests/python/test_eval_metrics.py b/tests/python/test_eval_metrics.py
index 611b7e8fe..65dcbff75 100644
--- a/tests/python/test_eval_metrics.py
+++ b/tests/python/test_eval_metrics.py
@@ -2,6 +2,7 @@ import xgboost as xgb
 import testing as tm
 import numpy as np
 import unittest
+import pytest
 
 rng = np.random.RandomState(1337)
 
@@ -39,27 +40,27 @@ class TestEvalMetrics(unittest.TestCase):
         labels = dtrain.get_label()
         return [('error', float(sum(labels != (preds > 0.0))) / len(labels))]
 
+    @pytest.mark.skipif(**tm.no_sklearn())
     def evalerror_03(self, preds, dtrain):
-        tm._skip_if_no_sklearn()
         from sklearn.metrics import mean_squared_error
 
         labels = dtrain.get_label()
         return [('rmse', mean_squared_error(labels, preds)),
                 ('error', float(sum(labels != (preds > 0.0))) / len(labels))]
 
+    @pytest.mark.skipif(**tm.no_sklearn())
     def evalerror_04(self, preds, dtrain):
-        tm._skip_if_no_sklearn()
         from sklearn.metrics import mean_squared_error
 
         labels = dtrain.get_label()
         return [('error', float(sum(labels != (preds > 0.0))) / len(labels)),
                 ('rmse', mean_squared_error(labels, preds))]
 
+    @pytest.mark.skipif(**tm.no_sklearn())
     def test_eval_metrics(self):
-        tm._skip_if_no_sklearn()
         try:
             from sklearn.model_selection import train_test_split
-        except:
+        except ImportError:
             from sklearn.cross_validation import train_test_split
         from sklearn.datasets import load_digits
res["param"]["lambda"] @@ -38,14 +43,16 @@ def assert_regression_result(results, tol): l1_ratio=reg_alpha / (reg_alpha + reg_lambda)) enet.fit(X, y) enet_pred = enet.predict(X) - assert np.isclose(weights, enet.coef_, rtol=tol, atol=tol).all(), (weights, enet.coef_) + assert np.isclose(weights, enet.coef_, rtol=tol, + atol=tol).all(), (weights, enet.coef_) assert np.isclose(enet_pred, pred, rtol=tol, atol=tol).all(), ( res["dataset"].name, enet_pred[:5], pred[:5]) # TODO: More robust classification tests def assert_classification_result(results): - classification_results = [r for r in results if r["param"]["objective"] != "reg:linear"] + classification_results = [r for r in results if + r["param"]["objective"] != "reg:linear"] for res in classification_results: # Check accuracy is reasonable assert res["eval"][-1] < 0.5, (res["dataset"].name, res["eval"][-1]) @@ -56,25 +63,26 @@ class TestLinear(unittest.TestCase): datasets = ["Boston", "Digits", "Cancer", "Sparse regression", "Boston External Memory"] + @pytest.mark.skipif(**tm.no_sklearn()) def test_coordinate(self): - tm._skip_if_no_sklearn() - variable_param = {'booster': ['gblinear'], 'updater': ['coord_descent'], 'eta': [0.5], - 'top_k': [10], 'tolerance': [1e-5], 'nthread': [2], + variable_param = {'booster': ['gblinear'], 'updater': + ['coord_descent'], 'eta': [0.5], 'top_k': + [10], 'tolerance': [1e-5], 'nthread': [2], 'alpha': [.005, .1], 'lambda': [.005], - 'feature_selector': ['cyclic', 'shuffle', 'greedy', 'thrifty'] - } + 'feature_selector': ['cyclic', 'shuffle', + 'greedy', 'thrifty']} for param in parameter_combinations(variable_param): results = run_suite(param, 200, self.datasets, scale_features=True) assert_regression_result(results, 1e-2) assert_classification_result(results) + @pytest.mark.skipif(**tm.no_sklearn()) def test_shotgun(self): - tm._skip_if_no_sklearn() - variable_param = {'booster': ['gblinear'], 'updater': ['shotgun'], 'eta': [0.5], - 'top_k': [10], 'tolerance': [1e-5], 'nthread': [2], + variable_param = {'booster': ['gblinear'], 'updater': + ['shotgun'], 'eta': [0.5], 'top_k': [10], + 'tolerance': [1e-5], 'nthread': [2], 'alpha': [.005, .1], 'lambda': [.005], - 'feature_selector': ['cyclic', 'shuffle'] - } + 'feature_selector': ['cyclic', 'shuffle']} for param in parameter_combinations(variable_param): results = run_suite(param, 200, self.datasets, True) assert_regression_result(results, 1e-2) diff --git a/tests/python/test_plotting.py b/tests/python/test_plotting.py index fde98dcca..7d98280e4 100644 --- a/tests/python/test_plotting.py +++ b/tests/python/test_plotting.py @@ -2,7 +2,9 @@ import numpy as np import xgboost as xgb import testing as tm + import unittest +import pytest try: import matplotlib @@ -13,7 +15,7 @@ except ImportError: pass -tm._skip_if_no_matplotlib() +pytestmark = pytest.mark.skipif(**tm.no_matplotlib()) dpath = 'demo/data/' diff --git a/tests/python/test_training_continuation.py b/tests/python/test_training_continuation.py index f7511f685..ec09ad43a 100644 --- a/tests/python/test_training_continuation.py +++ b/tests/python/test_training_continuation.py @@ -2,6 +2,7 @@ import xgboost as xgb import testing as tm import numpy as np import unittest +import pytest rng = np.random.RandomState(1337) @@ -27,8 +28,8 @@ class TestTrainingContinuation(unittest.TestCase): 'num_parallel_tree': num_parallel_tree } + @pytest.mark.skipif(**tm.no_sklearn()) def test_training_continuation(self): - tm._skip_if_no_sklearn() from sklearn.datasets import load_digits from sklearn.metrics import 
diff --git a/tests/python/test_training_continuation.py b/tests/python/test_training_continuation.py
index f7511f685..ec09ad43a 100644
--- a/tests/python/test_training_continuation.py
+++ b/tests/python/test_training_continuation.py
@@ -2,6 +2,7 @@ import xgboost as xgb
 import testing as tm
 import numpy as np
 import unittest
+import pytest
 
 rng = np.random.RandomState(1337)
 
@@ -27,8 +28,8 @@ class TestTrainingContinuation(unittest.TestCase):
         'num_parallel_tree': num_parallel_tree
     }
 
+    @pytest.mark.skipif(**tm.no_sklearn())
     def test_training_continuation(self):
-        tm._skip_if_no_sklearn()
         from sklearn.datasets import load_digits
         from sklearn.metrics import mean_squared_error
 
@@ -44,15 +45,19 @@ class TestTrainingContinuation(unittest.TestCase):
         dtrain_2class = xgb.DMatrix(X_2class, label=y_2class)
         dtrain_5class = xgb.DMatrix(X_5class, label=y_5class)
 
-        gbdt_01 = xgb.train(self.xgb_params_01, dtrain_2class, num_boost_round=10)
+        gbdt_01 = xgb.train(self.xgb_params_01, dtrain_2class,
+                            num_boost_round=10)
         ntrees_01 = len(gbdt_01.get_dump())
         assert ntrees_01 == 10
 
-        gbdt_02 = xgb.train(self.xgb_params_01, dtrain_2class, num_boost_round=0)
+        gbdt_02 = xgb.train(self.xgb_params_01, dtrain_2class,
+                            num_boost_round=0)
         gbdt_02.save_model('xgb_tc.model')
 
-        gbdt_02a = xgb.train(self.xgb_params_01, dtrain_2class, num_boost_round=10, xgb_model=gbdt_02)
-        gbdt_02b = xgb.train(self.xgb_params_01, dtrain_2class, num_boost_round=10, xgb_model="xgb_tc.model")
+        gbdt_02a = xgb.train(self.xgb_params_01, dtrain_2class,
+                             num_boost_round=10, xgb_model=gbdt_02)
+        gbdt_02b = xgb.train(self.xgb_params_01, dtrain_2class,
+                             num_boost_round=10, xgb_model="xgb_tc.model")
         ntrees_02a = len(gbdt_02a.get_dump())
         ntrees_02b = len(gbdt_02b.get_dump())
         assert ntrees_02a == 10
@@ -66,11 +71,14 @@ class TestTrainingContinuation(unittest.TestCase):
         res2 = mean_squared_error(y_2class, gbdt_02b.predict(dtrain_2class))
         assert res1 == res2
 
-        gbdt_03 = xgb.train(self.xgb_params_01, dtrain_2class, num_boost_round=3)
+        gbdt_03 = xgb.train(self.xgb_params_01, dtrain_2class,
+                            num_boost_round=3)
         gbdt_03.save_model('xgb_tc.model')
 
-        gbdt_03a = xgb.train(self.xgb_params_01, dtrain_2class, num_boost_round=7, xgb_model=gbdt_03)
-        gbdt_03b = xgb.train(self.xgb_params_01, dtrain_2class, num_boost_round=7, xgb_model="xgb_tc.model")
+        gbdt_03a = xgb.train(self.xgb_params_01, dtrain_2class,
+                             num_boost_round=7, xgb_model=gbdt_03)
+        gbdt_03b = xgb.train(self.xgb_params_01, dtrain_2class,
+                             num_boost_round=7, xgb_model="xgb_tc.model")
         ntrees_03a = len(gbdt_03a.get_dump())
         ntrees_03b = len(gbdt_03b.get_dump())
         assert ntrees_03a == 10
@@ -80,25 +88,42 @@ class TestTrainingContinuation(unittest.TestCase):
         res2 = mean_squared_error(y_2class, gbdt_03b.predict(dtrain_2class))
         assert res1 == res2
 
-        gbdt_04 = xgb.train(self.xgb_params_02, dtrain_2class, num_boost_round=3)
-        assert gbdt_04.best_ntree_limit == (gbdt_04.best_iteration + 1) * self.num_parallel_tree
+        gbdt_04 = xgb.train(self.xgb_params_02, dtrain_2class,
+                            num_boost_round=3)
+        assert gbdt_04.best_ntree_limit == (gbdt_04.best_iteration +
+                                            1) * self.num_parallel_tree
 
         res1 = mean_squared_error(y_2class, gbdt_04.predict(dtrain_2class))
-        res2 = mean_squared_error(y_2class, gbdt_04.predict(dtrain_2class, ntree_limit=gbdt_04.best_ntree_limit))
+        res2 = mean_squared_error(y_2class,
+                                  gbdt_04.predict(
+                                      dtrain_2class,
+                                      ntree_limit=gbdt_04.best_ntree_limit))
         assert res1 == res2
 
-        gbdt_04 = xgb.train(self.xgb_params_02, dtrain_2class, num_boost_round=7, xgb_model=gbdt_04)
-        assert gbdt_04.best_ntree_limit == (gbdt_04.best_iteration + 1) * self.num_parallel_tree
+        gbdt_04 = xgb.train(self.xgb_params_02, dtrain_2class,
+                            num_boost_round=7, xgb_model=gbdt_04)
+        assert gbdt_04.best_ntree_limit == (
+            gbdt_04.best_iteration + 1) * self.num_parallel_tree
 
         res1 = mean_squared_error(y_2class, gbdt_04.predict(dtrain_2class))
-        res2 = mean_squared_error(y_2class, gbdt_04.predict(dtrain_2class, ntree_limit=gbdt_04.best_ntree_limit))
+        res2 = mean_squared_error(y_2class,
+                                  gbdt_04.predict(
+                                      dtrain_2class,
+                                      ntree_limit=gbdt_04.best_ntree_limit))
         assert res1 == res2
-        gbdt_05 = xgb.train(self.xgb_params_03, dtrain_5class, num_boost_round=7)
-        assert gbdt_05.best_ntree_limit == (gbdt_05.best_iteration + 1) * self.num_parallel_tree
-        gbdt_05 = xgb.train(self.xgb_params_03, dtrain_5class, num_boost_round=3, xgb_model=gbdt_05)
-        assert gbdt_05.best_ntree_limit == (gbdt_05.best_iteration + 1) * self.num_parallel_tree
+        gbdt_05 = xgb.train(self.xgb_params_03, dtrain_5class,
+                            num_boost_round=7)
+        assert gbdt_05.best_ntree_limit == (
+            gbdt_05.best_iteration + 1) * self.num_parallel_tree
+        gbdt_05 = xgb.train(self.xgb_params_03,
+                            dtrain_5class,
+                            num_boost_round=3,
+                            xgb_model=gbdt_05)
+        assert gbdt_05.best_ntree_limit == (
+            gbdt_05.best_iteration + 1) * self.num_parallel_tree
 
         res1 = gbdt_05.predict(dtrain_5class)
-        res2 = gbdt_05.predict(dtrain_5class, ntree_limit=gbdt_05.best_ntree_limit)
+        res2 = gbdt_05.predict(dtrain_5class,
+                               ntree_limit=gbdt_05.best_ntree_limit)
         np.testing.assert_almost_equal(res1, res2)
diff --git a/tests/python/test_updaters.py b/tests/python/test_updaters.py
index 33f0ebb48..b19710e35 100644
--- a/tests/python/test_updaters.py
+++ b/tests/python/test_updaters.py
@@ -1,5 +1,6 @@
 import testing as tm
 import unittest
+import pytest
 import xgboost as xgb
 
 try:
@@ -10,24 +11,27 @@ except ImportError:
     pass
 
 
 class TestUpdaters(unittest.TestCase):
+    @pytest.mark.skipif(**tm.no_sklearn())
     def test_histmaker(self):
-        tm._skip_if_no_sklearn()
         variable_param = {'updater': ['grow_histmaker'], 'max_depth': [2, 8]}
         for param in parameter_combinations(variable_param):
             result = run_suite(param)
             assert_results_non_increasing(result, 1e-2)
 
+    @pytest.mark.skipif(**tm.no_sklearn())
     def test_colmaker(self):
-        tm._skip_if_no_sklearn()
         variable_param = {'updater': ['grow_colmaker'], 'max_depth': [2, 8]}
         for param in parameter_combinations(variable_param):
             result = run_suite(param)
             assert_results_non_increasing(result, 1e-2)
 
+    @pytest.mark.skipif(**tm.no_sklearn())
     def test_fast_histmaker(self):
-        tm._skip_if_no_sklearn()
-        variable_param = {'tree_method': ['hist'], 'max_depth': [2, 8], 'max_bin': [2, 256],
-                          'grow_policy': ['depthwise', 'lossguide'], 'max_leaves': [64, 0],
+        variable_param = {'tree_method': ['hist'],
+                          'max_depth': [2, 8],
+                          'max_bin': [2, 256],
+                          'grow_policy': ['depthwise', 'lossguide'],
+                          'max_leaves': [64, 0],
                           'silent': [1]}
         for param in parameter_combinations(variable_param):
             result = run_suite(param)
@@ -46,10 +50,12 @@ class TestUpdaters(unittest.TestCase):
 
         hist_res = {}
         exact_res = {}
 
-        xgb.train(ag_param, ag_dtrain, 10, [(ag_dtrain, 'train'), (ag_dtest, 'test')],
+        xgb.train(ag_param, ag_dtrain, 10,
+                  [(ag_dtrain, 'train'), (ag_dtest, 'test')],
                   evals_result=hist_res)
         ag_param["tree_method"] = "exact"
-        xgb.train(ag_param, ag_dtrain, 10, [(ag_dtrain, 'train'), (ag_dtest, 'test')],
+        xgb.train(ag_param, ag_dtrain, 10,
+                  [(ag_dtrain, 'train'), (ag_dtest, 'test')],
                   evals_result=exact_res)
         assert hist_res['train']['auc'] == exact_res['train']['auc']
         assert hist_res['test']['auc'] == exact_res['test']['auc']
diff --git a/tests/python/test_with_pandas.py b/tests/python/test_with_pandas.py
index 3bb26c12d..0f2eb25de 100644
--- a/tests/python/test_with_pandas.py
+++ b/tests/python/test_with_pandas.py
@@ -3,6 +3,7 @@ import numpy as np
 import xgboost as xgb
 import testing as tm
 import unittest
+import pytest
 
 try:
     import pandas as pd
@@ -10,7 +11,7 @@ except ImportError:
     pass
 
 
-tm._skip_if_no_pandas()
+pytestmark = pytest.mark.skipif(**tm.no_pandas())
 
 dpath = 'demo/data/'
 
@@ -21,7 +22,8 @@ class TestPandas(unittest.TestCase):
 
     def test_pandas(self):
-        df = pd.DataFrame([[1, 2., True], [2, 3., False]], columns=['a', 'b', 'c'])
+        df = pd.DataFrame([[1, 2., True], [2, 3., False]],
+                          columns=['a', 'b', 'c'])
         dm = xgb.DMatrix(df, label=pd.Series([1, 2]))
         assert dm.feature_names == ['a', 'b', 'c']
         assert dm.feature_types == ['int', 'float', 'i']
@@ -30,14 +32,16 @@ class TestPandas(unittest.TestCase):
 
         # overwrite feature_names and feature_types
         dm = xgb.DMatrix(df, label=pd.Series([1, 2]),
-                         feature_names=['x', 'y', 'z'], feature_types=['q', 'q', 'q'])
+                         feature_names=['x', 'y', 'z'],
+                         feature_types=['q', 'q', 'q'])
         assert dm.feature_names == ['x', 'y', 'z']
         assert dm.feature_types == ['q', 'q', 'q']
         assert dm.num_row() == 2
         assert dm.num_col() == 3
 
         # incorrect dtypes
-        df = pd.DataFrame([[1, 2., 'x'], [2, 3., 'y']], columns=['a', 'b', 'c'])
+        df = pd.DataFrame([[1, 2., 'x'], [2, 3., 'y']],
+                          columns=['a', 'b', 'c'])
         self.assertRaises(ValueError, xgb.DMatrix, df)
 
         # numeric columns
@@ -107,7 +111,8 @@ class TestPandas(unittest.TestCase):
 
         df = pd.DataFrame({'A': np.array([1, 2, 3], dtype=int)})
         result = xgb.core._maybe_pandas_label(df)
-        np.testing.assert_array_equal(result, np.array([[1.], [2.], [3.]], dtype=float))
+        np.testing.assert_array_equal(result, np.array([[1.], [2.], [3.]],
+                                                       dtype=float))
 
         dm = xgb.DMatrix(np.random.randn(3, 2), label=df)
         assert dm.num_row() == 3
@@ -115,9 +120,9 @@ class TestPandas(unittest.TestCase):
 
     def test_cv_as_pandas(self):
         dm = xgb.DMatrix(dpath + 'agaricus.txt.train')
-        params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
+        params = {'max_depth': 2, 'eta': 1, 'silent': 1,
+                  'objective': 'binary:logistic'}
 
-        import pandas as pd
         cv = xgb.cv(params, dm, num_boost_round=10, nfold=10)
         assert isinstance(cv, pd.DataFrame)
         exp = pd.Index([u'test-error-mean', u'test-error-std',
diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py
index 8808ccdfb..0826493b6 100644
--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@@ -4,10 +4,12 @@ import testing as tm
 import tempfile
 import os
 import shutil
-from nose.tools import raises
+import pytest
 
 rng = np.random.RandomState(1994)
 
+pytestmark = pytest.mark.skipif(**tm.no_sklearn())
+
 
 class TemporaryDirectory(object):
     """Context manager for tempfile.mkdtemp()"""
@@ -20,7 +22,6 @@ class TemporaryDirectory(object):
 
 
 def test_binary_classification():
-    tm._skip_if_no_sklearn()
     from sklearn.datasets import load_digits
     from sklearn.model_selection import KFold
 
@@ -38,7 +39,6 @@ def test_binary_classification():
 
 
 def test_multiclass_classification():
-    tm._skip_if_no_sklearn()
     from sklearn.datasets import load_iris
     from sklearn.model_selection import KFold
 
@@ -59,9 +59,12 @@ def test_multiclass_classification():
         xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
         preds = xgb_model.predict(X[test_index])
         # test other params in XGBClassifier().fit
-        preds2 = xgb_model.predict(X[test_index], output_margin=True, ntree_limit=3)
-        preds3 = xgb_model.predict(X[test_index], output_margin=True, ntree_limit=0)
-        preds4 = xgb_model.predict(X[test_index], output_margin=False, ntree_limit=3)
+        preds2 = xgb_model.predict(X[test_index], output_margin=True,
+                                   ntree_limit=3)
+        preds3 = xgb_model.predict(X[test_index], output_margin=True,
+                                   ntree_limit=0)
+        preds4 = xgb_model.predict(X[test_index], output_margin=False,
+                                   ntree_limit=3)
         labels = y[test_index]
 
         check_pred(preds, labels, output_margin=False)
@@ -71,7 +74,6 @@ def test_multiclass_classification():
 
 
 def test_ranking():
-    tm._skip_if_no_sklearn()
     # generate random data
     x_train = np.random.rand(1000, 10)
     y_train = np.random.randint(5, size=1000)
@@ -105,13 +107,13 @@ def test_ranking():
 
 
 def test_feature_importances_weight():
-    tm._skip_if_no_sklearn()
     from sklearn.datasets import load_digits
 
     digits = load_digits(2)
     y = digits['target']
     X = digits['data']
-    xgb_model = xgb.XGBClassifier(random_state=0, importance_type="weight").fit(X, y)
+    xgb_model = xgb.XGBClassifier(
+        random_state=0, importance_type="weight").fit(X, y)
 
     exp = np.array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.00833333, 0.,
                     0., 0., 0., 0., 0., 0., 0., 0.025, 0.14166667, 0., 0., 0.,
@@ -127,28 +129,32 @@ def test_feature_importances_weight():
     import pandas as pd
     y = pd.Series(digits['target'])
     X = pd.DataFrame(digits['data'])
-    xgb_model = xgb.XGBClassifier(random_state=0, importance_type="weight").fit(X, y)
+    xgb_model = xgb.XGBClassifier(
+        random_state=0, importance_type="weight").fit(X, y)
     np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)
 
-    xgb_model = xgb.XGBClassifier(random_state=0, importance_type="weight").fit(X, y)
+    xgb_model = xgb.XGBClassifier(
+        random_state=0, importance_type="weight").fit(X, y)
     np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)
 
 
 def test_feature_importances_gain():
-    tm._skip_if_no_sklearn()
     from sklearn.datasets import load_digits
 
     digits = load_digits(2)
     y = digits['target']
     X = digits['data']
-    xgb_model = xgb.XGBClassifier(random_state=0, importance_type="gain").fit(X, y)
+    xgb_model = xgb.XGBClassifier(
+        random_state=0, importance_type="gain").fit(X, y)
 
-    exp = np.array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.00326159, 0., 0., 0.,
-                    0., 0., 0., 0., 0., 0.00297238, 0.00988034, 0., 0., 0., 0.,
-                    0., 0., 0.03512521, 0.41123885, 0., 0., 0., 0., 0.01326332,
-                    0.00160674, 0., 0.4206952, 0., 0., 0., 0., 0.00616747, 0.01237546,
-                    0., 0., 0., 0., 0., 0., 0., 0.08240705, 0., 0., 0., 0.,
-                    0., 0., 0., 0.00100649, 0., 0., 0., 0., 0.], dtype=np.float32)
+    exp = np.array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
+                    0.00326159, 0., 0., 0., 0., 0., 0., 0., 0.,
+                    0.00297238, 0.00988034, 0., 0., 0., 0., 0., 0.,
+                    0.03512521, 0.41123885, 0., 0., 0., 0.,
+                    0.01326332, 0.00160674, 0., 0.4206952, 0., 0., 0.,
+                    0., 0.00616747, 0.01237546, 0., 0., 0., 0., 0.,
+                    0., 0., 0.08240705, 0., 0., 0., 0., 0., 0., 0.,
+                    0.00100649, 0., 0., 0., 0., 0.], dtype=np.float32)
 
     np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)
@@ -156,15 +162,16 @@ def test_feature_importances_gain():
     import pandas as pd
     y = pd.Series(digits['target'])
     X = pd.DataFrame(digits['data'])
-    xgb_model = xgb.XGBClassifier(random_state=0, importance_type="gain").fit(X, y)
+    xgb_model = xgb.XGBClassifier(
+        random_state=0, importance_type="gain").fit(X, y)
     np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)
 
-    xgb_model = xgb.XGBClassifier(random_state=0, importance_type="gain").fit(X, y)
+    xgb_model = xgb.XGBClassifier(
+        random_state=0, importance_type="gain").fit(X, y)
    np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)
 
 
 def test_boston_housing_regression():
-    tm._skip_if_no_sklearn()
     from sklearn.metrics import mean_squared_error
     from sklearn.datasets import load_boston
     from sklearn.model_selection import KFold
@@ -178,9 +185,12 @@ def test_boston_housing_regression():
         preds = xgb_model.predict(X[test_index])
         # test other params in XGBRegressor().fit
-        preds2 = xgb_model.predict(X[test_index], output_margin=True, ntree_limit=3)
-        preds3 = xgb_model.predict(X[test_index], output_margin=True, ntree_limit=0)
-        preds4 = xgb_model.predict(X[test_index], output_margin=False, ntree_limit=3)
+        preds2 = xgb_model.predict(X[test_index], output_margin=True,
+                                   ntree_limit=3)
+        preds3 = xgb_model.predict(X[test_index], output_margin=True,
+                                   ntree_limit=0)
+        preds4 = xgb_model.predict(X[test_index], output_margin=False,
+                                   ntree_limit=3)
         labels = y[test_index]
 
         assert mean_squared_error(preds, labels) < 25
@@ -190,7 +200,6 @@ def test_boston_housing_regression():
 
 
 def test_parameter_tuning():
-    tm._skip_if_no_sklearn()
     from sklearn.model_selection import GridSearchCV
     from sklearn.datasets import load_boston
 
@@ -207,7 +216,6 @@ def test_parameter_tuning():
 
 
 def test_regression_with_custom_objective():
-    tm._skip_if_no_sklearn()
     from sklearn.metrics import mean_squared_error
     from sklearn.datasets import load_boston
     from sklearn.model_selection import KFold
@@ -241,7 +249,6 @@ def test_regression_with_custom_objective():
 
 
 def test_classification_with_custom_objective():
-    tm._skip_if_no_sklearn()
     from sklearn.datasets import load_digits
     from sklearn.model_selection import KFold
 
@@ -280,7 +287,6 @@ def test_classification_with_custom_objective():
 
 
 def test_sklearn_api():
-    tm._skip_if_no_sklearn()
     from sklearn.datasets import load_iris
     from sklearn.model_selection import train_test_split
 
@@ -298,12 +304,12 @@ def test_sklearn_api():
 
 
 def test_sklearn_api_gblinear():
-    tm._skip_if_no_sklearn()
     from sklearn.datasets import load_iris
     from sklearn.model_selection import train_test_split
 
     iris = load_iris()
-    tr_d, te_d, tr_l, te_l = train_test_split(iris.data, iris.target, train_size=120)
+    tr_d, te_d, tr_l, te_l = train_test_split(iris.data, iris.target,
+                                              train_size=120)
 
     classifier = xgb.XGBClassifier(booster='gblinear', n_estimators=100)
     classifier.fit(tr_d, tr_l)
@@ -314,8 +320,8 @@ def test_sklearn_api_gblinear():
     assert err < 0.5
 
 
+@pytest.mark.skipif(**tm.no_matplotlib())
 def test_sklearn_plotting():
-    tm._skip_if_no_sklearn()
     from sklearn.datasets import load_iris
 
     iris = load_iris()
@@ -344,7 +350,6 @@ def test_sklearn_plotting():
 
 
 def test_sklearn_nfolds_cv():
-    tm._skip_if_no_sklearn()
     from sklearn.datasets import load_digits
     from sklearn.model_selection import StratifiedKFold
 
@@ -367,14 +372,15 @@ def test_sklearn_nfolds_cv():
     skf = StratifiedKFold(n_splits=nfolds, shuffle=True, random_state=seed)
 
     cv1 = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds, seed=seed)
-    cv2 = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds, folds=skf, seed=seed)
-    cv3 = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds, stratified=True, seed=seed)
+    cv2 = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds,
+                 folds=skf, seed=seed)
+    cv3 = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds,
+                 stratified=True, seed=seed)
     assert cv1.shape[0] == cv2.shape[0] and cv2.shape[0] == cv3.shape[0]
     assert cv2.iloc[-1, 0] == cv3.iloc[-1, 0]
 
 
 def test_split_value_histograms():
-    tm._skip_if_no_sklearn()
     from sklearn.datasets import load_digits
 
     digits_2class = load_digits(2)
@@ -383,11 +389,14 @@ def test_split_value_histograms():
     y = digits_2class['target']
 
     dm = xgb.DMatrix(X, label=y)
-    params = {'max_depth': 6, 'eta': 0.01, 'silent': 1, 'objective': 'binary:logistic'}
+    params = {'max_depth': 6, 'eta': 0.01, 'silent': 1,
+              'objective': 'binary:logistic'}
 
     gbdt = xgb.train(params, dm, num_boost_round=10)
-    assert gbdt.get_split_value_histogram("not_there", as_pandas=True).shape[0] == 0
-    assert gbdt.get_split_value_histogram("not_there", as_pandas=False).shape[0] == 0
+    assert gbdt.get_split_value_histogram("not_there",
+                                          as_pandas=True).shape[0] == 0
+    assert gbdt.get_split_value_histogram("not_there",
+                                          as_pandas=False).shape[0] == 0
     assert gbdt.get_split_value_histogram("f28", bins=0).shape[0] == 1
     assert gbdt.get_split_value_histogram("f28", bins=1).shape[0] == 1
     assert gbdt.get_split_value_histogram("f28", bins=2).shape[0] == 2
@@ -396,8 +405,6 @@ def test_split_value_histograms():
 
 
 def test_sklearn_random_state():
-    tm._skip_if_no_sklearn()
-
     clf = xgb.XGBClassifier(random_state=402)
     assert clf.get_xgb_params()['seed'] == 402
 
@@ -406,8 +413,6 @@ def test_sklearn_random_state():
 
 
 def test_sklearn_n_jobs():
-    tm._skip_if_no_sklearn()
-
     clf = xgb.XGBClassifier(n_jobs=1)
     assert clf.get_xgb_params()['nthread'] == 1
 
@@ -416,8 +421,6 @@ def test_sklearn_n_jobs():
 
 
 def test_kwargs():
-    tm._skip_if_no_sklearn()
-
     params = {'updater': 'grow_gpu', 'subsample': .5, 'n_jobs': -1}
     clf = xgb.XGBClassifier(n_estimators=1000, **params)
     assert clf.get_params()['updater'] == 'grow_gpu'
@@ -426,7 +429,6 @@ def test_kwargs():
 
 
 def test_kwargs_grid_search():
-    tm._skip_if_no_sklearn()
     from sklearn.model_selection import GridSearchCV
     from sklearn import datasets
 
@@ -446,17 +448,14 @@ def test_kwargs_grid_search():
     assert len(means) == len(set(means))
 
 
-@raises(TypeError)
 def test_kwargs_error():
-    tm._skip_if_no_sklearn()
-
     params = {'updater': 'grow_gpu', 'subsample': .5, 'n_jobs': -1}
-    clf = xgb.XGBClassifier(n_jobs=1000, **params)
-    assert isinstance(clf, xgb.XGBClassifier)
+    with pytest.raises(TypeError):
+        clf = xgb.XGBClassifier(n_jobs=1000, **params)
+        assert isinstance(clf, xgb.XGBClassifier)
 
 
 def test_sklearn_clone():
-    tm._skip_if_no_sklearn()
     from sklearn.base import clone
 
     clf = xgb.XGBClassifier(n_jobs=2, nthread=3)
gbdt.get_split_value_histogram("not_there", + as_pandas=False).shape[0] == 0 assert gbdt.get_split_value_histogram("f28", bins=0).shape[0] == 1 assert gbdt.get_split_value_histogram("f28", bins=1).shape[0] == 1 assert gbdt.get_split_value_histogram("f28", bins=2).shape[0] == 2 @@ -396,8 +405,6 @@ def test_split_value_histograms(): def test_sklearn_random_state(): - tm._skip_if_no_sklearn() - clf = xgb.XGBClassifier(random_state=402) assert clf.get_xgb_params()['seed'] == 402 @@ -406,8 +413,6 @@ def test_sklearn_random_state(): def test_sklearn_n_jobs(): - tm._skip_if_no_sklearn() - clf = xgb.XGBClassifier(n_jobs=1) assert clf.get_xgb_params()['nthread'] == 1 @@ -416,8 +421,6 @@ def test_sklearn_n_jobs(): def test_kwargs(): - tm._skip_if_no_sklearn() - params = {'updater': 'grow_gpu', 'subsample': .5, 'n_jobs': -1} clf = xgb.XGBClassifier(n_estimators=1000, **params) assert clf.get_params()['updater'] == 'grow_gpu' @@ -426,7 +429,6 @@ def test_kwargs(): def test_kwargs_grid_search(): - tm._skip_if_no_sklearn() from sklearn.model_selection import GridSearchCV from sklearn import datasets @@ -446,17 +448,14 @@ def test_kwargs_grid_search(): assert len(means) == len(set(means)) -@raises(TypeError) def test_kwargs_error(): - tm._skip_if_no_sklearn() - params = {'updater': 'grow_gpu', 'subsample': .5, 'n_jobs': -1} - clf = xgb.XGBClassifier(n_jobs=1000, **params) - assert isinstance(clf, xgb.XGBClassifier) + with pytest.raises(TypeError): + clf = xgb.XGBClassifier(n_jobs=1000, **params) + assert isinstance(clf, xgb.XGBClassifier) def test_sklearn_clone(): - tm._skip_if_no_sklearn() from sklearn.base import clone clf = xgb.XGBClassifier(n_jobs=2, nthread=3) @@ -465,7 +464,6 @@ def test_sklearn_clone(): def test_validation_weights_xgbmodel(): - tm._skip_if_no_sklearn() from sklearn.datasets import make_hastie_10_2 # prepare training and test data @@ -489,7 +487,8 @@ def test_validation_weights_xgbmodel(): # evaluate logloss metric on test set *without* using weights evals_result_without_weights = clf.evals_result() - logloss_without_weights = evals_result_without_weights["validation_0"]["logloss"] + logloss_without_weights = evals_result_without_weights[ + "validation_0"]["logloss"] # now use weights for the test set np.random.seed(0) @@ -503,13 +502,13 @@ def test_validation_weights_xgbmodel(): evals_result_with_weights = clf.evals_result() logloss_with_weights = evals_result_with_weights["validation_0"]["logloss"] - # check that the logloss in the test set is actually different when using weights - # than when not using them - assert all((logloss_with_weights[i] != logloss_without_weights[i] for i in [0, 1])) + # check that the logloss in the test set is actually different when using + # weights than when not using them + assert all((logloss_with_weights[i] != logloss_without_weights[i] + for i in [0, 1])) def test_validation_weights_xgbclassifier(): - tm._skip_if_no_sklearn() from sklearn.datasets import make_hastie_10_2 # prepare training and test data @@ -533,7 +532,8 @@ def test_validation_weights_xgbclassifier(): # evaluate logloss metric on test set *without* using weights evals_result_without_weights = clf.evals_result() - logloss_without_weights = evals_result_without_weights["validation_0"]["logloss"] + logloss_without_weights = evals_result_without_weights[ + "validation_0"]["logloss"] # now use weights for the test set np.random.seed(0) @@ -547,13 +547,13 @@ def test_validation_weights_xgbclassifier(): evals_result_with_weights = clf.evals_result() logloss_with_weights = 
evals_result_with_weights["validation_0"]["logloss"] - # check that the logloss in the test set is actually different when using weights - # than when not using them - assert all((logloss_with_weights[i] != logloss_without_weights[i] for i in [0, 1])) + # check that the logloss in the test set is actually different + # when using weights than when not using them + assert all((logloss_with_weights[i] != logloss_without_weights[i] + for i in [0, 1])) def test_save_load_model(): - tm._skip_if_no_sklearn() from sklearn.datasets import load_digits from sklearn.model_selection import KFold @@ -576,7 +576,6 @@ def test_save_load_model(): def test_RFECV(): - tm._skip_if_no_sklearn() from sklearn.datasets import load_boston from sklearn.datasets import load_breast_cancer from sklearn.datasets import load_iris @@ -587,21 +586,25 @@ def test_RFECV(): bst = xgb.XGBClassifier(booster='gblinear', learning_rate=0.1, n_estimators=10, n_jobs=1, objective='reg:linear', random_state=0, silent=True) - rfecv = RFECV(estimator=bst, step=1, cv=3, scoring='neg_mean_squared_error') + rfecv = RFECV( + estimator=bst, step=1, cv=3, scoring='neg_mean_squared_error') rfecv.fit(X, y) # Binary classification X, y = load_breast_cancer(return_X_y=True) bst = xgb.XGBClassifier(booster='gblinear', learning_rate=0.1, - n_estimators=10, n_jobs=1, objective='binary:logistic', + n_estimators=10, n_jobs=1, + objective='binary:logistic', random_state=0, silent=True) rfecv = RFECV(estimator=bst, step=1, cv=3, scoring='roc_auc') rfecv.fit(X, y) # Multi-class classification X, y = load_iris(return_X_y=True) - bst = xgb.XGBClassifier(base_score=0.4, booster='gblinear', learning_rate=0.1, - n_estimators=10, n_jobs=1, objective='multi:softprob', + bst = xgb.XGBClassifier(base_score=0.4, booster='gblinear', + learning_rate=0.1, + n_estimators=10, n_jobs=1, + objective='multi:softprob', random_state=0, reg_alpha=0.001, reg_lambda=0.01, scale_pos_weight=0.5, silent=True) rfecv = RFECV(estimator=bst, step=1, cv=3, scoring='neg_log_loss') diff --git a/tests/python/testing.py b/tests/python/testing.py index 2eef0bfd6..234e39527 100644 --- a/tests/python/testing.py +++ b/tests/python/testing.py @@ -1,27 +1,28 @@ # coding: utf-8 - -import nose - from xgboost.compat import SKLEARN_INSTALLED, PANDAS_INSTALLED, DT_INSTALLED -def _skip_if_no_sklearn(): - if not SKLEARN_INSTALLED: - raise nose.SkipTest() +def no_sklearn(): + return {'condition': not SKLEARN_INSTALLED, + 'reason': 'Scikit-Learn is not installed'} -def _skip_if_no_pandas(): - if not PANDAS_INSTALLED: - raise nose.SkipTest() +def no_pandas(): + return {'condition': not PANDAS_INSTALLED, + 'reason': 'Pandas is not installed.'} -def _skip_if_no_dt(): - if not DT_INSTALLED: - raise nose.SkipTest() +def no_dt(): + return {'condition': not DT_INSTALLED, + 'reason': 'Datatable is not installed.'} -def _skip_if_no_matplotlib(): +def no_matplotlib(): + reason = 'Matplotlib is not installed.' 
     try:
         import matplotlib.pyplot as _  # noqa
+        return {'condition': False,
+                'reason': reason}
     except ImportError:
-        raise nose.SkipTest()
+        return {'condition': True,
+                'reason': reason}
diff --git a/tests/travis/run_test.sh b/tests/travis/run_test.sh
index 87ce386ab..b2fd2dcf4 100755
--- a/tests/travis/run_test.sh
+++ b/tests/travis/run_test.sh
@@ -53,7 +53,7 @@ if [ ${TASK} == "python_test" ]; then
     echo "-------------------------------"
     source activate python3
     python --version
-    conda install numpy scipy pandas matplotlib nose scikit-learn
+    conda install numpy scipy pandas matplotlib scikit-learn
 
     # Install data table from source
     wget http://releases.llvm.org/5.0.2/clang+llvm-5.0.2-x86_64-linux-gnu-ubuntu-14.04.tar.xz
@@ -62,15 +62,15 @@ if [ ${TASK} == "python_test" ]; then
     python -m pip install datatable --no-binary datatable
 
     python -m pip install graphviz pytest pytest-cov codecov
-    python -m nose -v tests/python || exit -1
-    py.test tests/python --cov=python-package/xgboost
+    py.test -v --fulltrace -s tests/python --cov=python-package/xgboost || exit -1
     codecov
+
     source activate python2
     echo "-------------------------------"
     python --version
-    conda install numpy scipy pandas matplotlib nose scikit-learn
+    conda install numpy scipy pandas matplotlib scikit-learn
     python -m pip install graphviz
-    python -m nose -v tests/python || exit -1
+    py.test -v --fulltrace -s tests/python || exit -1
     exit 0
 fi
 
@@ -79,17 +79,15 @@ if [ ${TASK} == "python_lightweight_test" ]; then
     echo "-------------------------------"
     source activate python3
     python --version
-    conda install numpy scipy nose
+    conda install numpy scipy
     python -m pip install graphviz pytest pytest-cov codecov
-    python -m nose -v tests/python || exit -1
-    py.test tests/python --cov=python-package/xgboost
+    py.test -v --fulltrace -s tests/python --cov=python-package/xgboost || exit -1
     codecov
 
     source activate python2
     echo "-------------------------------"
     python --version
-    conda install numpy scipy nose
+    conda install numpy scipy
     python -m pip install graphviz
-    python -m nose -v tests/python || exit -1
     python -m pip install flake8==3.4.1
     flake8 --ignore E501 python-package || exit -1
     flake8 --ignore E501 tests/python || exit -1