Use pytest conventions consistently (#6337)

* Do not derive from unittest.TestCase (not needed for pytest)

* assertRaises -> pytest.raises

* Simplify test_empty_dmatrix with test parametrization

* setUpClass -> setup_class, tearDownClass -> teardown_class

* Don't import unittest; import pytest

* Use plain assert

* Use parametrized tests in more places

* Fix test_gpu_with_sklearn.py

* Put back run_empty_dmatrix_reg / run_empty_dmatrix_cls

* Fix test_eta_decay_gpu_hist

* Add parametrized tests for monotone constraints

* Fix test names

* Remove test parametrization

* Revise test_slice to be not flaky
This commit is contained in:
Philip Hyunsu Cho
2020-11-19 17:00:15 -08:00
committed by GitHub
parent c763b50dd0
commit 9c9070aea2
34 changed files with 200 additions and 223 deletions

View File

@@ -1,7 +1,6 @@
# -*- coding: utf-8 -*-
import numpy as np
import xgboost as xgb
import unittest
import scipy.sparse
import pytest
from scipy.sparse import rand, csr_matrix
@@ -12,7 +11,7 @@ dpath = 'demo/data/'
rng = np.random.RandomState(1994)
class TestDMatrix(unittest.TestCase):
class TestDMatrix:
def test_warn_missing(self):
from xgboost import data
with pytest.warns(UserWarning):
@@ -48,15 +47,19 @@ class TestDMatrix(unittest.TestCase):
assert dm.num_col() == 2
# 0d array
self.assertRaises(ValueError, xgb.DMatrix, np.array(1))
with pytest.raises(ValueError):
xgb.DMatrix(np.array(1))
# 1d array
self.assertRaises(ValueError, xgb.DMatrix, np.array([1, 2, 3]))
with pytest.raises(ValueError):
xgb.DMatrix(np.array([1, 2, 3]))
# 3d array
data = np.random.randn(5, 5, 5)
self.assertRaises(ValueError, xgb.DMatrix, data)
with pytest.raises(ValueError):
xgb.DMatrix(data)
# object dtype
data = np.array([['a', 'b'], ['c', 'd']])
self.assertRaises(ValueError, xgb.DMatrix, data)
with pytest.raises(ValueError):
xgb.DMatrix(data)
def test_csr(self):
indptr = np.array([0, 2, 3, 6])
@@ -106,56 +109,50 @@ class TestDMatrix(unittest.TestCase):
def test_slice(self):
X = rng.randn(100, 100)
y = rng.randint(low=0, high=3, size=100)
y = rng.randint(low=0, high=3, size=100).astype(np.float32)
d = xgb.DMatrix(X, y)
np.testing.assert_equal(d.get_label(), y.astype(np.float32))
np.testing.assert_equal(d.get_label(), y)
fw = rng.uniform(size=100).astype(np.float32)
d.set_info(feature_weights=fw)
eval_res_0 = {}
booster = xgb.train(
{'num_class': 3, 'objective': 'multi:softprob',
'eval_metric': 'merror'},
d,
num_boost_round=2, evals=[(d, 'd')], evals_result=eval_res_0)
predt = booster.predict(d)
predt = predt.reshape(100 * 3, 1)
d.set_base_margin(predt)
# base margin is per-class in multi-class classifier
base_margin = rng.randn(100, 3).astype(np.float32)
d.set_base_margin(base_margin.flatten())
ridxs = [1, 2, 3, 4, 5, 6]
sliced = d.slice(ridxs)
sliced_margin = sliced.get_float_info('base_margin')
assert sliced_margin.shape[0] == len(ridxs) * 3
# Slicing works with label and other meta info fields
np.testing.assert_equal(sliced.get_label(), y[1:7])
np.testing.assert_equal(sliced.get_float_info('feature_weights'), fw)
np.testing.assert_equal(sliced.get_base_margin(), base_margin[1:7, :].flatten())
np.testing.assert_equal(sliced.get_base_margin(), sliced.get_float_info('base_margin'))
eval_res_1 = {}
xgb.train(
# Slicing a DMatrix results in a DMatrix that's equivalent to a DMatrix that's
# constructed from the corresponding NumPy slice
d2 = xgb.DMatrix(X[1:7, :], y[1:7])
d2.set_base_margin(base_margin[1:7, :].flatten())
eval_res = {}
_ = xgb.train(
{'num_class': 3, 'objective': 'multi:softprob',
'eval_metric': 'merror'},
sliced,
num_boost_round=2, evals=[(sliced, 'd')], evals_result=eval_res_1)
eval_res_0 = eval_res_0['d']['merror']
eval_res_1 = eval_res_1['d']['merror']
for i in range(len(eval_res_0)):
assert abs(eval_res_0[i] - eval_res_1[i]) < 0.02
'eval_metric': 'mlogloss'},
d,
num_boost_round=2, evals=[(d2, 'd2'), (sliced, 'sliced')], evals_result=eval_res)
np.testing.assert_equal(eval_res['d2']['mlogloss'], eval_res['sliced']['mlogloss'])
def test_feature_names_slice(self):
data = np.random.randn(5, 5)
# different length
self.assertRaises(ValueError, xgb.DMatrix, data,
feature_names=list('abcdef'))
with pytest.raises(ValueError):
xgb.DMatrix(data, feature_names=list('abcdef'))
# contains duplicates
self.assertRaises(ValueError, xgb.DMatrix, data,
feature_names=['a', 'b', 'c', 'd', 'd'])
with pytest.raises(ValueError):
xgb.DMatrix(data, feature_names=['a', 'b', 'c', 'd', 'd'])
# contains symbol
self.assertRaises(ValueError, xgb.DMatrix, data,
feature_names=['a', 'b', 'c', 'd', 'e<1'])
with pytest.raises(ValueError):
xgb.DMatrix(data, feature_names=['a', 'b', 'c', 'd', 'e<1'])
dm = xgb.DMatrix(data)
dm.feature_names = list('abcde')
@@ -170,14 +167,12 @@ class TestDMatrix(unittest.TestCase):
dm.feature_types = list('qiqiq')
assert dm.feature_types == list('qiqiq')
def incorrect_type_set():
with pytest.raises(ValueError):
dm.feature_types = list('abcde')
self.assertRaises(ValueError, incorrect_type_set)
# reset
dm.feature_names = None
self.assertEqual(dm.feature_names, ['f0', 'f1', 'f2', 'f3', 'f4'])
assert dm.feature_names == ['f0', 'f1', 'f2', 'f3', 'f4']
assert dm.feature_types is None
def test_feature_names(self):
@@ -209,7 +204,8 @@ class TestDMatrix(unittest.TestCase):
# different feature name must raise an error
dm = xgb.DMatrix(dummy, feature_names=list('abcde'))
self.assertRaises(ValueError, bst.predict, dm)
with pytest.raises(ValueError):
bst.predict(dm)
def test_get_info(self):
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
@@ -234,9 +230,8 @@ class TestDMatrix(unittest.TestCase):
fw -= 1
def assign_weight():
with pytest.raises(ValueError):
m.set_info(feature_weights=fw)
self.assertRaises(ValueError, assign_weight)
def test_sparse_dmatrix_csr(self):
nrow = 100