pytest tests/python fails if no pandas installed (#4620)
* _maybe_pandas_xxx helpers should return their arguments unchanged if pandas is not installed
* Tests should not assume pandas is installed
* Mark tests which require pandas as such
parent 45876bf41b
commit 986fee6022
@@ -226,7 +226,7 @@ PANDAS_DTYPE_MAPPER = {'int8': 'int', 'int16': 'int', 'int32': 'int', 'int64': '
 def _maybe_pandas_data(data, feature_names, feature_types):
     """ Extract internal data from pd.DataFrame for DMatrix data """

-    if not isinstance(data, DataFrame):
+    if not (PANDAS_INSTALLED and isinstance(data, DataFrame)):
         return data, feature_names, feature_types

     data_dtypes = data.dtypes
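The guard above depends on a module-level PANDAS_INSTALLED flag and a DataFrame name that must both resolve even when pandas is absent. A minimal sketch of how such a compatibility shim can be written (assumed layout; xgboost's actual compat module may differ):

    # Compatibility shim sketch: define PANDAS_INSTALLED and a DataFrame
    # placeholder so the isinstance() guard stays valid without pandas.
    try:
        from pandas import DataFrame
        PANDAS_INSTALLED = True
    except ImportError:
        PANDAS_INSTALLED = False

        class DataFrame(object):
            """Placeholder type; never instantiated when pandas is missing."""
            pass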
@@ -258,7 +258,7 @@ def _maybe_pandas_data(data, feature_names, feature_types):
 def _maybe_pandas_label(label):
     """ Extract internal data from pd.DataFrame for DMatrix label """

-    if isinstance(label, DataFrame):
+    if PANDAS_INSTALLED and isinstance(label, DataFrame):
         if len(label.columns) > 1:
             raise ValueError('DataFrame for label cannot have multiple columns')

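With both guards in place, the helpers become pass-throughs for anything that is not a pandas DataFrame, whether or not pandas is importable. A hypothetical call showing the intended contract (numpy input, helper names as in the diff):

    import numpy as np

    data = np.random.rand(5, 3)
    # The guard short-circuits, so the arguments come back unchanged.
    out, names, types = _maybe_pandas_data(data, None, None)
    assert out is data and names is None and types is None

    label = np.zeros(5)
    assert _maybe_pandas_label(label) is label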
@@ -44,6 +44,11 @@ class TestEarlyStopping(unittest.TestCase):
         labels = dtrain.get_label()
         return 'rmse', mean_squared_error(labels, preds)

+    @staticmethod
+    def assert_metrics_length(cv, expected_length):
+        for key, value in cv.items():
+            assert len(value) == expected_length
+
     @pytest.mark.skipif(**tm.no_sklearn())
     def test_cv_early_stopping(self):
         from sklearn.datasets import load_digits
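The new helper replaces the cv.shape[0] assertions below because xgb.cv only returns a pandas DataFrame when pandas is available; without it, the result is a plain dict mapping metric names to per-round lists, which has no .shape. Iterating with .items() works for either form: a DataFrame yields (column, Series) pairs and a dict yields (metric, list) pairs, and in both cases len() gives the number of boosting rounds. An illustrative dict result (keys depend on the metrics actually requested):

    cv_as_dict = {'train-error-mean': [0.10, 0.08, 0.07],
                  'train-error-std': [0.01, 0.01, 0.01],
                  'test-error-mean': [0.20, 0.18, 0.17],
                  'test-error-std': [0.02, 0.02, 0.02]}

    # Equivalent to assert_metrics_length(cv_as_dict, 3): every metric
    # carries one entry per completed boosting round.
    for key, value in cv_as_dict.items():
        assert len(value) == 3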
@@ -57,21 +62,21 @@ class TestEarlyStopping(unittest.TestCase):

         cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
                     early_stopping_rounds=10)
-        assert cv.shape[0] == 10
+        self.assert_metrics_length(cv, 10)
         cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
                     early_stopping_rounds=5)
-        assert cv.shape[0] == 3
+        self.assert_metrics_length(cv, 3)
         cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
                     early_stopping_rounds=1)
-        assert cv.shape[0] == 1
+        self.assert_metrics_length(cv, 1)

         cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
                     feval=self.evalerror, early_stopping_rounds=10)
-        assert cv.shape[0] == 10
+        self.assert_metrics_length(cv, 10)
         cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
                     feval=self.evalerror, early_stopping_rounds=1)
-        assert cv.shape[0] == 5
+        self.assert_metrics_length(cv, 5)
         cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
                     feval=self.evalerror, maximize=True,
                     early_stopping_rounds=1)
-        assert cv.shape[0] == 1
+        self.assert_metrics_length(cv, 1)
@@ -108,6 +108,7 @@ def test_ranking():
     np.testing.assert_almost_equal(pred, pred_orig)


+@pytest.mark.skipif(**tm.no_pandas())
 def test_feature_importances_weight():
     from sklearn.datasets import load_digits

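tm.no_pandas() follows the same pattern as the existing tm.no_sklearn() marker: it returns keyword arguments that pytest.mark.skipif unpacks, so the test is skipped instead of failing with an ImportError. A plausible sketch of such a helper (the exact reason string and module layout are assumptions, not xgboost's verbatim code):

    def no_pandas():
        try:
            import pandas  # noqa: F401
            installed = True
        except ImportError:
            installed = False
        # Unpacked as @pytest.mark.skipif(**tm.no_pandas())
        return {'condition': not installed,
                'reason': 'pandas is not installed'}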
@@ -140,6 +141,7 @@ def test_feature_importances_weight():
     np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)


+@pytest.mark.skipif(**tm.no_pandas())
 def test_feature_importances_gain():
     from sklearn.datasets import load_digits

@@ -368,6 +370,7 @@ def test_sklearn_plotting():
     assert isinstance(ax, Axes)


+@pytest.mark.skipif(**tm.no_pandas())
 def test_sklearn_nfolds_cv():
     from sklearn.datasets import load_digits
     from sklearn.model_selection import StratifiedKFold
@@ -390,15 +393,16 @@ def test_sklearn_nfolds_cv():
     nfolds = 5
     skf = StratifiedKFold(n_splits=nfolds, shuffle=True, random_state=seed)

-    cv1 = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds, seed=seed)
+    cv1 = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds, seed=seed, as_pandas=True)
     cv2 = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds,
-                 folds=skf, seed=seed)
+                 folds=skf, seed=seed, as_pandas=True)
     cv3 = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds,
-                 stratified=True, seed=seed)
+                 stratified=True, seed=seed, as_pandas=True)
     assert cv1.shape[0] == cv2.shape[0] and cv2.shape[0] == cv3.shape[0]
     assert cv2.iloc[-1, 0] == cv3.iloc[-1, 0]


+@pytest.mark.skipif(**tm.no_pandas())
 def test_split_value_histograms():
     from sklearn.datasets import load_digits

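test_sklearn_nfolds_cv genuinely needs a DataFrame back from xgb.cv, since it compares runs through .shape and .iloc; passing as_pandas=True makes that explicit, and the new skip marker keeps the whole test out of pandas-free runs. A condensed view of what those assertions rely on (column names are illustrative only):

    import pandas as pd

    # xgb.cv(..., as_pandas=True) yields one row per boosting round.
    cv = pd.DataFrame({'test-merror-mean': [0.30, 0.25, 0.22],
                       'test-merror-std': [0.02, 0.02, 0.01]})

    assert cv.shape[0] == 3      # number of rounds completed
    last_mean = cv.iloc[-1, 0]   # final value of the first column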