diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index 2dc1cafad..b72a859a8 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -226,7 +226,7 @@ PANDAS_DTYPE_MAPPER = {'int8': 'int', 'int16': 'int', 'int32': 'int', 'int64': ' def _maybe_pandas_data(data, feature_names, feature_types): """ Extract internal data from pd.DataFrame for DMatrix data """ - if not isinstance(data, DataFrame): + if not (PANDAS_INSTALLED and isinstance(data, DataFrame)): return data, feature_names, feature_types data_dtypes = data.dtypes @@ -258,7 +258,7 @@ def _maybe_pandas_data(data, feature_names, feature_types): def _maybe_pandas_label(label): """ Extract internal data from pd.DataFrame for DMatrix label """ - if isinstance(label, DataFrame): + if PANDAS_INSTALLED and isinstance(label, DataFrame): if len(label.columns) > 1: raise ValueError('DataFrame for label cannot have multiple columns') diff --git a/tests/python/test_early_stopping.py b/tests/python/test_early_stopping.py index e67e58215..e251f02bf 100644 --- a/tests/python/test_early_stopping.py +++ b/tests/python/test_early_stopping.py @@ -44,6 +44,11 @@ class TestEarlyStopping(unittest.TestCase): labels = dtrain.get_label() return 'rmse', mean_squared_error(labels, preds) + @staticmethod + def assert_metrics_length(cv, expected_length): + for key, value in cv.items(): + assert len(value) == expected_length + @pytest.mark.skipif(**tm.no_sklearn()) def test_cv_early_stopping(self): from sklearn.datasets import load_digits @@ -57,21 +62,21 @@ class TestEarlyStopping(unittest.TestCase): cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, early_stopping_rounds=10) - assert cv.shape[0] == 10 + self.assert_metrics_length(cv, 10) cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, early_stopping_rounds=5) - assert cv.shape[0] == 3 + self.assert_metrics_length(cv, 3) cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, early_stopping_rounds=1) - assert cv.shape[0] == 1 + self.assert_metrics_length(cv, 1) cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, feval=self.evalerror, early_stopping_rounds=10) - assert cv.shape[0] == 10 + self.assert_metrics_length(cv, 10) cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, feval=self.evalerror, early_stopping_rounds=1) - assert cv.shape[0] == 5 + self.assert_metrics_length(cv, 5) cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, feval=self.evalerror, maximize=True, early_stopping_rounds=1) - assert cv.shape[0] == 1 + self.assert_metrics_length(cv, 1) diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py index 09a88ac2b..01ebe8531 100644 --- a/tests/python/test_with_sklearn.py +++ b/tests/python/test_with_sklearn.py @@ -108,6 +108,7 @@ def test_ranking(): np.testing.assert_almost_equal(pred, pred_orig) +@pytest.mark.skipif(**tm.no_pandas()) def test_feature_importances_weight(): from sklearn.datasets import load_digits @@ -140,6 +141,7 @@ def test_feature_importances_weight(): np.testing.assert_almost_equal(xgb_model.feature_importances_, exp) +@pytest.mark.skipif(**tm.no_pandas()) def test_feature_importances_gain(): from sklearn.datasets import load_digits @@ -368,6 +370,7 @@ def test_sklearn_plotting(): assert isinstance(ax, Axes) +@pytest.mark.skipif(**tm.no_pandas()) def test_sklearn_nfolds_cv(): from sklearn.datasets import load_digits from sklearn.model_selection import StratifiedKFold @@ -390,15 +393,16 @@ def test_sklearn_nfolds_cv(): nfolds = 5 skf = StratifiedKFold(n_splits=nfolds, shuffle=True, random_state=seed) - cv1 = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds, seed=seed) + cv1 = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds, seed=seed, as_pandas=True) cv2 = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds, - folds=skf, seed=seed) + folds=skf, seed=seed, as_pandas=True) cv3 = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds, - stratified=True, seed=seed) + stratified=True, seed=seed, as_pandas=True) assert cv1.shape[0] == cv2.shape[0] and cv2.shape[0] == cv3.shape[0] assert cv2.iloc[-1, 0] == cv3.iloc[-1, 0] +@pytest.mark.skipif(**tm.no_pandas()) def test_split_value_histograms(): from sklearn.datasets import load_digits