diff --git a/python-package/xgboost/compat.py b/python-package/xgboost/compat.py index 671090968..44707c539 100644 --- a/python-package/xgboost/compat.py +++ b/python-package/xgboost/compat.py @@ -48,11 +48,13 @@ try: from sklearn.cross_validation import KFold, StratifiedKFold SKLEARN_INSTALLED = True - XGBKFold = KFold - XGBStratifiedKFold = StratifiedKFold XGBModelBase = BaseEstimator XGBRegressorBase = RegressorMixin XGBClassifierBase = ClassifierMixin + + XGBKFold = KFold + XGBStratifiedKFold = StratifiedKFold + XGBLabelEncoder = LabelEncoder except ImportError: SKLEARN_INSTALLED = False @@ -60,5 +62,7 @@ except ImportError: XGBModelBase = object XGBClassifierBase = object XGBRegressorBase = object + XGBKFold = None XGBStratifiedKFold = None + XGBLabelEncoder = None diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index 66ae44fff..141a79182 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -7,8 +7,10 @@ import numpy as np from .core import Booster, DMatrix, XGBoostError from .training import train +# Do not use class names on scikit-learn directly. 
+# Re-define the classes in .compat to guarantee the behavior without scikit-learn from .compat import (SKLEARN_INSTALLED, XGBModelBase,
evalerror(self, preds, dtrain): + tm._skip_if_no_sklearn() + from sklearn.metrics import mean_squared_error + labels = dtrain.get_label() return 'rmse', mean_squared_error(labels, preds) def test_cv_early_stopping(self): + tm._skip_if_no_sklearn() + from sklearn.datasets import load_digits + digits = load_digits(2) X = digits['data'] y = digits['target'] diff --git a/tests/python/test_eval_metrics.py b/tests/python/test_eval_metrics.py index d76916557..2391bfe28 100644 --- a/tests/python/test_eval_metrics.py +++ b/tests/python/test_eval_metrics.py @@ -1,8 +1,6 @@ import xgboost as xgb +import xgboost.testing as tm import numpy as np -from sklearn.cross_validation import train_test_split -from sklearn.metrics import mean_squared_error -from sklearn.datasets import load_digits import unittest rng = np.random.RandomState(1337) @@ -42,16 +40,26 @@ class TestEvalMetrics(unittest.TestCase): return [('error', float(sum(labels != (preds > 0.0))) / len(labels))] def evalerror_03(self, preds, dtrain): + tm._skip_if_no_sklearn() + from sklearn.metrics import mean_squared_error + labels = dtrain.get_label() return [('rmse', mean_squared_error(labels, preds)), ('error', float(sum(labels != (preds > 0.0))) / len(labels))] def evalerror_04(self, preds, dtrain): + tm._skip_if_no_sklearn() + from sklearn.metrics import mean_squared_error + labels = dtrain.get_label() return [('error', float(sum(labels != (preds > 0.0))) / len(labels)), ('rmse', mean_squared_error(labels, preds))] def test_eval_metrics(self): + tm._skip_if_no_sklearn() + from sklearn.cross_validation import train_test_split + from sklearn.datasets import load_digits + digits = load_digits(2) X = digits['data'] y = digits['target'] diff --git a/tests/python/test_plotting.py b/tests/python/test_plotting.py index 20b62d2b3..7a70bd95e 100644 --- a/tests/python/test_plotting.py +++ b/tests/python/test_plotting.py @@ -1,19 +1,27 @@ # -*- coding: utf-8 -*- import numpy as np import xgboost as xgb +import xgboost.testing as 
tm import unittest -import matplotlib -from matplotlib.axes import Axes -from graphviz import Digraph +try: + import matplotlib + matplotlib.use('Agg') + from matplotlib.axes import Axes + from graphviz import Digraph +except ImportError: + pass + + +tm._skip_if_no_matplotlib() -matplotlib.use('Agg') dpath = 'demo/data/' rng = np.random.RandomState(1994) class TestPlotting(unittest.TestCase): + def test_plotting(self): bst2 = xgb.Booster(model_file='xgb.model') diff --git a/tests/python/test_training_continuation.py b/tests/python/test_training_continuation.py index 21da9df1d..2cb93f9ac 100644 --- a/tests/python/test_training_continuation.py +++ b/tests/python/test_training_continuation.py @@ -1,7 +1,6 @@ import xgboost as xgb +import xgboost.testing as tm import numpy as np -from sklearn.metrics import mean_squared_error -from sklearn.datasets import load_digits import unittest rng = np.random.RandomState(1337) @@ -29,6 +28,10 @@ class TestTrainingContinuation(unittest.TestCase): } def test_training_continuation(self): + tm._skip_if_no_sklearn() + from sklearn.datasets import load_digits + from sklearn.metrics import mean_squared_error + digits_2class = load_digits(2) digits_5class = load_digits(5) diff --git a/tests/python/test_with_pandas.py b/tests/python/test_with_pandas.py index f23e2b946..9536c1e82 100644 --- a/tests/python/test_with_pandas.py +++ b/tests/python/test_with_pandas.py @@ -1,15 +1,26 @@ # -*- coding: utf-8 -*- import numpy as np import xgboost as xgb +import xgboost.testing as tm import unittest -import pandas as pd + +try: + import pandas as pd +except ImportError: + pass + + +tm._skip_if_no_pandas() + dpath = 'demo/data/' rng = np.random.RandomState(1994) class TestPandas(unittest.TestCase): + def test_pandas(self): + df = pd.DataFrame([[1, 2., True], [2, 3., False]], columns=['a', 'b', 'c']) dm = xgb.DMatrix(df, label=pd.Series([1, 2])) assert dm.feature_names == ['a', 'b', 'c'] diff --git a/tests/python/test_with_sklearn.py 
b/tests/python/test_with_sklearn.py index 4cf63dcd0..72ae27948 100644 --- a/tests/python/test_with_sklearn.py +++ b/tests/python/test_with_sklearn.py @@ -1,15 +1,16 @@ +import numpy as np import random import xgboost as xgb -import numpy as np -from sklearn.metrics import mean_squared_error -from sklearn.grid_search import GridSearchCV -from sklearn.datasets import load_iris, load_digits, load_boston -from sklearn.cross_validation import KFold, StratifiedKFold, train_test_split +import xgboost.testing as tm rng = np.random.RandomState(1994) def test_binary_classification(): + tm._skip_if_no_sklearn() + from sklearn.datasets import load_digits + from sklearn.cross_validation import KFold + digits = load_digits(2) y = digits['target'] X = digits['data'] @@ -24,6 +25,9 @@ def test_binary_classification(): def test_multiclass_classification(): + tm._skip_if_no_sklearn() + from sklearn.datasets import load_iris + from sklearn.cross_validation import KFold def check_pred(preds, labels): err = sum(1 for i in range(len(preds)) @@ -50,6 +54,9 @@ def test_multiclass_classification(): def test_feature_importances(): + tm._skip_if_no_sklearn() + from sklearn.datasets import load_digits + digits = load_digits(2) y = digits['target'] X = digits['data'] @@ -81,6 +88,11 @@ def test_feature_importances(): def test_boston_housing_regression(): + tm._skip_if_no_sklearn() + from sklearn.metrics import mean_squared_error + from sklearn.datasets import load_boston + from sklearn.cross_validation import KFold + boston = load_boston() y = boston['target'] X = boston['data'] @@ -102,6 +114,10 @@ def test_boston_housing_regression(): def test_parameter_tuning(): + tm._skip_if_no_sklearn() + from sklearn.grid_search import GridSearchCV + from sklearn.datasets import load_boston + boston = load_boston() y = boston['target'] X = boston['data'] @@ -114,6 +130,11 @@ def test_parameter_tuning(): def test_regression_with_custom_objective(): + tm._skip_if_no_sklearn() + from sklearn.metrics import 
mean_squared_error + from sklearn.datasets import load_boston + from sklearn.cross_validation import KFold + def objective_ls(y_true, y_pred): grad = (y_pred - y_true) hess = np.ones(len(y_true)) @@ -143,6 +164,10 @@ def test_regression_with_custom_objective(): def test_classification_with_custom_objective(): + tm._skip_if_no_sklearn() + from sklearn.datasets import load_digits + from sklearn.cross_validation import KFold + def logregobj(y_true, y_pred): y_pred = 1.0 / (1.0 + np.exp(-y_pred)) grad = y_pred - y_true @@ -178,6 +203,10 @@ def test_classification_with_custom_objective(): def test_sklearn_api(): + tm._skip_if_no_sklearn() + from sklearn.datasets import load_iris + from sklearn.cross_validation import train_test_split + iris = load_iris() tr_d, te_d, tr_l, te_l = train_test_split(iris.data, iris.target, train_size=120) @@ -191,6 +220,9 @@ def test_sklearn_api(): def test_sklearn_plotting(): + tm._skip_if_no_sklearn() + from sklearn.datasets import load_iris + iris = load_iris() classifier = xgb.XGBClassifier() @@ -217,6 +249,10 @@ def test_sklearn_plotting(): def test_sklearn_nfolds_cv(): + tm._skip_if_no_sklearn() + from sklearn.datasets import load_digits + from sklearn.cross_validation import StratifiedKFold + digits = load_digits(3) X = digits['data'] y = digits['target'] @@ -243,6 +279,9 @@ def test_sklearn_nfolds_cv(): def test_split_value_histograms(): + tm._skip_if_no_sklearn() + from sklearn.datasets import load_digits + digits_2class = load_digits(2) X = digits_2class['data'] diff --git a/tests/travis/run_test.sh b/tests/travis/run_test.sh index 753523737..e00cb6e01 100755 --- a/tests/travis/run_test.sh +++ b/tests/travis/run_test.sh @@ -45,13 +45,13 @@ if [ ${TASK} == "python_lightweight_test" ]; then python --version conda install numpy scipy nose python -m pip install graphviz - python -m nose tests/python/test_basic*.py || exit -1 + python -m nose tests/python || exit -1 source activate python2 echo "-------------------------------" python 
--version conda install numpy scipy nose python -m pip install graphviz - python -m nose tests/python/test_basic*.py || exit -1 + python -m nose tests/python || exit -1 python -m pip install flake8 flake8 --ignore E501 python-package || exit -1 flake8 --ignore E501 tests/python || exit -1