From c69a19e2b107d44dbeb6620464245b4d0204a109 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Sat, 18 Apr 2020 06:52:17 +0800 Subject: [PATCH] Fix skl nan tag. (#5538) --- Jenkinsfile-win64 | 8 ++++++++ python-package/xgboost/sklearn.py | 4 ++++ tests/python/test_with_sklearn.py | 16 +++++++++++++--- 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/Jenkinsfile-win64 b/Jenkinsfile-win64 index f78a61b40..15dc345ed 100644 --- a/Jenkinsfile-win64 +++ b/Jenkinsfile-win64 @@ -112,6 +112,10 @@ def TestWin64CPU() { bat """ conda activate && for /R %%i in (python-package\\dist\\*.whl) DO python -m pip install "%%i" """ + echo "Installing Python dependencies..." + bat """ + conda activate && conda upgrade scikit-learn pandas numpy + """ echo "Running Python tests..." bat "conda activate && python -m pytest -v -s --fulltrace tests\\python" bat "conda activate && python -m pip uninstall -y xgboost" @@ -133,6 +137,10 @@ def TestWin64GPU(args) { bat """ conda activate && for /R %%i in (python-package\\dist\\*.whl) DO python -m pip install "%%i" """ + echo "Installing Python dependencies..." + bat """ + conda activate && conda upgrade scikit-learn pandas numpy + """ echo "Running Python tests..." bat """ conda activate && python -m pytest -v -s --fulltrace -m "(not slow) and (not mgpu)" tests\\python-gpu diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index bf49202e7..c1f73c3f2 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -244,6 +244,10 @@ class XGBModel(XGBModelBase): self.gpu_id = gpu_id self.validate_parameters = validate_parameters + def _more_tags(self): + '''Tags used for scikit-learn data validation.''' + return {'allow_nan': True} + def get_booster(self): """Get the underlying xgboost Booster of this model. diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py index d2b04927e..1f7b33c88 100644 --- a/tests/python/test_with_sklearn.py +++ b/tests/python/test_with_sklearn.py @@ -719,7 +719,7 @@ def test_RFECV(): # Regression X, y = load_boston(return_X_y=True) bst = xgb.XGBClassifier(booster='gblinear', learning_rate=0.1, - n_estimators=10, n_jobs=1, + n_estimators=10, objective='reg:squarederror', random_state=0, verbosity=0) rfecv = RFECV( @@ -729,7 +729,7 @@ def test_RFECV(): # Binary classification X, y = load_breast_cancer(return_X_y=True) bst = xgb.XGBClassifier(booster='gblinear', learning_rate=0.1, - n_estimators=10, n_jobs=1, + n_estimators=10, objective='binary:logistic', random_state=0, verbosity=0) rfecv = RFECV(estimator=bst, step=1, cv=3, scoring='roc_auc') @@ -739,13 +739,23 @@ def test_RFECV(): X, y = load_iris(return_X_y=True) bst = xgb.XGBClassifier(base_score=0.4, booster='gblinear', learning_rate=0.1, - n_estimators=10, n_jobs=1, + n_estimators=10, objective='multi:softprob', random_state=0, reg_alpha=0.001, reg_lambda=0.01, scale_pos_weight=0.5, verbosity=0) rfecv = RFECV(estimator=bst, step=1, cv=3, scoring='neg_log_loss') rfecv.fit(X, y) + X[0:4, :] = np.nan # verify scikit_learn doesn't throw with nan + reg = xgb.XGBRegressor() + rfecv = RFECV(estimator=reg) + rfecv.fit(X, y) + + cls = xgb.XGBClassifier() + rfecv = RFECV(estimator=cls, step=1, cv=3, + scoring='neg_mean_squared_error') + rfecv.fit(X, y) + def test_XGBClassifier_resume(): from sklearn.datasets import load_breast_cancer