From c218d8ffbf3d6095e4a4928f74b3ee5361cf38a6 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 3 Apr 2020 10:23:58 +0800 Subject: [PATCH] Enable parameter validation for skl. (#5477) --- python-package/xgboost/core.py | 9 ++++++--- python-package/xgboost/sklearn.py | 21 +++++++++++++-------- tests/python/test_with_sklearn.py | 25 ++++++++++++++++++++++++- 3 files changed, 43 insertions(+), 12 deletions(-) diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index 893ef3612..688904ce1 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -1098,6 +1098,7 @@ class DeviceQuantileDMatrix(DMatrix): ctypes.c_int(self.max_bin), ctypes.byref(handle))) self.handle = handle + class Booster(object): # pylint: disable=too-many-public-methods """A Booster of XGBoost. @@ -1129,10 +1130,12 @@ class Booster(object): self.handle = ctypes.c_void_p() _check_call(_LIB.XGBoosterCreate(dmats, c_bst_ulong(len(cache)), ctypes.byref(self.handle))) + params = params or {} + if isinstance(params, list): + params.append(('validate_parameters', True)) + else: + params['validate_parameters'] = True - if isinstance(params, dict) and \ - 'validate_parameters' not in params.keys(): - params['validate_parameters'] = 1 self.set_param(params or {}) if (params is not None) and ('booster' in params): self.booster = params['booster'] diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index 3e20904c0..8787f9a7a 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -210,7 +210,7 @@ class XGBModel(XGBModelBase): missing=np.nan, num_parallel_tree=None, monotone_constraints=None, interaction_constraints=None, importance_type="gain", gpu_id=None, - validate_parameters=False, **kwargs): + validate_parameters=None, **kwargs): if not SKLEARN_INSTALLED: raise XGBoostError( 'sklearn needs to be installed in order to use this module') @@ -242,9 +242,6 @@ class XGBModel(XGBModelBase): self.interaction_constraints = interaction_constraints self.importance_type = importance_type self.gpu_id = gpu_id - # Parameter validation is not working with Scikit-Learn interface, as - # it passes all paraemters into XGBoost core, whether they are used or - # not. self.validate_parameters = validate_parameters def get_booster(self): @@ -340,9 +337,16 @@ class XGBModel(XGBModelBase): return params def get_xgb_params(self): - """Get xgboost type parameters.""" - xgb_params = self.get_params() - return xgb_params + """Get xgboost specific parameters.""" + params = self.get_params() + # Parameters that should not go into native learner. + wrapper_specific = { + 'importance_type', 'kwargs', 'missing', 'n_estimators'} + filtered = dict() + for k, v in params.items(): + if k not in wrapper_specific: + filtered[k] = v + return filtered def get_num_boosting_rounds(self): """Gets the number of xgboost boosting rounds.""" @@ -540,7 +544,8 @@ class XGBModel(XGBModelBase): if evals_result: for val in evals_result.items(): evals_result_key = list(val[1].keys())[0] - evals_result[val[0]][evals_result_key] = val[1][evals_result_key] + evals_result[val[0]][evals_result_key] = val[1][ + evals_result_key] self.evals_result_ = evals_result if early_stopping_rounds is not None: diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py index 0a58f3af5..4f1e8629c 100644 --- a/tests/python/test_with_sklearn.py +++ b/tests/python/test_with_sklearn.py @@ -9,6 +9,8 @@ import pytest import unittest import json +from test_basic import captured_output + rng = np.random.RandomState(1994) pytestmark = pytest.mark.skipif(**tm.no_sklearn()) @@ -265,7 +267,7 @@ def test_parameter_tuning(): xgb_model = xgb.XGBRegressor(learning_rate=0.1) clf = GridSearchCV(xgb_model, {'max_depth': [2, 4, 6], 'n_estimators': [50, 100, 200]}, - cv=3, verbose=1, iid=True) + cv=3, verbose=1) clf.fit(X, y) assert clf.best_score_ < 0.7 assert clf.best_params_ == {'n_estimators': 100, 'max_depth': 4} @@ -785,6 +787,27 @@ def test_constraint_parameters(): 'train_param']['interaction_constraints'] == '[[0, 1], [2, 3, 4]]' +def test_parameter_validation(): + reg = xgb.XGBRegressor(foo='bar', verbosity=1) + X = np.random.randn(10, 10) + y = np.random.randn(10) + with captured_output() as (out, err): + reg.fit(X, y) + output = out.getvalue().strip() + + assert output.find('foo') != -1 + + reg = xgb.XGBRegressor(n_estimators=2, missing=3, + importance_type='gain', verbosity=1) + X = np.random.randn(10, 10) + y = np.random.randn(10) + with captured_output() as (out, err): + reg.fit(X, y) + output = out.getvalue().strip() + + assert len(output) == 0 + + class TestBoostFromPrediction(unittest.TestCase): def run_boost_from_prediction(self, tree_method): from sklearn.datasets import load_breast_cancer