Enable parameter validation for skl. (#5477)

Jiaming Yuan 2020-04-03 10:23:58 +08:00 committed by GitHub
parent d0b86c75d9
commit c218d8ffbf
3 changed files with 43 additions and 12 deletions

python-package/xgboost/core.py

@@ -1098,6 +1098,7 @@ class DeviceQuantileDMatrix(DMatrix):
             ctypes.c_int(self.max_bin), ctypes.byref(handle)))
         self.handle = handle
 
+
 class Booster(object):
     # pylint: disable=too-many-public-methods
     """A Booster of XGBoost.
@@ -1129,10 +1130,12 @@ class Booster(object):
         self.handle = ctypes.c_void_p()
         _check_call(_LIB.XGBoosterCreate(dmats, c_bst_ulong(len(cache)),
                                          ctypes.byref(self.handle)))
-        if isinstance(params, dict) and \
-           'validate_parameters' not in params.keys():
-            params['validate_parameters'] = 1
+        params = params or {}
+        if isinstance(params, list):
+            params.append(('validate_parameters', True))
+        else:
+            params['validate_parameters'] = True
         self.set_param(params or {})
         if (params is not None) and ('booster' in params):
             self.booster = params['booster']
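Note: after this change the Python Booster always opts in to validate_parameters, so unknown keys surface as warnings instead of being silently ignored. A minimal sketch of the resulting behaviour (the synthetic data and the misspelled 'foo' key are illustrative, not part of the commit):

    import numpy as np
    import xgboost as xgb

    X = np.random.randn(100, 10)
    y = np.random.randn(100)
    dtrain = xgb.DMatrix(X, label=y)

    # 'foo' is not a recognized XGBoost parameter; with validation on
    # by default, training logs a warning naming the unused key.
    xgb.train({'foo': 'bar', 'verbosity': 1}, dtrain, num_boost_round=2)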

python-package/xgboost/sklearn.py

@@ -210,7 +210,7 @@ class XGBModel(XGBModelBase):
                  missing=np.nan, num_parallel_tree=None,
                  monotone_constraints=None, interaction_constraints=None,
                  importance_type="gain", gpu_id=None,
-                 validate_parameters=False, **kwargs):
+                 validate_parameters=None, **kwargs):
         if not SKLEARN_INSTALLED:
             raise XGBoostError(
                 'sklearn needs to be installed in order to use this module')
@@ -242,9 +242,6 @@ class XGBModel(XGBModelBase):
         self.interaction_constraints = interaction_constraints
         self.importance_type = importance_type
         self.gpu_id = gpu_id
-        # Parameter validation is not working with Scikit-Learn interface, as
-        # it passes all paraemters into XGBoost core, whether they are used or
-        # not.
         self.validate_parameters = validate_parameters
 
     def get_booster(self):
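Note: switching the default from False to None means the wrapper no longer pins validation off; None signals "defer to the native Booster", which now enables it. A tiny illustration, grounded in the constructor above:

    import xgboost as xgb

    # None means "use the native Booster default" rather than
    # explicitly disabling parameter validation.
    reg = xgb.XGBRegressor()
    assert reg.validate_parameters is None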
@@ -340,9 +337,16 @@ class XGBModel(XGBModelBase):
         return params
 
     def get_xgb_params(self):
-        """Get xgboost type parameters."""
-        xgb_params = self.get_params()
-        return xgb_params
+        """Get xgboost specific parameters."""
+        params = self.get_params()
+        # Parameters that should not go into native learner.
+        wrapper_specific = {
+            'importance_type', 'kwargs', 'missing', 'n_estimators'}
+        filtered = dict()
+        for k, v in params.items():
+            if k not in wrapper_specific:
+                filtered[k] = v
+        return filtered
 
     def get_num_boosting_rounds(self):
         """Gets the number of xgboost boosting rounds."""
@@ -540,7 +544,8 @@ class XGBModel(XGBModelBase):
         if evals_result:
             for val in evals_result.items():
                 evals_result_key = list(val[1].keys())[0]
-                evals_result[val[0]][evals_result_key] = val[1][evals_result_key]
+                evals_result[val[0]][evals_result_key] = val[1][
+                    evals_result_key]
             self.evals_result_ = evals_result
 
         if early_stopping_rounds is not None:

tests/python/test_with_sklearn.py

@@ -9,6 +9,8 @@
 import pytest
 import unittest
 import json
+
+from test_basic import captured_output
 
 rng = np.random.RandomState(1994)
 pytestmark = pytest.mark.skipif(**tm.no_sklearn())
@@ -265,7 +267,7 @@ def test_parameter_tuning():
     xgb_model = xgb.XGBRegressor(learning_rate=0.1)
     clf = GridSearchCV(xgb_model, {'max_depth': [2, 4, 6],
                                    'n_estimators': [50, 100, 200]},
-                       cv=3, verbose=1, iid=True)
+                       cv=3, verbose=1)
     clf.fit(X, y)
     assert clf.best_score_ < 0.7
     assert clf.best_params_ == {'n_estimators': 100, 'max_depth': 4}
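Note: GridSearchCV's iid argument was deprecated in scikit-learn 0.22 and removed in 0.24, so dropping it keeps the test working across scikit-learn versions without changing the search itself.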
@@ -785,6 +787,27 @@ def test_constraint_parameters():
         'train_param']['interaction_constraints'] == '[[0, 1], [2, 3, 4]]'
 
+
+def test_parameter_validation():
+    reg = xgb.XGBRegressor(foo='bar', verbosity=1)
+    X = np.random.randn(10, 10)
+    y = np.random.randn(10)
+    with captured_output() as (out, err):
+        reg.fit(X, y)
+        output = out.getvalue().strip()
+    assert output.find('foo') != -1
+
+    reg = xgb.XGBRegressor(n_estimators=2, missing=3,
+                           importance_type='gain', verbosity=1)
+    X = np.random.randn(10, 10)
+    y = np.random.randn(10)
+    with captured_output() as (out, err):
+        reg.fit(X, y)
+        output = out.getvalue().strip()
+    assert len(output) == 0
+
+
 class TestBoostFromPrediction(unittest.TestCase):
     def run_boost_from_prediction(self, tree_method):
         from sklearn.datasets import load_breast_cancer
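Note: test_parameter_validation leans on the captured_output helper imported from test_basic. A typical implementation of such a helper (a sketch, not necessarily the repository's exact code) swaps the process streams for StringIO buffers:

    import sys
    from contextlib import contextmanager
    from io import StringIO

    @contextmanager
    def captured_output():
        # Temporarily replace stdout/stderr with StringIO buffers so a
        # test can assert on what was printed during the block.
        new_out, new_err = StringIO(), StringIO()
        old_out, old_err = sys.stdout, sys.stderr
        try:
            sys.stdout, sys.stderr = new_out, new_err
            yield new_out, new_err
        finally:
            sys.stdout, sys.stderr = old_out, old_err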