From a5cc112eea80db913f8b97f1f6590160e848686c Mon Sep 17 00:00:00 2001
From: Jiaming Yuan
Date: Mon, 3 Feb 2020 12:46:51 +0800
Subject: [PATCH] Export JSON config in `get_params`. (#5256)

---
 python-package/xgboost/sklearn.py | 43 ++++++++++++++++++++++++++-----
 src/learner.cc                    |  6 ++---
 tests/python/test_with_sklearn.py | 25 +++++++++++++-----
 3 files changed, 56 insertions(+), 18 deletions(-)

diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py
index d59b93cbc..f296ec16b 100644
--- a/python-package/xgboost/sklearn.py
+++ b/python-package/xgboost/sklearn.py
@@ -200,7 +200,7 @@ Parameters
 @xgboost_model_doc("""Implementation of the Scikit-Learn API for XGBoost.""",
                    ['estimators', 'model', 'objective'])
 class XGBModel(XGBModelBase):
-    # pylint: disable=too-many-arguments, too-many-instance-attributes, invalid-name, missing-docstring
+    # pylint: disable=too-many-arguments, too-many-instance-attributes, missing-docstring
     def __init__(self, max_depth=None, learning_rate=None, n_estimators=100,
                  verbosity=None, objective=None, booster=None,
                  tree_method=None, n_jobs=None, gamma=None,
@@ -210,7 +210,8 @@ class XGBModel(XGBModelBase):
                  scale_pos_weight=None, base_score=None, random_state=None,
                  missing=None, num_parallel_tree=None,
                  monotone_constraints=None, interaction_constraints=None,
-                 importance_type="gain", gpu_id=None, **kwargs):
+                 importance_type="gain", gpu_id=None,
+                 validate_parameters=False, **kwargs):
         if not SKLEARN_INSTALLED:
             raise XGBoostError(
                 'sklearn needs to be installed in order to use this module')
@@ -243,6 +244,10 @@ class XGBModel(XGBModelBase):
         self.interaction_constraints = interaction_constraints
         self.importance_type = importance_type
         self.gpu_id = gpu_id
+        # Parameter validation is not working with the Scikit-Learn
+        # interface, as it passes all parameters into XGBoost core, whether
+        # they are used or not.
+        self.validate_parameters = validate_parameters
 
     def __setstate__(self, state):
         # backward compatibility code
@@ -314,11 +319,35 @@ class XGBModel(XGBModelBase):
         if isinstance(params['random_state'], np.random.RandomState):
             params['random_state'] = params['random_state'].randint(
                 np.iinfo(np.int32).max)
-        # Parameter validation is not working with Scikit-Learn interface, as
-        # it passes all paraemters into XGBoost core, whether they are used or
-        # not.
-        if 'validate_parameters' not in params.keys():
-            params['validate_parameters'] = False
+
+        def parse_parameter(value):
+            for t in (int, float):
+                try:
+                    ret = t(value)
+                    return ret
+                except ValueError:
+                    continue
+            return None
+
+        # Get internal parameter values
+        try:
+            config = json.loads(self.get_booster().save_config())
+            stack = [config]
+            internal = {}
+            while stack:
+                obj = stack.pop()
+                for k, v in obj.items():
+                    if k.endswith('_param'):
+                        for p_k, p_v in v.items():
+                            internal[p_k] = p_v
+                    elif isinstance(v, dict):
+                        stack.append(v)
+
+            for k, v in internal.items():
+                if k in params.keys() and params[k] is None:
+                    params[k] = parse_parameter(v)
+        except XGBoostError:
+            pass
         return params
 
     def get_xgb_params(self):
diff --git a/src/learner.cc b/src/learner.cc
index b81af7751..70ffceda6 100644
--- a/src/learner.cc
+++ b/src/learner.cc
@@ -269,10 +269,7 @@ class LearnerImpl : public Learner {
         }
       }
     }
-    auto learner_model_param = mparam_.ToJson();
-    for (auto const& kv : get<Object>(learner_model_param)) {
-      keys.emplace_back(kv.first);
-    }
+
     keys.emplace_back(kEvalMetric);
     keys.emplace_back("verbosity");
     keys.emplace_back("num_output_group");
@@ -425,6 +422,7 @@ class LearnerImpl : public Learner {
 
     auto& learner_parameters = out["learner"];
     learner_parameters["learner_train_param"] = toJson(tparam_);
+    learner_parameters["learner_model_param"] = mparam_.ToJson();
     learner_parameters["gradient_booster"] = Object();
     auto& gradient_booster = learner_parameters["gradient_booster"];
     gbm_->SaveConfig(&gradient_booster);
diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py
index 58704c0cc..d29de0464 100644
--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@@ -490,6 +490,13 @@ def test_kwargs():
     assert clf.get_params()['n_estimators'] == 1000
 
 
+def test_kwargs_error():
+    params = {'updater': 'grow_gpu_hist', 'subsample': .5, 'n_jobs': -1}
+    with pytest.raises(TypeError):
+        clf = xgb.XGBClassifier(n_jobs=1000, **params)
+        assert isinstance(clf, xgb.XGBClassifier)
+
+
 def test_kwargs_grid_search():
     from sklearn.model_selection import GridSearchCV
     from sklearn import datasets
@@ -510,13 +517,6 @@ def test_kwargs_grid_search():
     assert len(means) == len(set(means))
 
 
-def test_kwargs_error():
-    params = {'updater': 'grow_gpu_hist', 'subsample': .5, 'n_jobs': -1}
-    with pytest.raises(TypeError):
-        clf = xgb.XGBClassifier(n_jobs=1000, **params)
-        assert isinstance(clf, xgb.XGBClassifier)
-
-
 def test_sklearn_clone():
     from sklearn.base import clone
 
@@ -525,6 +525,17 @@ def test_sklearn_clone():
     clf = xgb.XGBClassifier(n_jobs=2)
     clf.n_jobs = -1
     clone(clf)
 
 
+def test_sklearn_get_default_params():
+    from sklearn.datasets import load_digits
+    digits_2class = load_digits(2)
+    X = digits_2class['data']
+    y = digits_2class['target']
+    cls = xgb.XGBClassifier()
+    assert cls.get_params()['base_score'] is None
+    cls.fit(X[:4, ...], y[:4, ...])
+    assert cls.get_params()['base_score'] is not None
+
+
 def test_validation_weights_xgbmodel():
     from sklearn.datasets import make_hastie_10_2
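
A minimal sketch of the behaviour this patch introduces, assuming a build
with the change applied. The `base_score` parameter and the
`learner_model_param` config entry come from the hunks above; everything
else is standard xgboost and scikit-learn API:

    import json

    import xgboost as xgb
    from sklearn.datasets import load_digits

    X, y = load_digits(n_class=2, return_X_y=True)

    cls = xgb.XGBClassifier()
    # Unset constructor arguments stay None before training.
    assert cls.get_params()['base_score'] is None

    cls.fit(X, y)
    # After fit(), get_params() walks the JSON config exported by
    # Learner::SaveConfig and fills in the resolved internal values.
    assert cls.get_params()['base_score'] is not None

    # The same value is visible in the raw config, under the
    # learner_model_param entry that learner.cc now exports.
    config = json.loads(cls.get_booster().save_config())
    print(config['learner']['learner_model_param']['base_score'])

Only entries that are still None are overwritten, so values the user set
explicitly take precedence, and `parse_parameter` converts the config's
string values back to int/float where possible.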