Export JSON config in get_params. (#5256)

This commit is contained in:
Jiaming Yuan 2020-02-03 12:46:51 +08:00 committed by GitHub
parent ed0216642f
commit a5cc112eea
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 56 additions and 18 deletions

View File

@ -200,7 +200,7 @@ Parameters
@xgboost_model_doc("""Implementation of the Scikit-Learn API for XGBoost.""", @xgboost_model_doc("""Implementation of the Scikit-Learn API for XGBoost.""",
['estimators', 'model', 'objective']) ['estimators', 'model', 'objective'])
class XGBModel(XGBModelBase): class XGBModel(XGBModelBase):
# pylint: disable=too-many-arguments, too-many-instance-attributes, invalid-name, missing-docstring # pylint: disable=too-many-arguments, too-many-instance-attributes, missing-docstring
def __init__(self, max_depth=None, learning_rate=None, n_estimators=100, def __init__(self, max_depth=None, learning_rate=None, n_estimators=100,
verbosity=None, objective=None, booster=None, verbosity=None, objective=None, booster=None,
tree_method=None, n_jobs=None, gamma=None, tree_method=None, n_jobs=None, gamma=None,
@ -210,7 +210,8 @@ class XGBModel(XGBModelBase):
scale_pos_weight=None, base_score=None, random_state=None, scale_pos_weight=None, base_score=None, random_state=None,
missing=None, num_parallel_tree=None, missing=None, num_parallel_tree=None,
monotone_constraints=None, interaction_constraints=None, monotone_constraints=None, interaction_constraints=None,
importance_type="gain", gpu_id=None, **kwargs): importance_type="gain", gpu_id=None,
validate_parameters=False, **kwargs):
if not SKLEARN_INSTALLED: if not SKLEARN_INSTALLED:
raise XGBoostError( raise XGBoostError(
'sklearn needs to be installed in order to use this module') 'sklearn needs to be installed in order to use this module')
@ -243,6 +244,10 @@ class XGBModel(XGBModelBase):
self.interaction_constraints = interaction_constraints self.interaction_constraints = interaction_constraints
self.importance_type = importance_type self.importance_type = importance_type
self.gpu_id = gpu_id self.gpu_id = gpu_id
# Parameter validation is not working with Scikit-Learn interface, as
# it passes all parameters into XGBoost core, whether they are used or
# not.
self.validate_parameters = validate_parameters
def __setstate__(self, state): def __setstate__(self, state):
# backward compatibility code # backward compatibility code
@ -314,11 +319,35 @@ class XGBModel(XGBModelBase):
if isinstance(params['random_state'], np.random.RandomState): if isinstance(params['random_state'], np.random.RandomState):
params['random_state'] = params['random_state'].randint( params['random_state'] = params['random_state'].randint(
np.iinfo(np.int32).max) np.iinfo(np.int32).max)
# Parameter validation is not working with Scikit-Learn interface, as
# it passes all parameters into XGBoost core, whether they are used or def parse_parameter(value):
# not. for t in (int, float):
if 'validate_parameters' not in params.keys(): try:
params['validate_parameters'] = False ret = t(value)
return ret
except ValueError:
continue
return None
# Get internal parameter values
try:
config = json.loads(self.get_booster().save_config())
stack = [config]
internal = {}
while stack:
obj = stack.pop()
for k, v in obj.items():
if k.endswith('_param'):
for p_k, p_v in v.items():
internal[p_k] = p_v
elif isinstance(v, dict):
stack.append(v)
for k, v in internal.items():
if k in params.keys() and params[k] is None:
params[k] = parse_parameter(v)
except XGBoostError:
pass
return params return params
def get_xgb_params(self): def get_xgb_params(self):

View File

@ -269,10 +269,7 @@ class LearnerImpl : public Learner {
} }
} }
} }
auto learner_model_param = mparam_.ToJson();
for (auto const& kv : get<Object>(learner_model_param)) {
keys.emplace_back(kv.first);
}
keys.emplace_back(kEvalMetric); keys.emplace_back(kEvalMetric);
keys.emplace_back("verbosity"); keys.emplace_back("verbosity");
keys.emplace_back("num_output_group"); keys.emplace_back("num_output_group");
@ -425,6 +422,7 @@ class LearnerImpl : public Learner {
auto& learner_parameters = out["learner"]; auto& learner_parameters = out["learner"];
learner_parameters["learner_train_param"] = toJson(tparam_); learner_parameters["learner_train_param"] = toJson(tparam_);
learner_parameters["learner_model_param"] = mparam_.ToJson();
learner_parameters["gradient_booster"] = Object(); learner_parameters["gradient_booster"] = Object();
auto& gradient_booster = learner_parameters["gradient_booster"]; auto& gradient_booster = learner_parameters["gradient_booster"];
gbm_->SaveConfig(&gradient_booster); gbm_->SaveConfig(&gradient_booster);

View File

@ -490,6 +490,13 @@ def test_kwargs():
assert clf.get_params()['n_estimators'] == 1000 assert clf.get_params()['n_estimators'] == 1000
def test_kwargs_error():
    """Duplicate parameter passed both in **kwargs and as an explicit
    keyword must raise ``TypeError`` at construction time."""
    # 'n_jobs' is present here AND given explicitly below -> duplicate kwarg.
    params = {'updater': 'grow_gpu_hist', 'subsample': .5, 'n_jobs': -1}
    with pytest.raises(TypeError):
        clf = xgb.XGBClassifier(n_jobs=1000, **params)
        # NOTE(review): unreachable when the constructor raises as expected;
        # presumably kept as a guard should no exception be thrown — confirm.
        assert isinstance(clf, xgb.XGBClassifier)
def test_kwargs_grid_search(): def test_kwargs_grid_search():
from sklearn.model_selection import GridSearchCV from sklearn.model_selection import GridSearchCV
from sklearn import datasets from sklearn import datasets
@ -510,13 +517,6 @@ def test_kwargs_grid_search():
assert len(means) == len(set(means)) assert len(means) == len(set(means))
def test_kwargs_error():
params = {'updater': 'grow_gpu_hist', 'subsample': .5, 'n_jobs': -1}
with pytest.raises(TypeError):
clf = xgb.XGBClassifier(n_jobs=1000, **params)
assert isinstance(clf, xgb.XGBClassifier)
def test_sklearn_clone(): def test_sklearn_clone():
from sklearn.base import clone from sklearn.base import clone
@ -525,6 +525,17 @@ def test_sklearn_clone():
clone(clf) clone(clf)
def test_sklearn_get_default_params():
    """``get_params()`` reports ``None`` for unset parameters before ``fit``
    and the booster's resolved internal value afterwards."""
    from sklearn.datasets import load_digits
    # Two-class subset of the digits dataset.
    digits_2class = load_digits(2)
    X = digits_2class['data']
    y = digits_2class['target']
    cls = xgb.XGBClassifier()
    # Not yet trained: the default is delegated to XGBoost core, so the
    # scikit-learn wrapper still holds None.
    assert cls.get_params()['base_score'] is None
    # Tiny slice — this exercises parameter export only, not model quality.
    cls.fit(X[:4, ...], y[:4, ...])
    # After fit, get_params() reads the value back from the booster config.
    assert cls.get_params()['base_score'] is not None
def test_validation_weights_xgbmodel(): def test_validation_weights_xgbmodel():
from sklearn.datasets import make_hastie_10_2 from sklearn.datasets import make_hastie_10_2