Cleanup configuration for constraints. (#7758)

Jiaming Yuan 2022-03-29 04:22:46 +08:00 committed by GitHub
parent 3c9b04460a
commit a50b84244e
5 changed files with 53 additions and 42 deletions

View File

@@ -134,7 +134,7 @@ Following table summarizes some differences in supported features between 4 tree methods
 +------------------+-----------+---------------------+---------------------+------------------------+
 | categorical data | F         | T                   | T                   | T                      |
 +------------------+-----------+---------------------+---------------------+------------------------+
-| External memory  | F         | T                   | P                   | P                      |
+| External memory  | F         | T                   | T                   | P                      |
 +------------------+-----------+---------------------+---------------------+------------------------+
 | Distributed      | F         | T                   | T                   | T                      |
 +------------------+-----------+---------------------+---------------------+------------------------+

View File

@@ -174,6 +174,14 @@ parameter:
                                      num_boost_round = 1000, evals = evallist,
                                      early_stopping_rounds = 10)
 
+**************************
+Using feature name instead
+**************************
+
+XGBoost's Python package supports using feature names instead of feature index for
+specifying the constraints. Given a data frame with columns ``["f0", "f1", "f2"]``, the
+feature interaction constraint can be specified as ``[["f0", "f2"]]``.
+
 **************
 Advanced topic
 **************
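The added paragraph only states the rule; below is a minimal end-to-end sketch of the feature-name form for interaction constraints. The data frame, column names, and training parameters are illustrative, not part of the commit.

.. code-block:: python

    import numpy as np
    import pandas as pd
    import xgboost as xgb

    # Toy data whose column names match the ``["f0", "f1", "f2"]`` example above.
    X = pd.DataFrame(np.random.rand(100, 3), columns=["f0", "f1", "f2"])
    y = np.random.rand(100)
    dtrain = xgb.DMatrix(X, label=y)

    params = {
        "tree_method": "hist",
        # Feature names instead of column indices: "f0" and "f2" may interact,
        # while "f1" is kept on its own.
        "interaction_constraints": [["f0", "f2"]],
    }
    booster = xgb.train(params, dtrain, num_boost_round=10)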

View File

@@ -69,7 +69,7 @@ Then fitting with monotonicity constraints only requires adding a single parameter:
 .. code-block:: python
 
   params_constrained = params.copy()
-  params_constrained['monotone_constraints'] = "(1,-1)"
+  params_constrained['monotone_constraints'] = (1,-1)
 
   model_with_constraints = xgb.train(params_constrained, dtrain,
                                      num_boost_round = 1000, evals = evallist,
@@ -90,3 +90,13 @@ monotonic constraints may produce unnecessarily shallow trees.
 split. Monotonic constraints may wipe out all available split candidates, in which case no
 split is made. To reduce the effect, you may want to increase the ``max_bin`` parameter to
 consider more split candidates.
+
+*******************
+Using feature names
+*******************
+
+XGBoost's Python package supports using feature names instead of feature index for
+specifying the constraints. Given a data frame with columns ``["f0", "f1", "f2"]``, the
+monotonic constraint can be specified as ``{"f0": 1, "f2": -1}``, and ``"f1"`` will
+default to ``0`` (no constraint).
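Likewise, a small sketch of the feature-name form for monotonic constraints described above; the data and parameter values are made up for illustration.

.. code-block:: python

    import numpy as np
    import pandas as pd
    import xgboost as xgb

    # Toy data whose column names match the ``["f0", "f1", "f2"]`` example above.
    X = pd.DataFrame(np.random.rand(100, 3), columns=["f0", "f1", "f2"])
    y = X["f0"] - X["f2"] + 0.1 * np.random.rand(100)
    dtrain = xgb.DMatrix(X, label=y)

    params = {
        "tree_method": "hist",
        # Increasing in "f0", decreasing in "f2"; "f1" is omitted and defaults
        # to 0 (unconstrained).
        "monotone_constraints": {"f0": 1, "f2": -1},
    }
    booster = xgb.train(params, dtrain, num_boost_round=10)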

View File

@@ -1392,50 +1392,46 @@ class Booster:
             raise TypeError('Unknown type:', model_file)
 
         params = params or {}
-        params = _configure_metrics(params.copy())
-        params = self._configure_constraints(params)
-        if isinstance(params, list):
-            params.append(('validate_parameters', True))
+        params_processed = _configure_metrics(params.copy())
+        params_processed = self._configure_constraints(params_processed)
+        if isinstance(params_processed, list):
+            params_processed.append(("validate_parameters", True))
         else:
-            params['validate_parameters'] = True
-        self.set_param(params or {})
-        if (params is not None) and ('booster' in params):
-            self.booster = params['booster']
-        else:
-            self.booster = 'gbtree'
+            params_processed["validate_parameters"] = True
+        self.set_param(params_processed or {})
 
-    def _transform_monotone_constrains(self, value: Union[Dict[str, int], str]) -> str:
+    def _transform_monotone_constrains(
+        self, value: Union[Dict[str, int], str]
+    ) -> Union[Tuple[int, ...], str]:
         if isinstance(value, str):
             return value
 
         constrained_features = set(value.keys())
-        if not constrained_features.issubset(set(self.feature_names or [])):
-            raise ValueError('Constrained features are not a subset of '
-                             'training data feature names')
-        return '(' + ','.join([str(value.get(feature_name, 0))
-                               for feature_name in self.feature_names]) + ')'
+        feature_names = self.feature_names or []
+        if not constrained_features.issubset(set(feature_names)):
+            raise ValueError(
+                "Constrained features are not a subset of training data feature names"
+            )
+        return tuple(value.get(name, 0) for name in feature_names)
 
     def _transform_interaction_constraints(
-        self, value: Union[List[Tuple[str]], str]
-    ) -> str:
+        self, value: Union[Sequence[Sequence[str]], str]
+    ) -> Union[str, List[List[int]]]:
         if isinstance(value, str):
             return value
-        feature_idx_mapping = {k: str(v) for v, k in enumerate(self.feature_names or [])}
+        feature_idx_mapping = {
+            name: idx for idx, name in enumerate(self.feature_names or [])
+        }
 
         try:
-            s = "["
+            result = []
             for constraint in value:
-                s += (
-                    "["
-                    + ",".join(
-                        [feature_idx_mapping[feature_name] for feature_name in constraint]
-                    )
-                    + "],"
+                result.append(
+                    [feature_idx_mapping[feature_name] for feature_name in constraint]
                 )
-            return s[:-1] + "]"
+            return result
         except KeyError as e:
             raise ValueError(
                 "Constrained features are not a subset of training data feature names"
@@ -1444,17 +1440,16 @@ class Booster:
     def _configure_constraints(self, params: Union[List, Dict]) -> Union[List, Dict]:
         if isinstance(params, dict):
             value = params.get("monotone_constraints")
-            if value:
-                params[
-                    "monotone_constraints"
-                ] = self._transform_monotone_constrains(value)
+            if value is not None:
+                params["monotone_constraints"] = self._transform_monotone_constrains(
+                    value
+                )
             value = params.get("interaction_constraints")
-            if value:
+            if value is not None:
                 params[
                     "interaction_constraints"
                 ] = self._transform_interaction_constraints(value)
         elif isinstance(params, list):
             for idx, param in enumerate(params):
                 name, value = param
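The guard also changes from a truthiness test to an explicit ``is not None`` check, presumably so that an explicitly empty constraint container is still forwarded rather than silently skipped. A quick illustration of the difference:

.. code-block:: python

    # Empty containers are falsy, so ``if value:`` would skip them even though
    # the user supplied a value; ``if value is not None:`` does not.
    for value in (None, {}, (), {"f0": 1}):
        print(repr(value), "truthy:", bool(value), "not None:", value is not None)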
@@ -2462,11 +2457,9 @@ class Booster:
         if not PANDAS_INSTALLED:
             raise ImportError(('pandas must be available to use this method.'
                                'Install pandas before calling again.'))
-
-        if getattr(self, 'booster', None) is not None and self.booster not in {'gbtree', 'dart'}:
-            raise ValueError(
-                f"This method is not defined for Booster type {self.booster}"
-            )
+        booster = json.loads(self.save_config())["learner"]["gradient_booster"]["name"]
+        if booster not in {"gbtree", "dart"}:
+            raise ValueError(f"This method is not defined for Booster type {booster}")
 
         tree_ids = []
         node_ids = []
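The booster type is now read from the saved JSON configuration rather than from an attribute set in ``__init__``. A small sketch of that lookup on a trained model; the training data here is made up.

.. code-block:: python

    import json

    import numpy as np
    import xgboost as xgb

    dtrain = xgb.DMatrix(np.random.rand(20, 3), label=np.random.rand(20))
    bst = xgb.train({"booster": "gbtree"}, dtrain, num_boost_round=2)

    # The booster name lives under learner/gradient_booster/name in the saved
    # configuration, as used in the diff above.
    config = json.loads(bst.save_config())
    print(config["learner"]["gradient_booster"]["name"])  # "gbtree"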

View File

@@ -98,7 +98,7 @@ class TestMonotoneConstraints:
         # next check monotonicity when initializing monotone_constraints by feature names
         params = {
-            'tree_method': 'hist', 'verbosity': 1,
+            'tree_method': 'hist',
             'grow_policy': 'lossguide',
             'monotone_constraints': {'feature_0': 1, 'feature_1': -1}
         }