Cleanup configuration for constraints. (#7758)
This commit is contained in:
parent
3c9b04460a
commit
a50b84244e
@ -134,7 +134,7 @@ Following table summarizes some differences in supported features between 4 tree
|
||||
+------------------+-----------+---------------------+---------------------+------------------------+
|
||||
| categorical data | F | T | T | T |
|
||||
+------------------+-----------+---------------------+---------------------+------------------------+
|
||||
| External memory | F | T | P | P |
|
||||
| External memory | F | T | T | P |
|
||||
+------------------+-----------+---------------------+---------------------+------------------------+
|
||||
| Distributed | F | T | T | T |
|
||||
+------------------+-----------+---------------------+---------------------+------------------------+
|
||||
|
||||
@ -174,6 +174,14 @@ parameter:
|
||||
num_boost_round = 1000, evals = evallist,
|
||||
early_stopping_rounds = 10)
|
||||
|
||||
**************************
|
||||
Using feature name instead
|
||||
**************************
|
||||
|
||||
XGBoost's Python package supports using feature names instead of feature indices for
|
||||
specifying the constraints. Given a data frame with columns ``["f0", "f1", "f2"]``, the
|
||||
feature interaction constraint can be specified as ``[["f0", "f2"]]``.
|
||||
|
||||
**************
|
||||
Advanced topic
|
||||
**************
|
||||
|
||||
@ -69,7 +69,7 @@ Then fitting with monotonicity constraints only requires adding a single paramet
|
||||
.. code-block:: python
|
||||
|
||||
params_constrained = params.copy()
|
||||
params_constrained['monotone_constraints'] = "(1,-1)"
|
||||
params_constrained['monotone_constraints'] = (1,-1)
|
||||
|
||||
model_with_constraints = xgb.train(params_constrained, dtrain,
|
||||
num_boost_round = 1000, evals = evallist,
|
||||
@ -90,3 +90,13 @@ monotonic constraints may produce unnecessarily shallow trees. This is because t
|
||||
split. Monotonic constraints may wipe out all available split candidates, in which case no
|
||||
split is made. To reduce the effect, you may want to increase the ``max_bin`` parameter to
|
||||
consider more split candidates.
|
||||
|
||||
|
||||
*******************
|
||||
Using feature names
|
||||
*******************
|
||||
|
||||
XGBoost's Python package supports using feature names instead of feature indices for
|
||||
specifying the constraints. Given a data frame with columns ``["f0", "f1", "f2"]``, the
|
||||
monotonic constraint can be specified as ``{"f0": 1, "f2": -1}``, and ``"f1"`` will
|
||||
default to ``0`` (no constraint).
|
||||
|
||||
@ -1392,50 +1392,46 @@ class Booster:
|
||||
raise TypeError('Unknown type:', model_file)
|
||||
|
||||
params = params or {}
|
||||
params = _configure_metrics(params.copy())
|
||||
params = self._configure_constraints(params)
|
||||
if isinstance(params, list):
|
||||
params.append(('validate_parameters', True))
|
||||
params_processed = _configure_metrics(params.copy())
|
||||
params_processed = self._configure_constraints(params_processed)
|
||||
if isinstance(params_processed, list):
|
||||
params_processed.append(("validate_parameters", True))
|
||||
else:
|
||||
params['validate_parameters'] = True
|
||||
params_processed["validate_parameters"] = True
|
||||
|
||||
self.set_param(params or {})
|
||||
if (params is not None) and ('booster' in params):
|
||||
self.booster = params['booster']
|
||||
else:
|
||||
self.booster = 'gbtree'
|
||||
self.set_param(params_processed or {})
|
||||
|
||||
def _transform_monotone_constrains(self, value: Union[Dict[str, int], str]) -> str:
|
||||
def _transform_monotone_constrains(
|
||||
self, value: Union[Dict[str, int], str]
|
||||
) -> Union[Tuple[int, ...], str]:
|
||||
if isinstance(value, str):
|
||||
return value
|
||||
|
||||
constrained_features = set(value.keys())
|
||||
if not constrained_features.issubset(set(self.feature_names or [])):
|
||||
raise ValueError('Constrained features are not a subset of '
|
||||
'training data feature names')
|
||||
feature_names = self.feature_names or []
|
||||
if not constrained_features.issubset(set(feature_names)):
|
||||
raise ValueError(
|
||||
"Constrained features are not a subset of training data feature names"
|
||||
)
|
||||
|
||||
return '(' + ','.join([str(value.get(feature_name, 0))
|
||||
for feature_name in self.feature_names]) + ')'
|
||||
return tuple(value.get(name, 0) for name in feature_names)
|
||||
|
||||
def _transform_interaction_constraints(
|
||||
self, value: Union[List[Tuple[str]], str]
|
||||
) -> str:
|
||||
self, value: Union[Sequence[Sequence[str]], str]
|
||||
) -> Union[str, List[List[int]]]:
|
||||
if isinstance(value, str):
|
||||
return value
|
||||
|
||||
feature_idx_mapping = {k: str(v) for v, k in enumerate(self.feature_names or [])}
|
||||
feature_idx_mapping = {
|
||||
name: idx for idx, name in enumerate(self.feature_names or [])
|
||||
}
|
||||
|
||||
try:
|
||||
s = "["
|
||||
result = []
|
||||
for constraint in value:
|
||||
s += (
|
||||
"["
|
||||
+ ",".join(
|
||||
[feature_idx_mapping[feature_name] for feature_name in constraint]
|
||||
)
|
||||
+ "],"
|
||||
result.append(
|
||||
[feature_idx_mapping[feature_name] for feature_name in constraint]
|
||||
)
|
||||
return s[:-1] + "]"
|
||||
return result
|
||||
except KeyError as e:
|
||||
raise ValueError(
|
||||
"Constrained features are not a subset of training data feature names"
|
||||
@ -1444,17 +1440,16 @@ class Booster:
|
||||
def _configure_constraints(self, params: Union[List, Dict]) -> Union[List, Dict]:
|
||||
if isinstance(params, dict):
|
||||
value = params.get("monotone_constraints")
|
||||
if value:
|
||||
params[
|
||||
"monotone_constraints"
|
||||
] = self._transform_monotone_constrains(value)
|
||||
if value is not None:
|
||||
params["monotone_constraints"] = self._transform_monotone_constrains(
|
||||
value
|
||||
)
|
||||
|
||||
value = params.get("interaction_constraints")
|
||||
if value:
|
||||
if value is not None:
|
||||
params[
|
||||
"interaction_constraints"
|
||||
] = self._transform_interaction_constraints(value)
|
||||
|
||||
elif isinstance(params, list):
|
||||
for idx, param in enumerate(params):
|
||||
name, value = param
|
||||
@ -2462,11 +2457,9 @@ class Booster:
|
||||
if not PANDAS_INSTALLED:
|
||||
raise ImportError(('pandas must be available to use this method.'
|
||||
'Install pandas before calling again.'))
|
||||
|
||||
if getattr(self, 'booster', None) is not None and self.booster not in {'gbtree', 'dart'}:
|
||||
raise ValueError(
|
||||
f"This method is not defined for Booster type {self.booster}"
|
||||
)
|
||||
booster = json.loads(self.save_config())["learner"]["gradient_booster"]["name"]
|
||||
if booster not in {"gbtree", "dart"}:
|
||||
raise ValueError(f"This method is not defined for Booster type {booster}")
|
||||
|
||||
tree_ids = []
|
||||
node_ids = []
|
||||
|
||||
@ -98,7 +98,7 @@ class TestMonotoneConstraints:
|
||||
|
||||
# next check monotonicity when initializing monotone_constraints by feature names
|
||||
params = {
|
||||
'tree_method': 'hist', 'verbosity': 1,
|
||||
'tree_method': 'hist',
|
||||
'grow_policy': 'lossguide',
|
||||
'monotone_constraints': {'feature_0': 1, 'feature_1': -1}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user