Cleanup configuration for constraints. (#7758)
This commit is contained in:
parent
3c9b04460a
commit
a50b84244e
@ -134,7 +134,7 @@ Following table summarizes some differences in supported features between 4 tree
|
|||||||
+------------------+-----------+---------------------+---------------------+------------------------+
|
+------------------+-----------+---------------------+---------------------+------------------------+
|
||||||
| categorical data | F | T | T | T |
|
| categorical data | F | T | T | T |
|
||||||
+------------------+-----------+---------------------+---------------------+------------------------+
|
+------------------+-----------+---------------------+---------------------+------------------------+
|
||||||
| External memory | F | T | P | P |
|
| External memory | F | T | T | P |
|
||||||
+------------------+-----------+---------------------+---------------------+------------------------+
|
+------------------+-----------+---------------------+---------------------+------------------------+
|
||||||
| Distributed | F | T | T | T |
|
| Distributed | F | T | T | T |
|
||||||
+------------------+-----------+---------------------+---------------------+------------------------+
|
+------------------+-----------+---------------------+---------------------+------------------------+
|
||||||
|
|||||||
@ -174,6 +174,14 @@ parameter:
|
|||||||
num_boost_round = 1000, evals = evallist,
|
num_boost_round = 1000, evals = evallist,
|
||||||
early_stopping_rounds = 10)
|
early_stopping_rounds = 10)
|
||||||
|
|
||||||
|
**************************
|
||||||
|
Using feature names
|
||||||
|
**************************
|
||||||
|
|
||||||
|
XGBoost's Python package supports using feature names instead of feature indices for
|
||||||
|
specifying the constraints. Given a data frame with columns ``["f0", "f1", "f2"]``, the
|
||||||
|
feature interaction constraint can be specified as ``[["f0", "f2"]]``.
|
||||||
|
|
||||||
**************
|
**************
|
||||||
Advanced topic
|
Advanced topic
|
||||||
**************
|
**************
|
||||||
|
|||||||
@ -69,7 +69,7 @@ Then fitting with monotonicity constraints only requires adding a single paramet
|
|||||||
.. code-block:: python
|
.. code-block:: python
|
||||||
|
|
||||||
params_constrained = params.copy()
|
params_constrained = params.copy()
|
||||||
params_constrained['monotone_constraints'] = "(1,-1)"
|
params_constrained['monotone_constraints'] = (1,-1)
|
||||||
|
|
||||||
model_with_constraints = xgb.train(params_constrained, dtrain,
|
model_with_constraints = xgb.train(params_constrained, dtrain,
|
||||||
num_boost_round = 1000, evals = evallist,
|
num_boost_round = 1000, evals = evallist,
|
||||||
@ -90,3 +90,13 @@ monotonic constraints may produce unnecessarily shallow trees. This is because t
|
|||||||
split. Monotonic constraints may wipe out all available split candidates, in which case no
|
split. Monotonic constraints may wipe out all available split candidates, in which case no
|
||||||
split is made. To reduce the effect, you may want to increase the ``max_bin`` parameter to
|
split is made. To reduce the effect, you may want to increase the ``max_bin`` parameter to
|
||||||
consider more split candidates.
|
consider more split candidates.
|
||||||
|
|
||||||
|
|
||||||
|
*******************
|
||||||
|
Using feature names
|
||||||
|
*******************
|
||||||
|
|
||||||
|
XGBoost's Python package supports using feature names instead of feature indices for
|
||||||
|
specifying the constraints. Given a data frame with columns ``["f0", "f1", "f2"]``, the
|
||||||
|
monotonic constraint can be specified as ``{"f0": 1, "f2": -1}``, and ``"f1"`` will
|
||||||
|
default to ``0`` (no constraint).
|
||||||
|
|||||||
@ -1392,50 +1392,46 @@ class Booster:
|
|||||||
raise TypeError('Unknown type:', model_file)
|
raise TypeError('Unknown type:', model_file)
|
||||||
|
|
||||||
params = params or {}
|
params = params or {}
|
||||||
params = _configure_metrics(params.copy())
|
params_processed = _configure_metrics(params.copy())
|
||||||
params = self._configure_constraints(params)
|
params_processed = self._configure_constraints(params_processed)
|
||||||
if isinstance(params, list):
|
if isinstance(params_processed, list):
|
||||||
params.append(('validate_parameters', True))
|
params_processed.append(("validate_parameters", True))
|
||||||
else:
|
else:
|
||||||
params['validate_parameters'] = True
|
params_processed["validate_parameters"] = True
|
||||||
|
|
||||||
self.set_param(params or {})
|
self.set_param(params_processed or {})
|
||||||
if (params is not None) and ('booster' in params):
|
|
||||||
self.booster = params['booster']
|
|
||||||
else:
|
|
||||||
self.booster = 'gbtree'
|
|
||||||
|
|
||||||
def _transform_monotone_constrains(self, value: Union[Dict[str, int], str]) -> str:
|
def _transform_monotone_constrains(
|
||||||
|
self, value: Union[Dict[str, int], str]
|
||||||
|
) -> Union[Tuple[int, ...], str]:
|
||||||
if isinstance(value, str):
|
if isinstance(value, str):
|
||||||
return value
|
return value
|
||||||
|
|
||||||
constrained_features = set(value.keys())
|
constrained_features = set(value.keys())
|
||||||
if not constrained_features.issubset(set(self.feature_names or [])):
|
feature_names = self.feature_names or []
|
||||||
raise ValueError('Constrained features are not a subset of '
|
if not constrained_features.issubset(set(feature_names)):
|
||||||
'training data feature names')
|
raise ValueError(
|
||||||
|
"Constrained features are not a subset of training data feature names"
|
||||||
|
)
|
||||||
|
|
||||||
return '(' + ','.join([str(value.get(feature_name, 0))
|
return tuple(value.get(name, 0) for name in feature_names)
|
||||||
for feature_name in self.feature_names]) + ')'
|
|
||||||
|
|
||||||
def _transform_interaction_constraints(
|
def _transform_interaction_constraints(
|
||||||
self, value: Union[List[Tuple[str]], str]
|
self, value: Union[Sequence[Sequence[str]], str]
|
||||||
) -> str:
|
) -> Union[str, List[List[int]]]:
|
||||||
if isinstance(value, str):
|
if isinstance(value, str):
|
||||||
return value
|
return value
|
||||||
|
feature_idx_mapping = {
|
||||||
feature_idx_mapping = {k: str(v) for v, k in enumerate(self.feature_names or [])}
|
name: idx for idx, name in enumerate(self.feature_names or [])
|
||||||
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
s = "["
|
result = []
|
||||||
for constraint in value:
|
for constraint in value:
|
||||||
s += (
|
result.append(
|
||||||
"["
|
[feature_idx_mapping[feature_name] for feature_name in constraint]
|
||||||
+ ",".join(
|
|
||||||
[feature_idx_mapping[feature_name] for feature_name in constraint]
|
|
||||||
)
|
|
||||||
+ "],"
|
|
||||||
)
|
)
|
||||||
return s[:-1] + "]"
|
return result
|
||||||
except KeyError as e:
|
except KeyError as e:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"Constrained features are not a subset of training data feature names"
|
"Constrained features are not a subset of training data feature names"
|
||||||
@ -1444,17 +1440,16 @@ class Booster:
|
|||||||
def _configure_constraints(self, params: Union[List, Dict]) -> Union[List, Dict]:
|
def _configure_constraints(self, params: Union[List, Dict]) -> Union[List, Dict]:
|
||||||
if isinstance(params, dict):
|
if isinstance(params, dict):
|
||||||
value = params.get("monotone_constraints")
|
value = params.get("monotone_constraints")
|
||||||
if value:
|
if value is not None:
|
||||||
params[
|
params["monotone_constraints"] = self._transform_monotone_constrains(
|
||||||
"monotone_constraints"
|
value
|
||||||
] = self._transform_monotone_constrains(value)
|
)
|
||||||
|
|
||||||
value = params.get("interaction_constraints")
|
value = params.get("interaction_constraints")
|
||||||
if value:
|
if value is not None:
|
||||||
params[
|
params[
|
||||||
"interaction_constraints"
|
"interaction_constraints"
|
||||||
] = self._transform_interaction_constraints(value)
|
] = self._transform_interaction_constraints(value)
|
||||||
|
|
||||||
elif isinstance(params, list):
|
elif isinstance(params, list):
|
||||||
for idx, param in enumerate(params):
|
for idx, param in enumerate(params):
|
||||||
name, value = param
|
name, value = param
|
||||||
@ -2462,11 +2457,9 @@ class Booster:
|
|||||||
if not PANDAS_INSTALLED:
|
if not PANDAS_INSTALLED:
|
||||||
raise ImportError(('pandas must be available to use this method.'
|
raise ImportError(('pandas must be available to use this method.'
|
||||||
'Install pandas before calling again.'))
|
'Install pandas before calling again.'))
|
||||||
|
booster = json.loads(self.save_config())["learner"]["gradient_booster"]["name"]
|
||||||
if getattr(self, 'booster', None) is not None and self.booster not in {'gbtree', 'dart'}:
|
if booster not in {"gbtree", "dart"}:
|
||||||
raise ValueError(
|
raise ValueError(f"This method is not defined for Booster type {booster}")
|
||||||
f"This method is not defined for Booster type {self.booster}"
|
|
||||||
)
|
|
||||||
|
|
||||||
tree_ids = []
|
tree_ids = []
|
||||||
node_ids = []
|
node_ids = []
|
||||||
|
|||||||
@ -98,7 +98,7 @@ class TestMonotoneConstraints:
|
|||||||
|
|
||||||
# next check monotonicity when initializing monotone_constraints by feature names
|
# next check monotonicity when initializing monotone_constraints by feature names
|
||||||
params = {
|
params = {
|
||||||
'tree_method': 'hist', 'verbosity': 1,
|
'tree_method': 'hist',
|
||||||
'grow_policy': 'lossguide',
|
'grow_policy': 'lossguide',
|
||||||
'monotone_constraints': {'feature_0': 1, 'feature_1': -1}
|
'monotone_constraints': {'feature_0': 1, 'feature_1': -1}
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user