Cleanup configuration for constraints. (#7758)

2022-03-29 04:22:46 +08:00 · 2022-03-29 04:22:46 +08:00 · a50b84244e
commit a50b84244e
parent 3c9b04460a
5 changed files with 53 additions and 42 deletions
--- a/doc/treemethod.rst
+++ b/doc/treemethod.rst
@ -134,7 +134,7 @@ Following table summarizes some differences in supported features between 4 tree
 +------------------+-----------+---------------------+---------------------+------------------------+
 | categorical data | F         | T                   | T                   | T                      |
 +------------------+-----------+---------------------+---------------------+------------------------+
-| External memory  | F         | T                   | P                   | P                      |
+| External memory  | F         | T                   | T                   | P                      |
 +------------------+-----------+---------------------+---------------------+------------------------+
 | Distributed      | F         | T                   | T                   | T                      |
 +------------------+-----------+---------------------+---------------------+------------------------+
--- a/doc/tutorials/feature_interaction_constraint.rst
+++ b/doc/tutorials/feature_interaction_constraint.rst
@ -174,6 +174,14 @@ parameter:
                                     num_boost_round = 1000, evals = evallist,
                                     early_stopping_rounds = 10)

+***************************
+Using feature names instead
+***************************
+
+XGBoost's Python package supports using feature names instead of feature indices for
+specifying the constraints. Given a data frame with columns ``["f0", "f1", "f2"]``, the
+feature interaction constraint can be specified as ``[["f0", "f2"]]``.
+
 **************
 Advanced topic
 **************
--- a/doc/tutorials/monotonic.rst
+++ b/doc/tutorials/monotonic.rst
@ -69,7 +69,7 @@ Then fitting with monotonicity constraints only requires adding a single paramet
 .. code-block:: python

  params_constrained = params.copy()
-  params_constrained['monotone_constraints'] = "(1,-1)"
+  params_constrained['monotone_constraints'] = (1,-1)

  model_with_constraints = xgb.train(params_constrained, dtrain,
                                     num_boost_round = 1000, evals = evallist,
@ -90,3 +90,13 @@ monotonic constraints may produce unnecessarily shallow trees. This is because t
 split. Monotonic constraints may wipe out all available split candidates, in which case no
 split is made. To reduce the effect, you may want to increase the ``max_bin`` parameter to
 consider more split candidates.
+
+
+*******************
+Using feature names
+*******************
+
+XGBoost's Python package supports using feature names instead of feature indices for
+specifying the constraints. Given a data frame with columns ``["f0", "f1", "f2"]``, the
+monotonic constraint can be specified as ``{"f0": 1, "f2": -1}``, and ``"f1"`` will
+default to ``0`` (no constraint).
--- a/python-package/xgboost/core.py
+++ b/python-package/xgboost/core.py
@ -1392,50 +1392,46 @@ class Booster:
            raise TypeError('Unknown type:', model_file)

        params = params or {}
-        params = _configure_metrics(params.copy())
-        params = self._configure_constraints(params)
-        if isinstance(params, list):
-            params.append(('validate_parameters', True))
+        params_processed = _configure_metrics(params.copy())
+        params_processed = self._configure_constraints(params_processed)
+        if isinstance(params_processed, list):
+            params_processed.append(("validate_parameters", True))
        else:
-            params['validate_parameters'] = True
+            params_processed["validate_parameters"] = True

-        self.set_param(params or {})
-        if (params is not None) and ('booster' in params):
-            self.booster = params['booster']
-        else:
-            self.booster = 'gbtree'
+        self.set_param(params_processed or {})

-    def _transform_monotone_constrains(self, value: Union[Dict[str, int], str]) -> str:
+    def _transform_monotone_constrains(
+        self, value: Union[Dict[str, int], str]
+    ) -> Union[Tuple[int, ...], str]:
        if isinstance(value, str):
            return value

        constrained_features = set(value.keys())
-        if not constrained_features.issubset(set(self.feature_names or [])):
-            raise ValueError('Constrained features are not a subset of '
-                             'training data feature names')
+        feature_names = self.feature_names or []
+        if not constrained_features.issubset(set(feature_names)):
+            raise ValueError(
+                "Constrained features are not a subset of training data feature names"
+            )

-        return '(' + ','.join([str(value.get(feature_name, 0))
-                               for feature_name in self.feature_names]) + ')'
+        return tuple(value.get(name, 0) for name in feature_names)

    def _transform_interaction_constraints(
-        self, value: Union[List[Tuple[str]], str]
-    ) -> str:
+        self, value: Union[Sequence[Sequence[str]], str]
+    ) -> Union[str, List[List[int]]]:
        if isinstance(value, str):
            return value
-
-        feature_idx_mapping = {k: str(v) for v, k in enumerate(self.feature_names or [])}
+        feature_idx_mapping = {
+            name: idx for idx, name in enumerate(self.feature_names or [])
+        }

        try:
-            s = "["
+            result = []
            for constraint in value:
-                s += (
-                    "["
-                    + ",".join(
-                        [feature_idx_mapping[feature_name] for feature_name in constraint]
-                    )
-                    + "],"
+                result.append(
+                    [feature_idx_mapping[feature_name] for feature_name in constraint]
                )
-            return s[:-1] + "]"
+            return result
        except KeyError as e:
            raise ValueError(
                "Constrained features are not a subset of training data feature names"
@ -1444,17 +1440,16 @@ class Booster:
    def _configure_constraints(self, params: Union[List, Dict]) -> Union[List, Dict]:
        if isinstance(params, dict):
            value = params.get("monotone_constraints")
-            if value:
-                params[
-                    "monotone_constraints"
-                ] = self._transform_monotone_constrains(value)
+            if value is not None:
+                params["monotone_constraints"] = self._transform_monotone_constrains(
+                    value
+                )

            value = params.get("interaction_constraints")
-            if value:
+            if value is not None:
                params[
                    "interaction_constraints"
                ] = self._transform_interaction_constraints(value)
-
        elif isinstance(params, list):
            for idx, param in enumerate(params):
                name, value = param
@ -2462,11 +2457,9 @@ class Booster:
        if not PANDAS_INSTALLED:
            raise ImportError(('pandas must be available to use this method.'
                               'Install pandas before calling again.'))
-
-        if getattr(self, 'booster', None) is not None and self.booster not in {'gbtree', 'dart'}:
-            raise ValueError(
-                f"This method is not defined for Booster type {self.booster}"
-            )
+        booster = json.loads(self.save_config())["learner"]["gradient_booster"]["name"]
+        if booster not in {"gbtree", "dart"}:
+            raise ValueError(f"This method is not defined for Booster type {booster}")

        tree_ids = []
        node_ids = []
--- a/tests/python/test_monotone_constraints.py
+++ b/tests/python/test_monotone_constraints.py
@ -98,7 +98,7 @@ class TestMonotoneConstraints:

        # next check monotonicity when initializing monotone_constraints by feature names
        params = {
-            'tree_method': 'hist', 'verbosity': 1,
+            'tree_method': 'hist',
            'grow_policy': 'lossguide',
            'monotone_constraints': {'feature_0': 1, 'feature_1': -1}
        }