Add constraint parameters to Scikit-Learn interface. (#5227)

* Add document for constraints.

* Fix a format error in doc for objective function.
This commit is contained in:
Jiaming Yuan 2020-01-25 11:12:02 +08:00 committed by GitHub
parent 44469a0ca9
commit 40680368cf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 52 additions and 17 deletions

View File

@ -208,6 +208,17 @@ Parameters for Tree Booster
* ``num_parallel_tree``, [default=1]
- Number of parallel trees constructed during each iteration. This option is used to support boosted random forest.
* ``monotone_constraints``
- Constraint of variable monotonicity. See tutorial for more information.
* ``interaction_constraints``
- Constraints for interaction representing permitted interactions. The constraints must
be specified in the form of a nested list, e.g. ``[[0, 1], [2, 3, 4]]``, where each inner
list is a group of indices of features that are allowed to interact with each other.
See the tutorial for more information.
Additional parameters for Dart Booster (``booster=dart``)
=========================================================

View File

@ -110,6 +110,15 @@ __model_doc = '''
None, defaults to np.nan.
num_parallel_tree: int
Used for boosting random forest.
monotone_constraints : str
Constraint of variable monotonicity. See the tutorial for more
information.
interaction_constraints : str
Constraints for interaction representing permitted interactions. The
constraints must be specified in the form of a nested list, e.g. [[0, 1],
[2, 3, 4]], where each inner list is a group of indices of features
that are allowed to interact with each other. See the tutorial for more
information.
importance_type: string, default "gain"
The feature importance type for the feature_importances\\_ property:
either "gain", "weight", "cover", "total_gain" or "total_cover".
@ -125,24 +134,25 @@ __model_doc = '''
\\*\\*kwargs is unsupported by scikit-learn. We do not guarantee
that parameters passed via this argument will interact properly
with scikit-learn. '''
with scikit-learn.
'''
__custom_obj_note = '''
Note
----
A custom objective function can be provided for the ``objective``
parameter. In this case, it should have the signature
``objective(y_true, y_pred) -> grad, hess``:
.. note:: Custom objective function
y_true: array_like of shape [n_samples]
The target values
y_pred: array_like of shape [n_samples]
The predicted values
A custom objective function can be provided for the ``objective``
parameter. In this case, it should have the signature
``objective(y_true, y_pred) -> grad, hess``:
grad: array_like of shape [n_samples]
The value of the gradient for each sample point.
hess: array_like of shape [n_samples]
The value of the second derivative for each sample point
y_true: array_like of shape [n_samples]
The target values
y_pred: array_like of shape [n_samples]
The predicted values
grad: array_like of shape [n_samples]
The value of the gradient for each sample point.
hess: array_like of shape [n_samples]
The value of the second derivative for each sample point
'''
@ -198,8 +208,9 @@ class XGBModel(XGBModelBase):
colsample_bytree=None, colsample_bylevel=None,
colsample_bynode=None, reg_alpha=None, reg_lambda=None,
scale_pos_weight=None, base_score=None, random_state=None,
missing=None, num_parallel_tree=None, importance_type="gain",
gpu_id=None, **kwargs):
missing=None, num_parallel_tree=None,
monotone_constraints=None, interaction_constraints=None,
importance_type="gain", gpu_id=None, **kwargs):
if not SKLEARN_INSTALLED:
raise XGBoostError(
'sklearn needs to be installed in order to use this module')
@ -228,8 +239,10 @@ class XGBModel(XGBModelBase):
self._Booster = None
self.random_state = random_state
self.n_jobs = n_jobs
self.gpu_id = gpu_id
self.monotone_constraints = monotone_constraints
self.interaction_constraints = interaction_constraints
self.importance_type = importance_type
self.gpu_id = gpu_id
def __setstate__(self, state):
# backward compatibility code

View File

@ -715,6 +715,17 @@ def test_XGBClassifier_resume():
assert log_loss1 > log_loss2
def test_constraint_parameters():
    """Verify that ``interaction_constraints`` given to the sklearn wrapper
    is forwarded into the underlying booster's training parameters."""
    constraints = '[[0, 1], [2, 3, 4]]'
    model = xgb.XGBRegressor(interaction_constraints=constraints)
    features = np.random.randn(10, 10)
    target = np.random.randn(10)
    model.fit(features, target)
    # The saved config is JSON; drill down to the tree updater's params.
    booster_config = json.loads(model.get_booster().save_config())
    train_param = booster_config['learner']['gradient_booster'][
        'updater']['grow_colmaker']['train_param']
    assert train_param['interaction_constraints'] == constraints
class TestBoostFromPrediction(unittest.TestCase):
def run_boost_from_prediction(self, tree_method):
from sklearn.datasets import load_breast_cancer