diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py
index c4713a9e4..560a3a8ed 100644
--- a/python-package/xgboost/sklearn.py
+++ b/python-package/xgboost/sklearn.py
@@ -782,7 +782,10 @@ class XGBModel(XGBModelBase):
 
     def _more_tags(self) -> Dict[str, bool]:
         """Tags used for scikit-learn data validation."""
-        return {"allow_nan": True, "no_validation": True}
+        tags = {"allow_nan": True, "no_validation": True}
+        if hasattr(self, "kwargs") and self.kwargs.get("updater") == "shotgun":
+            tags["non_deterministic"] = True
+        return tags
 
     def __sklearn_is_fitted__(self) -> bool:
         return hasattr(self, "_Booster")
@@ -1439,6 +1442,11 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
     ) -> None:
         super().__init__(objective=objective, **kwargs)
 
+    def _more_tags(self) -> Dict[str, bool]:
+        tags = super()._more_tags()
+        tags["multilabel"] = True
+        return tags
+
     @_deprecate_positional_args
     def fit(
         self,
@@ -1717,6 +1725,12 @@ class XGBRegressor(XGBModel, XGBRegressorBase):
     ) -> None:
         super().__init__(objective=objective, **kwargs)
 
+    def _more_tags(self) -> Dict[str, bool]:
+        tags = super()._more_tags()
+        tags["multioutput"] = True
+        tags["multioutput_only"] = False
+        return tags
+
 
 @xgboost_model_doc(
     "scikit-learn API for XGBoost random forest regression.",
diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py
index 507470724..2d12b038e 100644
--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@@ -1300,20 +1300,12 @@ def test_estimator_reg(estimator, check):
         ):
             estimator.fit(X, y)
         return
-    if (
-        os.environ["PYTEST_CURRENT_TEST"].find("check_estimators_overwrite_params")
-        != -1
-    ):
-        # A hack to pass the scikit-learn parameter mutation tests. XGBoost regressor
-        # returns actual internal default values for parameters in `get_params`, but
-        # those are set as `None` in sklearn interface to avoid duplication. So we fit
-        # a dummy model and obtain the default parameters here for the mutation tests.
-        from sklearn.datasets import make_regression
-
-        X, y = make_regression(n_samples=2, n_features=1)
-        estimator.set_params(**xgb.XGBRegressor().fit(X, y).get_params())
-
-    check(estimator)
+    elif os.environ["PYTEST_CURRENT_TEST"].find("check_regressor_multioutput") != -1:
+        # sklearn requires float64
+        with pytest.raises(AssertionError, match="Got float32"):
+            check(estimator)
+    else:
+        check(estimator)
 
 
 def test_categorical():
@@ -1475,3 +1467,19 @@ def test_fit_none() -> None:
 
     with pytest.raises(ValueError, match="labels"):
         xgb.XGBRegressor().fit(X, None)
+
+
+def test_tags() -> None:
+    for reg in [xgb.XGBRegressor(), xgb.XGBRFRegressor()]:
+        tags = reg._more_tags()
+        assert "non_deterministic" not in tags
+        assert tags["multioutput"] is True
+        assert tags["multioutput_only"] is False
+
+    for clf in [xgb.XGBClassifier()]:
+        tags = clf._more_tags()
+        assert "multioutput" not in tags
+        assert tags["multilabel"] is True
+
+    tags = xgb.XGBRanker()._more_tags()
+    assert "multioutput" not in tags
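
A minimal usage sketch of the new tags (not part of the patch; it assumes an xgboost build with this change plus a recent NumPy). The `multioutput` tag advertised by XGBRegressor means a two-column target is accepted directly by `fit`, and the base-class change flags the inherently non-deterministic "shotgun" linear updater:

    import numpy as np
    import xgboost as xgb

    rng = np.random.default_rng(0)
    X = rng.normal(size=(64, 4))
    y = rng.normal(size=(64, 2))  # two regression targets at once

    # XGBRegressor now reports multioutput=True, so a 2-D y trains directly
    # and predictions come back with one column per target.
    reg = xgb.XGBRegressor(n_estimators=8, tree_method="hist").fit(X, y)
    assert reg.predict(X).shape == (64, 2)

    # The base class marks the "shotgun" updater (parallel coordinate descent
    # for gblinear) as non-deterministic; the extra kwarg lands in self.kwargs,
    # which is exactly what the new check in XGBModel._more_tags reads.
    lin = xgb.XGBRegressor(booster="gblinear", updater="shotgun")
    assert lin._more_tags().get("non_deterministic") is True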