From 5f7b5a69213bbf82238832a277f1a11046b0d57f Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 4 Jan 2024 14:52:48 +0800 Subject: [PATCH] Add tests for pickling with custom obj and metric. (#9943) --- doc/tutorials/custom_metric_obj.rst | 1 - doc/tutorials/saving_model.rst | 2 ++ python-package/xgboost/sklearn.py | 27 ++++++++++++-------- python-package/xgboost/testing/__init__.py | 7 ++++++ tests/python/test_pickling.py | 27 ++++++++++++++++++++ tests/python/test_with_sklearn.py | 29 ++++++++++------------ 6 files changed, 65 insertions(+), 28 deletions(-) diff --git a/doc/tutorials/custom_metric_obj.rst b/doc/tutorials/custom_metric_obj.rst index 76ee1b3de..118a099c1 100644 --- a/doc/tutorials/custom_metric_obj.rst +++ b/doc/tutorials/custom_metric_obj.rst @@ -279,7 +279,6 @@ available at :ref:`sphx_glr_python_examples_custom_softmax.py`. Also, see Scikit-Learn Interface ********************** - The scikit-learn interface of XGBoost has some utilities to improve the integration with standard scikit-learn functions. For instance, after XGBoost 1.6.0 users can use the cost function (not scoring functions) from scikit-learn out of the box: diff --git a/doc/tutorials/saving_model.rst b/doc/tutorials/saving_model.rst index 54c217249..34b5430df 100644 --- a/doc/tutorials/saving_model.rst +++ b/doc/tutorials/saving_model.rst @@ -101,6 +101,8 @@ snapshot generated by an earlier version of XGBoost may result in errors or unde **If a model is persisted with** ``pickle.dump`` (Python) or ``saveRDS`` (R), **then the model may not be accessible in later versions of XGBoost.** +.. _custom-obj-metric: + *************************** Custom objective and metric *************************** diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index ea8d8d041..3383ae0b7 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -192,11 +192,16 @@ __model_doc = f""" Boosting learning rate (xgb's "eta") verbosity : Optional[int] The degree of verbosity. Valid values are 0 (silent) - 3 (debug). + objective : {SklObjective} - Specify the learning task and the corresponding learning objective or - a custom objective function to be used (see note below). + + Specify the learning task and the corresponding learning objective or a custom + objective function to be used. For custom objective, see + :doc:`/tutorials/custom_metric_obj` and :ref:`custom-obj-metric` for more + information. + booster: Optional[str] - Specify which booster to use: gbtree, gblinear or dart. + Specify which booster to use: `gbtree`, `gblinear` or `dart`. tree_method: Optional[str] Specify which tree method to use. Default to auto. If this parameter is set to default, XGBoost will choose the most conservative option available. It's @@ -328,21 +333,21 @@ __model_doc = f""" Metric used for monitoring the training result and early stopping. It can be a string or list of strings as names of predefined metric in XGBoost (See - doc/parameter.rst), one of the metrics in :py:mod:`sklearn.metrics`, or any other - user defined metric that looks like `sklearn.metrics`. + doc/parameter.rst), one of the metrics in :py:mod:`sklearn.metrics`, or any + other user defined metric that looks like `sklearn.metrics`. If custom objective is also provided, then custom metric should implement the corresponding reverse link function. 
Unlike the `scoring` parameter commonly used in scikit-learn, when a callable - object is provided, it's assumed to be a cost function and by default XGBoost will - minimize the result during early stopping. + object is provided, it's assumed to be a cost function and by default XGBoost + will minimize the result during early stopping. - For advanced usage on Early stopping like directly choosing to maximize instead of - minimize, see :py:obj:`xgboost.callback.EarlyStopping`. + For advanced usage on Early stopping like directly choosing to maximize instead + of minimize, see :py:obj:`xgboost.callback.EarlyStopping`. - See :doc:`Custom Objective and Evaluation Metric ` - for more. + See :doc:`/tutorials/custom_metric_obj` and :ref:`custom-obj-metric` for more + information. .. note:: diff --git a/python-package/xgboost/testing/__init__.py b/python-package/xgboost/testing/__init__.py index 6b8daf561..373ad1c58 100644 --- a/python-package/xgboost/testing/__init__.py +++ b/python-package/xgboost/testing/__init__.py @@ -815,6 +815,13 @@ def softprob_obj( return objective +def ls_obj(y_true: np.ndarray, y_pred: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: + """Least squared error.""" + grad = y_pred - y_true + hess = np.ones(len(y_true)) + return grad, hess + + class DirectoryExcursion: """Change directory. Change back and optionally cleaning up the directory when exit. diff --git a/tests/python/test_pickling.py b/tests/python/test_pickling.py index 2f4d77bf0..083a2a7fd 100644 --- a/tests/python/test_pickling.py +++ b/tests/python/test_pickling.py @@ -1,10 +1,13 @@ import json import os import pickle +import tempfile import numpy as np +import pytest import xgboost as xgb +from xgboost import testing as tm kRows = 100 kCols = 10 @@ -61,3 +64,27 @@ class TestPickling: params = {"nthread": 8, "tree_method": "exact", "subsample": 0.5} config = self.run_model_pickling(params) check(config) + + @pytest.mark.skipif(**tm.no_sklearn()) + def test_with_sklearn_obj_metric(self) -> None: + from sklearn.metrics import mean_squared_error + + X, y = tm.datasets.make_regression() + reg = xgb.XGBRegressor(objective=tm.ls_obj, eval_metric=mean_squared_error) + reg.fit(X, y) + + pkl = pickle.dumps(reg) + reg_1 = pickle.loads(pkl) + assert callable(reg_1.objective) + assert callable(reg_1.eval_metric) + + with tempfile.TemporaryDirectory() as tmpdir: + path = os.path.join(tmpdir, "model.json") + reg.save_model(path) + + reg_2 = xgb.XGBRegressor() + reg_2.load_model(path) + + assert not callable(reg_2.objective) + assert not callable(reg_2.eval_metric) + assert reg_2.eval_metric is None diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py index 1e49ed053..ee0085d51 100644 --- a/tests/python/test_with_sklearn.py +++ b/tests/python/test_with_sklearn.py @@ -504,15 +504,10 @@ def test_regression_with_custom_objective(): from sklearn.metrics import mean_squared_error from sklearn.model_selection import KFold - def objective_ls(y_true, y_pred): - grad = (y_pred - y_true) - hess = np.ones(len(y_true)) - return grad, hess - X, y = fetch_california_housing(return_X_y=True) kf = KFold(n_splits=2, shuffle=True, random_state=rng) for train_index, test_index in kf.split(X, y): - xgb_model = xgb.XGBRegressor(objective=objective_ls).fit( + xgb_model = xgb.XGBRegressor(objective=tm.ls_obj).fit( X[train_index], y[train_index] ) preds = xgb_model.predict(X[test_index]) @@ -530,27 +525,29 @@ def test_regression_with_custom_objective(): np.testing.assert_raises(XGBCustomObjectiveException, xgb_model.fit, 
X, y) +def logregobj(y_true, y_pred): + y_pred = 1.0 / (1.0 + np.exp(-y_pred)) + grad = y_pred - y_true + hess = y_pred * (1.0 - y_pred) + return grad, hess + + def test_classification_with_custom_objective(): from sklearn.datasets import load_digits from sklearn.model_selection import KFold - def logregobj(y_true, y_pred): - y_pred = 1.0 / (1.0 + np.exp(-y_pred)) - grad = y_pred - y_true - hess = y_pred * (1.0 - y_pred) - return grad, hess - digits = load_digits(n_class=2) - y = digits['target'] - X = digits['data'] + y = digits["target"] + X = digits["data"] kf = KFold(n_splits=2, shuffle=True, random_state=rng) for train_index, test_index in kf.split(X, y): xgb_model = xgb.XGBClassifier(objective=logregobj) xgb_model.fit(X[train_index], y[train_index]) preds = xgb_model.predict(X[test_index]) labels = y[test_index] - err = sum(1 for i in range(len(preds)) - if int(preds[i] > 0.5) != labels[i]) / float(len(preds)) + err = sum( + 1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i] + ) / float(len(preds)) assert err < 0.1 # Test that the custom objective function is actually used
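
For reference, the behaviour exercised by the new `test_with_sklearn_obj_metric` test can be reproduced outside the test suite with a short standalone script. This is a minimal sketch, not part of the patch: the `random_state`, the temporary file name, and the local `ls_obj` definition (mirroring the `xgboost.testing.ls_obj` helper added above) are illustrative choices, and it assumes scikit-learn is installed, as the test's `skipif` guard does.

    import os
    import pickle
    import tempfile

    import numpy as np
    from sklearn.datasets import make_regression
    from sklearn.metrics import mean_squared_error

    import xgboost as xgb


    def ls_obj(y_true, y_pred):
        # Least squares objective: gradient and constant hessian of the
        # squared error, same as the helper added to xgboost.testing.
        grad = y_pred - y_true
        hess = np.ones(len(y_true))
        return grad, hess


    X, y = make_regression(random_state=0)

    # A callable objective and a callable metric survive a pickle round trip ...
    reg = xgb.XGBRegressor(objective=ls_obj, eval_metric=mean_squared_error)
    reg.fit(X, y)

    reg_pkl = pickle.loads(pickle.dumps(reg))
    assert callable(reg_pkl.objective)
    assert callable(reg_pkl.eval_metric)

    # ... but are not written into the JSON model file, so after load_model the
    # objective is no longer the Python callable and eval_metric is None.
    with tempfile.TemporaryDirectory() as tmpdir:
        path = os.path.join(tmpdir, "model.json")
        reg.save_model(path)

        reg_json = xgb.XGBRegressor()
        reg_json.load_model(path)
        assert not callable(reg_json.objective)
        assert reg_json.eval_metric is None

This asymmetry is what the documentation changes above point at: pickle serialises the whole Python object, callables included, while save_model writes only the language-neutral JSON representation, so custom objectives and metrics have to be supplied again after load_model. The new `custom-obj-metric` anchor in saving_model.rst gives the parameter docstrings in sklearn.py a place to link to for that caveat.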