Save model in ubj as the default. (#9947)

2024-01-05 17:53:36 +08:00 · 2024-01-05 17:53:36 +08:00 · 38dd91f491
commit 38dd91f491
parent c03a4d5088
23 changed files with 598 additions and 550 deletions
--- a/jvm-packages/xgboost4j-spark/src/main/scala/org/apache/spark/ml/util/XGBoostReadWrite.scala
+++ b/jvm-packages/xgboost4j-spark/src/main/scala/org/apache/spark/ml/util/XGBoostReadWrite.scala
@ -30,9 +30,6 @@ import org.apache.spark.ml.param.Params
 import org.apache.spark.ml.util.DefaultParamsReader.Metadata
 abstract class XGBoostWriter extends MLWriter {
-  /** Currently it's using the "deprecated" format as
-   * default, which will be changed into `ubj` in future releases. */
  def getModelFormat(): String = {
    optionMap.getOrElse("format", JBooster.DEFAULT_FORMAT)
  }
--- a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostClassifierSuite.scala
+++ b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostClassifierSuite.scala
@ -1,5 +1,5 @@
 /*
- Copyright (c) 2014-2022 by Contributors
+ Copyright (c) 2014-2024 by Contributors
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@ -432,6 +432,7 @@ class XGBoostClassifierSuite extends AnyFunSuite with PerTest with TmpFolderPerS
    val xgb = new XGBoostClassifier(paramMap)
    val model = xgb.fit(trainingDF)
    // test json
    val modelPath = new File(tempDir.toFile, "xgbc").getPath
    model.write.option("format", "json").save(modelPath)
    val nativeJsonModelPath = new File(tempDir.toFile, "nativeModel.json").getPath
@ -439,21 +440,21 @@ class XGBoostClassifierSuite extends AnyFunSuite with PerTest with TmpFolderPerS
    assert(compareTwoFiles(new File(modelPath, "data/XGBoostClassificationModel").getPath,
      nativeJsonModelPath))
-    // test default "deprecated"
+    // test ubj
    val modelUbjPath = new File(tempDir.toFile, "xgbcUbj").getPath
    model.write.save(modelUbjPath)
-    val nativeDeprecatedModelPath = new File(tempDir.toFile, "nativeModel").getPath
+    val nativeUbjModelPath = new File(tempDir.toFile, "nativeModel.ubj").getPath
-    model.nativeBooster.saveModel(nativeDeprecatedModelPath)
+    model.nativeBooster.saveModel(nativeUbjModelPath)
    assert(compareTwoFiles(new File(modelUbjPath, "data/XGBoostClassificationModel").getPath,
-      nativeDeprecatedModelPath))
+      nativeUbjModelPath))
    // json file should be indifferent with ubj file
    val modelJsonPath = new File(tempDir.toFile, "xgbcJson").getPath
    model.write.option("format", "json").save(modelJsonPath)
-    val nativeUbjModelPath = new File(tempDir.toFile, "nativeModel1.ubj").getPath
+    val nativeUbjModelPath1 = new File(tempDir.toFile, "nativeModel1.ubj").getPath
-    model.nativeBooster.saveModel(nativeUbjModelPath)
+    model.nativeBooster.saveModel(nativeUbjModelPath1)
    assert(!compareTwoFiles(new File(modelJsonPath, "data/XGBoostClassificationModel").getPath,
-      nativeUbjModelPath))
+      nativeUbjModelPath1))
  }
  test("native json model file should store feature_name and feature_type") {
--- a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostRegressorSuite.scala
+++ b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostRegressorSuite.scala
@ -1,5 +1,5 @@
 /*
- Copyright (c) 2014-2022 by Contributors
+ Copyright (c) 2014-2024 by Contributors
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@ -333,21 +333,24 @@ class XGBoostRegressorSuite extends AnyFunSuite with PerTest with TmpFolderPerSu
    assert(compareTwoFiles(new File(modelPath, "data/XGBoostRegressionModel").getPath,
      nativeJsonModelPath))
-    // test default "deprecated"
+    // test default "ubj"
    val modelUbjPath = new File(tempDir.toFile, "xgbcUbj").getPath
    model.write.save(modelUbjPath)
-    val nativeDeprecatedModelPath = new File(tempDir.toFile, "nativeModel").getPath
-    model.nativeBooster.saveModel(nativeDeprecatedModelPath)
-    assert(compareTwoFiles(new File(modelUbjPath, "data/XGBoostRegressionModel").getPath,
-      nativeDeprecatedModelPath))
-    // json file should be indifferent with ubj file
+    val nativeUbjModelPath = new File(tempDir.toFile, "nativeModel.ubj").getPath
-    val modelJsonPath = new File(tempDir.toFile, "xgbcJson").getPath
-    model.write.option("format", "json").save(modelJsonPath)
-    val nativeUbjModelPath = new File(tempDir.toFile, "nativeModel1.ubj").getPath
    model.nativeBooster.saveModel(nativeUbjModelPath)
-    assert(!compareTwoFiles(new File(modelJsonPath, "data/XGBoostRegressionModel").getPath,
-      nativeUbjModelPath))
-  }
+    assert(compareTwoFiles(new File(modelUbjPath, "data/XGBoostRegressionModel").getPath,
+      nativeUbjModelPath))
+    // test the deprecated format
+    val modelDeprecatedPath = new File(tempDir.toFile, "modelDeprecated").getPath
+    model.write.option("format", "deprecated").save(modelDeprecatedPath)
+    val nativeDeprecatedModelPath = new File(tempDir.toFile, "nativeModel.deprecated").getPath
+    model.nativeBooster.saveModel(nativeDeprecatedModelPath)
+    assert(compareTwoFiles(new File(modelDeprecatedPath, "data/XGBoostRegressionModel").getPath,
+      nativeDeprecatedModelPath))
+  }
 }
--- a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java
+++ b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java
@ -34,7 +34,7 @@ import org.apache.commons.logging.LogFactory;
 * Booster for xgboost, this is a model API that support interactive build of a XGBoost Model
 */
 public class Booster implements Serializable, KryoSerializable {
-  public static final String DEFAULT_FORMAT = "deprecated";
+  public static final String DEFAULT_FORMAT = "ubj";
  private static final Log logger = LogFactory.getLog(Booster.class);
  // handle to the booster.
  private long handle = 0;
@ -788,8 +788,7 @@ public class Booster implements Serializable, KryoSerializable {
  }
  /**
-   * Save model into raw byte array. Currently it's using the deprecated format as
+   * Save model into raw byte array in the UBJSON ("ubj") format.
-   * default, which will be changed into `ubj` in future releases.
   *
   * @return the saved byte array
   * @throws XGBoostError native error
--- a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/Booster.scala
+++ b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/Booster.scala
@ -337,8 +337,7 @@ class Booster private[xgboost4j](private[xgboost4j] var booster: JBooster)
  }
  /**
-    * Save model into a raw byte array. Currently it's using the deprecated format as
+    * Save model into a raw byte array in the UBJSON ("ubj") format.
-   *  default, which will be changed into `ubj` in future releases.
    */
  @throws(classOf[XGBoostError])
  def toByteArray: Array[Byte] = {
--- a/python-package/xgboost/core.py
+++ b/python-package/xgboost/core.py
@ -2613,7 +2613,7 @@ class Booster:
        else:
            raise TypeError("fname must be a string or os PathLike")
-    def save_raw(self, raw_format: str = "deprecated") -> bytearray:
+    def save_raw(self, raw_format: str = "ubj") -> bytearray:
        """Save the model to a in memory buffer representation instead of file.
        Parameters
--- a/python-package/xgboost/testing/init.py
+++ b/python-package/xgboost/testing/init.py
@ -630,7 +630,7 @@ sparse_datasets_strategy = strategies.sampled_from(
 def make_datasets_with_margin(
    unweighted_strategy: strategies.SearchStrategy,
-) -> Callable:
+) -> Callable[[], strategies.SearchStrategy[TestDataset]]:
    """Factory function for creating strategies that generates datasets with weight and
    base margin.
@ -668,8 +668,7 @@ def make_datasets_with_margin(
 # A strategy for drawing from a set of example datasets. May add random weights to the
 # dataset
-@memory.cache
+def make_dataset_strategy() -> strategies.SearchStrategy[TestDataset]:
-def make_dataset_strategy() -> Callable:
    _unweighted_datasets_strategy = strategies.sampled_from(
        [
            TestDataset(
--- a/src/c_api/c_api.cc
+++ b/src/c_api/c_api.cc
@ -1313,10 +1313,8 @@ XGB_DLL int XGBoosterLoadModel(BoosterHandle handle, const char* fname) {
 namespace {
 void WarnOldModel() {
-  if (XGBOOST_VER_MAJOR >= 2) {
  LOG(WARNING) << "Saving into deprecated binary model format, please consider using `json` or "
-                    "`ubj`. Model format will default to JSON in XGBoost 2.2 if not specified.";
+                  "`ubj`. Model format is default to UBJSON in XGBoost 2.1 if not specified.";
-  }
 }
 }  // anonymous namespace
@ -1339,14 +1337,14 @@ XGB_DLL int XGBoosterSaveModel(BoosterHandle handle, const char *fname) {
    save_json(std::ios::out);
  } else if (common::FileExtension(fname) == "ubj") {
    save_json(std::ios::binary);
-  } else if (XGBOOST_VER_MAJOR == 2 && XGBOOST_VER_MINOR >= 2) {
+  } else if (common::FileExtension(fname) == "deprecated") {
-    LOG(WARNING) << "Saving model to JSON as default.  You can use file extension `json`, `ubj` or "
-                    "`deprecated` to choose between formats.";
-    save_json(std::ios::out);
-  } else {
    WarnOldModel();
    auto *bst = static_cast<Learner *>(handle);
    bst->SaveModel(fo.get());
+  } else {
+    LOG(WARNING) << "Saving model in the UBJSON format as default.  You can use file extension:"
+                    " `json`, `ubj` or `deprecated` to choose between formats.";
+    save_json(std::ios::binary);
  }
  API_END();
 }
--- a/tests/ci_build/lint_python.py
+++ b/tests/ci_build/lint_python.py
@ -27,6 +27,7 @@ class LintersPaths:
        "tests/python/test_quantile_dmatrix.py",
        "tests/python/test_tree_regularization.py",
        "tests/python/test_shap.py",
+        "tests/python/test_model_io.py",
        "tests/python/test_with_pandas.py",
        "tests/python-gpu/",
        "tests/python-sycl/",
@ -83,6 +84,7 @@ class LintersPaths:
        "tests/python/test_multi_target.py",
        "tests/python-gpu/test_gpu_data_iterator.py",
        "tests/python-gpu/load_pickle.py",
+        "tests/python/test_model_io.py",
        "tests/test_distributed/test_with_spark/test_data.py",
        "tests/test_distributed/test_gpu_with_spark/test_data.py",
        "tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py",
--- a/tests/python/test_basic.py
+++ b/tests/python/test_basic.py
@ -10,46 +10,48 @@ import pytest
 import xgboost as xgb
 from xgboost import testing as tm
-dpath = 'demo/data/'
+dpath = "demo/data/"
 rng = np.random.RandomState(1994)
 class TestBasic:
    def test_compat(self):
        from xgboost.compat import lazy_isinstance
        a = np.array([1, 2, 3])
-        assert lazy_isinstance(a, 'numpy', 'ndarray')
+        assert lazy_isinstance(a, "numpy", "ndarray")
-        assert not lazy_isinstance(a, 'numpy', 'dataframe')
+        assert not lazy_isinstance(a, "numpy", "dataframe")
    def test_basic(self):
        dtrain, dtest = tm.load_agaricus(__file__)
-        param = {'max_depth': 2, 'eta': 1,
+        param = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"}
-                 'objective': 'binary:logistic'}
        # specify validations set to watch performance
-        watchlist = [(dtrain, 'train')]
+        watchlist = [(dtrain, "train")]
        num_round = 2
-        bst = xgb.train(param, dtrain, num_round, watchlist, verbose_eval=True)
+        bst = xgb.train(param, dtrain, num_round, evals=watchlist, verbose_eval=True)
        preds = bst.predict(dtrain)
        labels = dtrain.get_label()
-        err = sum(1 for i in range(len(preds))
+        err = sum(
-                  if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
+            1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]
+        ) / float(len(preds))
        # error must be smaller than 10%
        assert err < 0.1
        preds = bst.predict(dtest)
        labels = dtest.get_label()
-        err = sum(1 for i in range(len(preds))
+        err = sum(
-                  if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
+            1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]
+        ) / float(len(preds))
        # error must be smaller than 10%
        assert err < 0.1
        with tempfile.TemporaryDirectory() as tmpdir:
-            dtest_path = os.path.join(tmpdir, 'dtest.dmatrix')
+            dtest_path = os.path.join(tmpdir, "dtest.dmatrix")
            # save dmatrix into binary buffer
            dtest.save_binary(dtest_path)
            # save model
-            model_path = os.path.join(tmpdir, 'model.booster')
+            model_path = os.path.join(tmpdir, "model.ubj")
            bst.save_model(model_path)
            # load model and data in
            bst2 = xgb.Booster(model_file=model_path)
@ -59,17 +61,21 @@ class TestBasic:
            assert np.sum(np.abs(preds2 - preds)) == 0
    def test_metric_config(self):
-        # Make sure that the metric configuration happens in booster so the
+        # Make sure that the metric configuration happens in booster so the string
-        # string `['error', 'auc']` doesn't get passed down to core.
+        # `['error', 'auc']` doesn't get passed down to core.
        dtrain, dtest = tm.load_agaricus(__file__)
-        param = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
+        param = {
-                 'objective': 'binary:logistic', 'eval_metric': ['error', 'auc']}
+            "max_depth": 2,
-        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
+            "eta": 1,
+            "objective": "binary:logistic",
+            "eval_metric": ["error", "auc"],
+        }
+        watchlist = [(dtest, "eval"), (dtrain, "train")]
        num_round = 2
-        booster = xgb.train(param, dtrain, num_round, watchlist)
+        booster = xgb.train(param, dtrain, num_round, evals=watchlist)
        predt_0 = booster.predict(dtrain)
        with tempfile.TemporaryDirectory() as tmpdir:
-            path = os.path.join(tmpdir, 'model.json')
+            path = os.path.join(tmpdir, "model.json")
            booster.save_model(path)
            booster = xgb.Booster(params=param, model_file=path)
@ -78,22 +84,23 @@ class TestBasic:
    def test_multiclass(self):
        dtrain, dtest = tm.load_agaricus(__file__)
-        param = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'num_class': 2}
+        param = {"max_depth": 2, "eta": 1, "num_class": 2}
        # specify validations set to watch performance
-        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
+        watchlist = [(dtest, "eval"), (dtrain, "train")]
        num_round = 2
-        bst = xgb.train(param, dtrain, num_round, watchlist)
+        bst = xgb.train(param, dtrain, num_round, evals=watchlist)
        # this is prediction
        preds = bst.predict(dtest)
        labels = dtest.get_label()
-        err = sum(1 for i in range(len(preds))
+        err = sum(1 for i in range(len(preds)) if preds[i] != labels[i]) / float(
-                  if preds[i] != labels[i]) / float(len(preds))
+            len(preds)
+        )
        # error must be smaller than 10%
        assert err < 0.1
        with tempfile.TemporaryDirectory() as tmpdir:
-            dtest_path = os.path.join(tmpdir, 'dtest.buffer')
+            dtest_path = os.path.join(tmpdir, "dtest.buffer")
-            model_path = os.path.join(tmpdir, 'xgb.model')
+            model_path = os.path.join(tmpdir, "model.ubj")
            # save dmatrix into binary buffer
            dtest.save_binary(dtest_path)
            # save model
@ -108,33 +115,39 @@ class TestBasic:
    def test_dump(self):
        data = np.random.randn(100, 2)
        target = np.array([0, 1] * 50)
-        features = ['Feature1', 'Feature2']
+        features = ["Feature1", "Feature2"]
        dm = xgb.DMatrix(data, label=target, feature_names=features)
-        params = {'objective': 'binary:logistic',
+        params = {
-                  'eval_metric': 'logloss',
+            "objective": "binary:logistic",
-                  'eta': 0.3,
+            "eval_metric": "logloss",
-                  'max_depth': 1}
+            "eta": 0.3,
+            "max_depth": 1,
+        }
        bst = xgb.train(params, dm, num_boost_round=1)
        # number of feature importances should == number of features
        dump1 = bst.get_dump()
-        assert len(dump1) == 1, 'Expected only 1 tree to be dumped.'
+        assert len(dump1) == 1, "Expected only 1 tree to be dumped."
-        len(dump1[0].splitlines()) == 3, 'Expected 1 root and 2 leaves - 3 lines in dump.'
+        len(
+            dump1[0].splitlines()
+        ) == 3, "Expected 1 root and 2 leaves - 3 lines in dump."
        dump2 = bst.get_dump(with_stats=True)
-        assert dump2[0].count('\n') == 3, 'Expected 1 root and 2 leaves - 3 lines in dump.'
+        assert (
-        msg = 'Expected more info when with_stats=True is given.'
+            dump2[0].count("\n") == 3
-        assert dump2[0].find('\n') > dump1[0].find('\n'), msg
+        ), "Expected 1 root and 2 leaves - 3 lines in dump."
+        msg = "Expected more info when with_stats=True is given."
+        assert dump2[0].find("\n") > dump1[0].find("\n"), msg
        dump3 = bst.get_dump(dump_format="json")
        dump3j = json.loads(dump3[0])
-        assert dump3j['nodeid'] == 0, 'Expected the root node on top.'
+        assert dump3j["nodeid"] == 0, "Expected the root node on top."
        dump4 = bst.get_dump(dump_format="json", with_stats=True)
        dump4j = json.loads(dump4[0])
-        assert 'gain' in dump4j, "Expected 'gain' to be dumped in JSON."
+        assert "gain" in dump4j, "Expected 'gain' to be dumped in JSON."
        with pytest.raises(ValueError):
            bst.get_dump(fmap="foo")
@ -163,12 +176,14 @@ class TestBasic:
    def test_load_file_invalid(self):
        with pytest.raises(xgb.core.XGBoostError):
-            xgb.Booster(model_file='incorrect_path')
+            xgb.Booster(model_file="incorrect_path")
        with pytest.raises(xgb.core.XGBoostError):
-            xgb.Booster(model_file=u'不正なパス')
+            xgb.Booster(model_file="不正なパス")
-    @pytest.mark.parametrize("path", ["모델.ubj", "がうる・ぐら.json"], ids=["path-0", "path-1"])
+    @pytest.mark.parametrize(
+        "path", ["모델.ubj", "がうる・ぐら.json"], ids=["path-0", "path-1"]
+    )
    def test_unicode_path(self, tmpdir, path):
        model_path = pathlib.Path(tmpdir) / path
        dtrain, _ = tm.load_agaricus(__file__)
@ -180,12 +195,11 @@ class TestBasic:
        assert bst.get_dump(dump_format="text") == bst2.get_dump(dump_format="text")
    def test_dmatrix_numpy_init_omp(self):
        rows = [1000, 11326, 15000]
        cols = 50
        for row in rows:
            X = np.random.randn(row, cols)
-            y = np.random.randn(row).astype('f')
+            y = np.random.randn(row).astype("f")
            dm = xgb.DMatrix(X, y, nthread=0)
            np.testing.assert_array_equal(dm.get_label(), y)
            assert dm.num_row() == row
@ -198,8 +212,7 @@ class TestBasic:
    def test_cv(self):
        dm, _ = tm.load_agaricus(__file__)
-        params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
+        params = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"}
-                  'objective': 'binary:logistic'}
        # return np.ndarray
        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=False)
@ -208,19 +221,18 @@ class TestBasic:
    def test_cv_no_shuffle(self):
        dm, _ = tm.load_agaricus(__file__)
-        params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
+        params = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"}
-                  'objective': 'binary:logistic'}
        # return np.ndarray
-        cv = xgb.cv(params, dm, num_boost_round=10, shuffle=False, nfold=10,
+        cv = xgb.cv(
-                    as_pandas=False)
+            params, dm, num_boost_round=10, shuffle=False, nfold=10, as_pandas=False
+        )
        assert isinstance(cv, dict)
        assert len(cv) == (4)
    def test_cv_explicit_fold_indices(self):
        dm, _ = tm.load_agaricus(__file__)
-        params = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'objective':
+        params = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"}
-                  'binary:logistic'}
        folds = [
            # Train        Test
            ([1, 3], [5, 8]),
@ -228,15 +240,13 @@ class TestBasic:
        ]
        # return np.ndarray
-        cv = xgb.cv(params, dm, num_boost_round=10, folds=folds,
+        cv = xgb.cv(params, dm, num_boost_round=10, folds=folds, as_pandas=False)
-                    as_pandas=False)
        assert isinstance(cv, dict)
        assert len(cv) == (4)
    @pytest.mark.skipif(**tm.skip_s390x())
    def test_cv_explicit_fold_indices_labels(self):
-        params = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'objective':
+        params = {"max_depth": 2, "eta": 1, "objective": "reg:squarederror"}
-                  'reg:squarederror'}
        N = 100
        F = 3
        dm = xgb.DMatrix(data=np.random.randn(N, F), label=np.arange(N))
@ -252,9 +262,10 @@ class TestBasic:
                super().__init__()
            def after_iteration(
-                self, model,
+                self,
+                model,
                epoch: int,
-                evals_log: xgb.callback.TrainingCallback.EvalsLog
+                evals_log: xgb.callback.TrainingCallback.EvalsLog,
            ):
                print([fold.dtest.get_label() for fold in model.cvfolds])
@ -263,12 +274,18 @@ class TestBasic:
        # Run cross validation and capture standard out to test callback result
        with tm.captured_output() as (out, err):
            xgb.cv(
-                params, dm, num_boost_round=1, folds=folds, callbacks=[cb],
+                params,
-                as_pandas=False
+                dm,
+                num_boost_round=1,
+                folds=folds,
+                callbacks=[cb],
+                as_pandas=False,
            )
            output = out.getvalue().strip()
-        solution = ('[array([5., 8.], dtype=float32), array([23., 43., 11.],' +
+        solution = (
-                    ' dtype=float32)]')
+            "[array([5., 8.], dtype=float32), array([23., 43., 11.],"
+            + " dtype=float32)]"
+        )
        assert output == solution
@ -285,7 +302,7 @@ class TestBasicPathLike:
        """Saving to a binary file using pathlib from a DMatrix."""
        data = np.random.randn(100, 2)
        target = np.array([0, 1] * 50)
-        features = ['Feature1', 'Feature2']
+        features = ["Feature1", "Feature2"]
        dm = xgb.DMatrix(data, label=target, feature_names=features)
@ -299,42 +316,3 @@ class TestBasicPathLike:
        """An invalid model_file path should raise XGBoostError."""
        with pytest.raises(xgb.core.XGBoostError):
            xgb.Booster(model_file=Path("invalidpath"))
-    def test_Booster_save_and_load(self):
-        """Saving and loading model files from paths."""
-        save_path = Path("saveload.model")
-        data = np.random.randn(100, 2)
-        target = np.array([0, 1] * 50)
-        features = ['Feature1', 'Feature2']
-        dm = xgb.DMatrix(data, label=target, feature_names=features)
-        params = {'objective': 'binary:logistic',
-                  'eval_metric': 'logloss',
-                  'eta': 0.3,
-                  'max_depth': 1}
-        bst = xgb.train(params, dm, num_boost_round=1)
-        # save, assert exists
-        bst.save_model(save_path)
-        assert save_path.exists()
-        def dump_assertions(dump):
-            """Assertions for the expected dump from Booster"""
-            assert len(dump) == 1, 'Exepcted only 1 tree to be dumped.'
-            assert len(dump[0].splitlines()) == 3, 'Expected 1 root and 2 leaves - 3 lines.'
-        # load the model again using Path
-        bst2 = xgb.Booster(model_file=save_path)
-        dump2 = bst2.get_dump()
-        dump_assertions(dump2)
-        # load again using load_model
-        bst3 = xgb.Booster()
-        bst3.load_model(save_path)
-        dump3 = bst3.get_dump()
-        dump_assertions(dump3)
-        # remove file
-        Path.unlink(save_path)
--- a/tests/python/test_basic_models.py
+++ b/tests/python/test_basic_models.py
@ -15,33 +15,9 @@ dpath = tm.data_dir(__file__)
 rng = np.random.RandomState(1994)
-def json_model(model_path: str, parameters: dict) -> dict:
-    datasets = pytest.importorskip("sklearn.datasets")
-    X, y = datasets.make_classification(64, n_features=8, n_classes=3, n_informative=6)
-    if parameters.get("objective", None) == "multi:softmax":
-        parameters["num_class"] = 3
-    dm1 = xgb.DMatrix(X, y)
-    bst = xgb.train(parameters, dm1)
-    bst.save_model(model_path)
-    if model_path.endswith("ubj"):
-        import ubjson
-        with open(model_path, "rb") as ubjfd:
-            model = ubjson.load(ubjfd)
-    else:
-        with open(model_path, "r") as fd:
-            model = json.load(fd)
-    return model
 class TestModels:
    def test_glm(self):
-        param = {'verbosity': 0, 'objective': 'binary:logistic',
+        param = {'objective': 'binary:logistic',
                 'booster': 'gblinear', 'alpha': 0.0001, 'lambda': 1,
                 'nthread': 1}
        dtrain, dtest = tm.load_agaricus(__file__)
@ -73,7 +49,7 @@ class TestModels:
        with tempfile.TemporaryDirectory() as tmpdir:
            dtest_path = os.path.join(tmpdir, 'dtest.dmatrix')
-            model_path = os.path.join(tmpdir, 'xgboost.model.dart')
+            model_path = os.path.join(tmpdir, "xgboost.model.dart.ubj")
            # save dmatrix into binary buffer
            dtest.save_binary(dtest_path)
            model_path = model_path
@ -101,7 +77,6 @@ class TestModels:
        # check whether sample_type and normalize_type work
        num_round = 50
-        param['verbosity'] = 0
        param['learning_rate'] = 0.1
        param['rate_drop'] = 0.1
        preds_list = []
@ -214,8 +189,7 @@ class TestModels:
        assert set(evals_result['eval'].keys()) == {'auc', 'error', 'logloss'}
    def test_fpreproc(self):
-        param = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
+        param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
-                 'objective': 'binary:logistic'}
        num_round = 2
        def fpreproc(dtrain, dtest, param):
@ -229,8 +203,7 @@ class TestModels:
               metrics={'auc'}, seed=0, fpreproc=fpreproc)
    def test_show_stdv(self):
-        param = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
+        param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
-                 'objective': 'binary:logistic'}
        num_round = 2
        dtrain, _ = tm.load_agaricus(__file__)
        xgb.cv(param, dtrain, num_round, nfold=5,
@ -273,142 +246,6 @@ class TestModels:
        bst = xgb.train([], dm2)
        bst.predict(dm2)  # success
-    def test_model_binary_io(self):
-        model_path = 'test_model_binary_io.bin'
-        parameters = {'tree_method': 'hist', 'booster': 'gbtree',
-                      'scale_pos_weight': '0.5'}
-        X = np.random.random((10, 3))
-        y = np.random.random((10,))
-        dtrain = xgb.DMatrix(X, y)
-        bst = xgb.train(parameters, dtrain, num_boost_round=2)
-        bst.save_model(model_path)
-        bst = xgb.Booster(model_file=model_path)
-        os.remove(model_path)
-        config = json.loads(bst.save_config())
-        assert float(config['learner']['objective'][
-            'reg_loss_param']['scale_pos_weight']) == 0.5
-        buf = bst.save_raw()
-        from_raw = xgb.Booster()
-        from_raw.load_model(buf)
-        buf_from_raw = from_raw.save_raw()
-        assert buf == buf_from_raw
-    def run_model_json_io(self, parameters: dict, ext: str) -> None:
-        if ext == "ubj" and tm.no_ubjson()["condition"]:
-            pytest.skip(tm.no_ubjson()["reason"])
-        loc = locale.getpreferredencoding(False)
-        model_path = 'test_model_json_io.' + ext
-        j_model = json_model(model_path, parameters)
-        assert isinstance(j_model['learner'], dict)
-        bst = xgb.Booster(model_file=model_path)
-        bst.save_model(fname=model_path)
-        if ext == "ubj":
-            import ubjson
-            with open(model_path, "rb") as ubjfd:
-                j_model = ubjson.load(ubjfd)
-        else:
-            with open(model_path, 'r') as fd:
-                j_model = json.load(fd)
-        assert isinstance(j_model['learner'], dict)
-        os.remove(model_path)
-        assert locale.getpreferredencoding(False) == loc
-        json_raw = bst.save_raw(raw_format="json")
-        from_jraw = xgb.Booster()
-        from_jraw.load_model(json_raw)
-        ubj_raw = bst.save_raw(raw_format="ubj")
-        from_ubjraw = xgb.Booster()
-        from_ubjraw.load_model(ubj_raw)
-        if parameters.get("multi_strategy", None) != "multi_output_tree":
-            # old binary model is not supported.
-            old_from_json = from_jraw.save_raw(raw_format="deprecated")
-            old_from_ubj = from_ubjraw.save_raw(raw_format="deprecated")
-            assert old_from_json == old_from_ubj
-        raw_json = bst.save_raw(raw_format="json")
-        pretty = json.dumps(json.loads(raw_json), indent=2) + "\n\n"
-        bst.load_model(bytearray(pretty, encoding="ascii"))
-        if parameters.get("multi_strategy", None) != "multi_output_tree":
-            # old binary model is not supported.
-            old_from_json = from_jraw.save_raw(raw_format="deprecated")
-            old_from_ubj = from_ubjraw.save_raw(raw_format="deprecated")
-            assert old_from_json == old_from_ubj
-        rng = np.random.default_rng()
-        X = rng.random(size=from_jraw.num_features() * 10).reshape(
-            (10, from_jraw.num_features())
-        )
-        predt_from_jraw = from_jraw.predict(xgb.DMatrix(X))
-        predt_from_bst = bst.predict(xgb.DMatrix(X))
-        np.testing.assert_allclose(predt_from_jraw, predt_from_bst)
-    @pytest.mark.parametrize("ext", ["json", "ubj"])
-    def test_model_json_io(self, ext: str) -> None:
-        parameters = {"booster": "gbtree", "tree_method": "hist"}
-        self.run_model_json_io(parameters, ext)
-        parameters = {
-            "booster": "gbtree",
-            "tree_method": "hist",
-            "multi_strategy": "multi_output_tree",
-            "objective": "multi:softmax",
-        }
-        self.run_model_json_io(parameters, ext)
-        parameters = {"booster": "gblinear"}
-        self.run_model_json_io(parameters, ext)
-        parameters = {"booster": "dart", "tree_method": "hist"}
-        self.run_model_json_io(parameters, ext)
-    @pytest.mark.skipif(**tm.no_json_schema())
-    def test_json_io_schema(self):
-        import jsonschema
-        model_path = 'test_json_schema.json'
-        path = os.path.dirname(
-            os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-        doc = os.path.join(path, 'doc', 'model.schema')
-        with open(doc, 'r') as fd:
-            schema = json.load(fd)
-        parameters = {'tree_method': 'hist', 'booster': 'gbtree'}
-        jsonschema.validate(instance=json_model(model_path, parameters),
-                            schema=schema)
-        os.remove(model_path)
-        parameters = {'tree_method': 'hist', 'booster': 'dart'}
-        jsonschema.validate(instance=json_model(model_path, parameters),
-                            schema=schema)
-        os.remove(model_path)
-        try:
-            dtrain, _ = tm.load_agaricus(__file__)
-            xgb.train({'objective': 'foo'}, dtrain, num_boost_round=1)
-        except ValueError as e:
-            e_str = str(e)
-            beg = e_str.find('Objective candidate')
-            end = e_str.find('Stack trace')
-            e_str = e_str[beg: end]
-            e_str = e_str.strip()
-            splited = e_str.splitlines()
-            objectives = [s.split(': ')[1] for s in splited]
-            j_objectives = schema['properties']['learner']['properties'][
-                'objective']['oneOf']
-            objectives_from_schema = set()
-            for j_obj in j_objectives:
-                objectives_from_schema.add(
-                    j_obj['properties']['name']['const'])
-            objectives = set(objectives)
-            assert objectives == objectives_from_schema
    @pytest.mark.skipif(**tm.no_json_schema())
    def test_json_dump_schema(self):
        import jsonschema
@ -470,29 +307,6 @@ class TestModels:
        for d in text_dump:
            assert d.find(r"feature \"2\"") != -1
    def test_categorical_model_io(self):
        """Categorical models can only be saved as JSON or UBJSON.

        Saving to a ``.binary`` path must raise ``ValueError``; saving to
        ``.json`` and ``.ubj`` must round-trip with unchanged predictions.
        """
        X, y = tm.make_categorical(256, 16, 71, False)
        Xy = xgb.DMatrix(X, y, enable_categorical=True)
        booster = xgb.train({"tree_method": "approx"}, Xy, num_boost_round=16)
        expected = booster.predict(Xy)

        with tempfile.TemporaryDirectory() as tempdir:
            binary_path = os.path.join(tempdir, "model.binary")
            with pytest.raises(ValueError, match=r".*JSON/UBJSON.*"):
                booster.save_model(binary_path)

            # Each iteration saves the booster loaded by the previous one.
            for file_name in ("model.json", "model.ubj"):
                path = os.path.join(tempdir, file_name)
                booster.save_model(path)
                booster = xgb.Booster(model_file=path)
                np.testing.assert_allclose(expected, booster.predict(Xy))
    @pytest.mark.skipif(**tm.no_sklearn())
    def test_attributes(self):
        from sklearn.datasets import load_iris
--- a/tests/python/test_callback.py
+++ b/tests/python/test_callback.py
@ -278,14 +278,18 @@ class TestCallbacks:
        dtrain, dtest = tm.load_agaricus(__file__)
-        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
+        watchlist = [(dtest, "eval"), (dtrain, "train")]
        num_round = 4
        # learning_rates as a list
        # init eta with 0 to check whether learning_rates work
-        param = {'max_depth': 2, 'eta': 0, 'verbosity': 0,
+        param = {
-                 'objective': 'binary:logistic', 'eval_metric': 'error',
+            "max_depth": 2,
-                 'tree_method': tree_method}
+            "eta": 0,
            "objective": "binary:logistic",
            "eval_metric": "error",
            "tree_method": tree_method,
        }
        evals_result = {}
        bst = xgb.train(
            param,
@ -295,15 +299,19 @@ class TestCallbacks:
            callbacks=[scheduler([0.8, 0.7, 0.6, 0.5])],
            evals_result=evals_result,
        )
-        eval_errors_0 = list(map(float, evals_result['eval']['error']))
+        eval_errors_0 = list(map(float, evals_result["eval"]["error"]))
        assert isinstance(bst, xgb.core.Booster)
        # validation error should decrease, if eta > 0
        assert eval_errors_0[0] > eval_errors_0[-1]
        # init learning_rate with 0 to check whether learning_rates work
-        param = {'max_depth': 2, 'learning_rate': 0, 'verbosity': 0,
+        param = {
-                 'objective': 'binary:logistic', 'eval_metric': 'error',
+            "max_depth": 2,
-                 'tree_method': tree_method}
+            "learning_rate": 0,
            "objective": "binary:logistic",
            "eval_metric": "error",
            "tree_method": tree_method,
        }
        evals_result = {}
        bst = xgb.train(
@ -314,15 +322,17 @@ class TestCallbacks:
            callbacks=[scheduler([0.8, 0.7, 0.6, 0.5])],
            evals_result=evals_result,
        )
-        eval_errors_1 = list(map(float, evals_result['eval']['error']))
+        eval_errors_1 = list(map(float, evals_result["eval"]["error"]))
        assert isinstance(bst, xgb.core.Booster)
        # validation error should decrease, if learning_rate > 0
        assert eval_errors_1[0] > eval_errors_1[-1]
        # check if learning_rates override default value of eta/learning_rate
        param = {
-            'max_depth': 2, 'verbosity': 0, 'objective': 'binary:logistic',
+            "max_depth": 2,
-            'eval_metric': 'error', 'tree_method': tree_method
+            "objective": "binary:logistic",
            "eval_metric": "error",
            "tree_method": tree_method,
        }
        evals_result = {}
        bst = xgb.train(
--- a/tests/python/test_config.py
+++ b/tests/python/test_config.py
@ -12,6 +12,7 @@ def test_global_config_verbosity(verbosity_level):
        return xgb.get_config()["verbosity"]
    old_verbosity = get_current_verbosity()
    assert old_verbosity == 1
    with xgb.config_context(verbosity=verbosity_level):
        new_verbosity = get_current_verbosity()
        assert new_verbosity == verbosity_level
@ -30,7 +31,10 @@ def test_global_config_use_rmm(use_rmm):
    assert old_use_rmm_flag == get_current_use_rmm_flag()
-def test_nested_config():
+def test_nested_config() -> None:
    verbosity = xgb.get_config()["verbosity"]
    assert verbosity == 1
    with xgb.config_context(verbosity=3):
        assert xgb.get_config()["verbosity"] == 3
        with xgb.config_context(verbosity=2):
@ -45,13 +49,15 @@ def test_nested_config():
        with xgb.config_context(verbosity=None):
            assert xgb.get_config()["verbosity"] == 3  # None has no effect
    verbosity = xgb.get_config()["verbosity"]
    xgb.set_config(verbosity=2)
    assert xgb.get_config()["verbosity"] == 2
    with xgb.config_context(verbosity=3):
        assert xgb.get_config()["verbosity"] == 3
    xgb.set_config(verbosity=verbosity)  # reset
    verbosity = xgb.get_config()["verbosity"]
    assert verbosity == 1
 def test_thread_safty():
    n_threads = multiprocessing.cpu_count()
--- a/tests/python/test_dmatrix.py
+++ b/tests/python/test_dmatrix.py
@ -1,6 +1,7 @@
 import csv
 import os
 import tempfile
 import warnings
 import numpy as np
 import pytest
@ -24,20 +25,18 @@ class TestDMatrix:
        with pytest.warns(UserWarning):
            data._warn_unused_missing("uri", 4)
-        with pytest.warns(None) as record:
+        with warnings.catch_warnings():
            warnings.simplefilter("error")
            data._warn_unused_missing("uri", None)
            data._warn_unused_missing("uri", np.nan)
-            assert len(record) == 0
+        with warnings.catch_warnings():
-
+            warnings.simplefilter("error")
        with pytest.warns(None) as record:
            x = rng.randn(10, 10)
            y = rng.randn(10)
            xgb.DMatrix(x, y, missing=4)
            assert len(record) == 0
    def test_dmatrix_numpy_init(self):
        data = np.random.randn(5, 5)
        dm = xgb.DMatrix(data)
@ -264,7 +263,7 @@ class TestDMatrix:
        dtrain = xgb.DMatrix(x, label=rng.binomial(1, 0.3, nrow))
        assert (dtrain.num_row(), dtrain.num_col()) == (nrow, ncol)
        watchlist = [(dtrain, "train")]
-        param = {"max_depth": 3, "objective": "binary:logistic", "verbosity": 0}
+        param = {"max_depth": 3, "objective": "binary:logistic"}
        bst = xgb.train(param, dtrain, 5, watchlist)
        bst.predict(dtrain)
@ -302,7 +301,7 @@ class TestDMatrix:
        dtrain = xgb.DMatrix(x, label=rng.binomial(1, 0.3, nrow))
        assert (dtrain.num_row(), dtrain.num_col()) == (nrow, ncol)
        watchlist = [(dtrain, "train")]
-        param = {"max_depth": 3, "objective": "binary:logistic", "verbosity": 0}
+        param = {"max_depth": 3, "objective": "binary:logistic"}
        bst = xgb.train(param, dtrain, 5, watchlist)
        bst.predict(dtrain)
@ -475,8 +474,10 @@ class TestDMatrixColumnSplit:
    def test_uri(self):
        def verify_uri():
            rank = xgb.collective.get_rank()
            with tempfile.TemporaryDirectory() as tmpdir:
                filename = os.path.join(tmpdir, f"test_data_{rank}.csv")
                data = np.random.rand(5, 5)
            filename = f"test_data_{rank}.csv"
                with open(filename, mode="w", newline="") as file:
                    writer = csv.writer(file)
                    for row in data:
--- a/tests/python/test_early_stopping.py
+++ b/tests/python/test_early_stopping.py
@ -67,8 +67,10 @@ class TestEarlyStopping:
        X = digits['data']
        y = digits['target']
        dm = xgb.DMatrix(X, label=y)
-        params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
+        params = {
-                  'objective': 'binary:logistic', 'eval_metric': 'error'}
+            'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic',
            'eval_metric': 'error'
        }
        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
                    early_stopping_rounds=10)
--- a/tests/python/test_eval_metrics.py
+++ b/tests/python/test_eval_metrics.py
@ -9,29 +9,13 @@ rng = np.random.RandomState(1337)
 class TestEvalMetrics:
-    xgb_params_01 = {
+    xgb_params_01 = {'nthread': 1, 'eval_metric': 'error'}
        'verbosity': 0,
        'nthread': 1,
        'eval_metric': 'error'
    }
-    xgb_params_02 = {
+    xgb_params_02 = {'nthread': 1, 'eval_metric': ['error']}
        'verbosity': 0,
        'nthread': 1,
        'eval_metric': ['error']
    }
-    xgb_params_03 = {
+    xgb_params_03 = {'nthread': 1, 'eval_metric': ['rmse', 'error']}
        'verbosity': 0,
        'nthread': 1,
        'eval_metric': ['rmse', 'error']
    }
-    xgb_params_04 = {
+    xgb_params_04 = {'nthread': 1, 'eval_metric': ['error', 'rmse']}
        'verbosity': 0,
        'nthread': 1,
        'eval_metric': ['error', 'rmse']
    }
    def evalerror_01(self, preds, dtrain):
        labels = dtrain.get_label()
--- a/tests/python/test_linear.py
+++ b/tests/python/test_linear.py
@ -22,8 +22,14 @@ coord_strategy = strategies.fixed_dictionaries({
 def train_result(param, dmat, num_rounds):
    result = {}
-    xgb.train(param, dmat, num_rounds, [(dmat, 'train')], verbose_eval=False,
+    xgb.train(
-              evals_result=result)
+        param,
        dmat,
        num_rounds,
        evals=[(dmat, "train")],
        verbose_eval=False,
        evals_result=result,
    )
    return result
--- a/tests/python/test_model_io.py
+++ b/tests/python/test_model_io.py
@ -0,0 +1,406 @@
 import json
 import locale
 import os
 import pickle
 import tempfile
 from pathlib import Path
 from typing import List
 import numpy as np
 import pytest
 import xgboost as xgb
 from xgboost import testing as tm
 def json_model(model_path: str, parameters: dict) -> dict:
    """Train a small model, save it to ``model_path`` and return it parsed.

    Parameters
    ----------
    model_path :
        Destination file.  A path ending in ``ubj`` is read back with
        ``ubjson``; anything else is read back as JSON text.
    parameters :
        Training parameters passed to ``xgb.train``.  The dictionary is not
        modified; ``num_class`` is added to a private copy when the objective
        is ``multi:softmax``.

    Returns
    -------
    The decoded model document as a dictionary.
    """
    datasets = pytest.importorskip("sklearn.datasets")

    X, y = datasets.make_classification(64, n_features=8, n_classes=3, n_informative=6)

    # Copy first so that the caller's dictionary is left untouched.
    parameters = dict(parameters)
    if parameters.get("objective", None) == "multi:softmax":
        parameters["num_class"] = 3

    dm1 = xgb.DMatrix(X, y)

    bst = xgb.train(parameters, dm1)
    bst.save_model(model_path)

    if model_path.endswith("ubj"):
        import ubjson

        with open(model_path, "rb") as ubjfd:
            model = ubjson.load(ubjfd)
    else:
        with open(model_path, "r") as fd:
            model = json.load(fd)

    return model
 class TestBoosterIO:
    def run_model_json_io(self, parameters: dict, ext: str) -> None:
        """Round-trip a model through file and in-memory serialisation.

        Parameters
        ----------
        parameters :
            Training parameters forwarded to ``json_model``.
        ext :
            File extension of the model file, ``"json"`` or ``"ubj"``.  The
            test is skipped for ``"ubj"`` when ``tm.no_ubjson()`` reports the
            decoder as unavailable.
        """
        config = xgb.config.get_config()
        assert config["verbosity"] == 1

        if ext == "ubj" and tm.no_ubjson()["condition"]:
            pytest.skip(tm.no_ubjson()["reason"])

        loc = locale.getpreferredencoding(False)
        model_path = "test_model_json_io." + ext
        try:
            j_model = json_model(model_path, parameters)
            assert isinstance(j_model["learner"], dict)

            bst = xgb.Booster(model_file=model_path)

            # Save again from the loaded booster and check the file still parses.
            bst.save_model(fname=model_path)
            if ext == "ubj":
                import ubjson

                with open(model_path, "rb") as ubjfd:
                    j_model = ubjson.load(ubjfd)
            else:
                with open(model_path, "r") as fd:
                    j_model = json.load(fd)
            assert isinstance(j_model["learner"], dict)
        finally:
            # Clean up even if one of the checks above fails; the file may not
            # exist when the failure happened before the first save.
            if os.path.exists(model_path):
                os.remove(model_path)

        # Model IO must not change the preferred encoding of the process.
        assert locale.getpreferredencoding(False) == loc

        json_raw = bst.save_raw(raw_format="json")
        from_jraw = xgb.Booster()
        from_jraw.load_model(json_raw)

        ubj_raw = bst.save_raw(raw_format="ubj")
        from_ubjraw = xgb.Booster()
        from_ubjraw.load_model(ubj_raw)

        if parameters.get("multi_strategy", None) != "multi_output_tree":
            # Old binary model is not supported for vector leaf.
            with pytest.warns(Warning, match="Model format is default to UBJSON"):
                old_from_json = from_jraw.save_raw(raw_format="deprecated")
                old_from_ubj = from_ubjraw.save_raw(raw_format="deprecated")

            assert old_from_json == old_from_ubj

        # Loading pretty-printed JSON with trailing newlines must work too.
        raw_json = bst.save_raw(raw_format="json")
        pretty = json.dumps(json.loads(raw_json), indent=2) + "\n\n"
        bst.load_model(bytearray(pretty, encoding="ascii"))

        if parameters.get("multi_strategy", None) != "multi_output_tree":
            # old binary model is not supported.
            # Same comparison as above, repeated after the pretty JSON load.
            with pytest.warns(Warning, match="Model format is default to UBJSON"):
                old_from_json = from_jraw.save_raw(raw_format="deprecated")
                old_from_ubj = from_ubjraw.save_raw(raw_format="deprecated")

            assert old_from_json == old_from_ubj

        rng = np.random.default_rng()
        X = rng.random(size=from_jraw.num_features() * 10).reshape(
            (10, from_jraw.num_features())
        )
        predt_from_jraw = from_jraw.predict(xgb.DMatrix(X))
        predt_from_bst = bst.predict(xgb.DMatrix(X))
        np.testing.assert_allclose(predt_from_jraw, predt_from_bst)
    @pytest.mark.parametrize("ext", ["json", "ubj"])
    def test_model_json_io(self, ext: str) -> None:
        """Run the JSON/UBJSON round-trip check for several booster setups."""
        configurations = [
            {"booster": "gbtree", "tree_method": "hist"},
            {
                "booster": "gbtree",
                "tree_method": "hist",
                "multi_strategy": "multi_output_tree",
                "objective": "multi:softmax",
            },
            {"booster": "gblinear"},
            {"booster": "dart", "tree_method": "hist"},
        ]
        for parameters in configurations:
            self.run_model_json_io(parameters, ext)
    def test_categorical_model_io(self) -> None:
        """Categorical models can only be saved as JSON or UBJSON.

        Saving to a ``.deprecated`` path must raise ``ValueError``; saving to
        ``.json`` and ``.ubj`` must round-trip with unchanged predictions.
        """
        X, y = tm.make_categorical(256, 16, 71, False)
        Xy = xgb.DMatrix(X, y, enable_categorical=True)
        booster = xgb.train({"tree_method": "approx"}, Xy, num_boost_round=16)
        expected = booster.predict(Xy)

        with tempfile.TemporaryDirectory() as tempdir:
            legacy_path = os.path.join(tempdir, "model.deprecated")
            with pytest.raises(ValueError, match=r".*JSON/UBJSON.*"):
                with pytest.warns(Warning, match="Model format is default to UBJSON"):
                    booster.save_model(legacy_path)

            # Each iteration saves the booster loaded by the previous one.
            for file_name in ("model.json", "model.ubj"):
                path = os.path.join(tempdir, file_name)
                booster.save_model(path)
                booster = xgb.Booster(model_file=path)
                np.testing.assert_allclose(expected, booster.predict(Xy))
    @pytest.mark.skipif(**tm.no_json_schema())
    def test_json_io_schema(self) -> None:
        """Validate saved JSON models against ``doc/model.schema``.

        Models trained with the ``gbtree`` and ``dart`` boosters are saved as
        JSON and validated with ``jsonschema``.  Afterwards the objective
        names listed in the error raised for an unknown objective are compared
        with the objective names declared in the schema.
        """
        import jsonschema

        model_path = "test_json_schema.json"
        path = os.path.dirname(
            os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        )
        doc = os.path.join(path, "doc", "model.schema")
        with open(doc, "r") as fd:
            schema = json.load(fd)

        for booster in ("gbtree", "dart"):
            parameters = {"tree_method": "hist", "booster": booster}
            try:
                jsonschema.validate(
                    instance=json_model(model_path, parameters), schema=schema
                )
            finally:
                # Remove the model file even when validation fails.
                if os.path.exists(model_path):
                    os.remove(model_path)

        dtrain, _ = tm.load_agaricus(__file__)
        # An unknown objective must raise; a plain try/except would let the
        # test pass without checking anything if no error were raised.
        with pytest.raises(ValueError) as excinfo:
            xgb.train({"objective": "foo"}, dtrain, num_boost_round=1)

        e_str = str(excinfo.value)
        beg = e_str.find("Objective candidate")
        assert beg != -1, e_str
        end = e_str.find("Stack trace")
        if end == -1:
            # No stack trace in the message: keep everything up to the end
            # instead of dropping the last character with a -1 slice bound.
            end = len(e_str)
        e_str = e_str[beg:end].strip()
        objectives = {s.split(": ", 1)[1] for s in e_str.splitlines()}

        j_objectives = schema["properties"]["learner"]["properties"]["objective"][
            "oneOf"
        ]
        objectives_from_schema = {
            j_obj["properties"]["name"]["const"] for j_obj in j_objectives
        }
        assert objectives == objectives_from_schema
    def test_model_binary_io(self) -> None:
        """Round-trip a model through the deprecated binary file format.

        The model is saved with a ``.deprecated`` extension, which is expected
        to emit the "Model format is default to UBJSON" warning, then loaded
        back.  The loaded booster must keep ``scale_pos_weight`` in its
        configuration and produce a stable raw buffer when re-serialised.
        """
        parameters = {
            "tree_method": "hist",
            "booster": "gbtree",
            "scale_pos_weight": "0.5",
        }
        X = np.random.random((10, 3))
        y = np.random.random((10,))
        dtrain = xgb.DMatrix(X, y)
        bst = xgb.train(parameters, dtrain, num_boost_round=2)

        # A temporary directory keeps the working directory clean even when
        # saving or loading fails.
        with tempfile.TemporaryDirectory() as tempdir:
            model_path = os.path.join(tempdir, "test_model_binary_io.deprecated")
            with pytest.warns(Warning, match="Model format is default to UBJSON"):
                bst.save_model(model_path)

            bst = xgb.Booster(model_file=model_path)

        config = json.loads(bst.save_config())
        assert (
            float(config["learner"]["objective"]["reg_loss_param"]["scale_pos_weight"])
            == 0.5
        )

        # Raw buffer -> booster -> raw buffer must be lossless.
        buf = bst.save_raw()
        from_raw = xgb.Booster()
        from_raw.load_model(buf)

        buf_from_raw = from_raw.save_raw()
        assert buf == buf_from_raw
    def test_with_pathlib(self) -> None:
        """Saving and loading model files from paths."""
        rng = np.random.default_rng(1994)

        data = rng.normal(size=(100, 2))
        target = np.array([0, 1] * 50)
        features = ["Feature1", "Feature2"]

        dm = xgb.DMatrix(data, label=target, feature_names=features)
        params = {
            "objective": "binary:logistic",
            "eval_metric": "logloss",
            "eta": 0.3,
            "max_depth": 1,
        }

        bst = xgb.train(params, dm, num_boost_round=1)

        def dump_assertions(dump: List[str]) -> None:
            """Assertions for the expected dump from Booster"""
            assert len(dump) == 1, "Expected only 1 tree to be dumped."
            assert (
                len(dump[0].splitlines()) == 3
            ), "Expected 1 root and 2 leaves - 3 lines."

        # The temporary directory removes the model file even when one of the
        # assertions below fails.
        with tempfile.TemporaryDirectory() as tempdir:
            save_path = Path(tempdir) / "model.ubj"

            # save, assert exists
            bst.save_model(save_path)
            assert save_path.exists()

            # load the model again using Path
            bst2 = xgb.Booster(model_file=save_path)
            dump2 = bst2.get_dump()
            dump_assertions(dump2)

            # load again using load_model
            bst3 = xgb.Booster()
            bst3.load_model(save_path)
            dump3 = bst3.get_dump()
            dump_assertions(dump3)
 def save_load_model(model_path: str) -> None:
    """Save and reload scikit-learn style models through ``model_path``.

    Tree classifiers fitted on two folds of the binary digits data are
    round-tripped and compared with a natively loaded ``xgb.Booster``.  A
    ``gblinear`` classifier with early stopping is then round-tripped through
    the file and through pickle, checking predictions, ``best_iteration`` and
    ``best_score``.
    """
    from sklearn.datasets import load_digits
    from sklearn.model_selection import KFold

    rng = np.random.RandomState(1994)

    digits = load_digits(n_class=2)
    y = digits["target"]
    X = digits["data"]
    folds = KFold(n_splits=2, shuffle=True, random_state=rng)
    for train_index, test_index in folds.split(X, y):
        fitted = xgb.XGBClassifier().fit(X[train_index], y[train_index])
        fitted.save_model(model_path)

        restored = xgb.XGBClassifier()
        restored.load_model(model_path)

        assert isinstance(restored.classes_, np.ndarray)
        np.testing.assert_equal(restored.classes_, np.array([0, 1]))
        assert isinstance(restored._Booster, xgb.Booster)

        X_test = X[test_index]
        preds = restored.predict(X_test)
        labels = y[test_index]
        n_wrong = sum(
            1 for pred, label in zip(preds, labels) if int(pred > 0.5) != label
        )
        err = n_wrong / float(len(preds))
        assert err < 0.1
        assert restored.get_booster().attr("scikit_learn") is None

        # test native booster
        preds = restored.predict(X_test, output_margin=True)
        booster = xgb.Booster(model_file=model_path)
        predt_1 = booster.predict(xgb.DMatrix(X_test), output_margin=True)
        assert np.allclose(preds, predt_1)

        # Loading a classifier model into the generic XGBModel must fail.
        with pytest.raises(TypeError):
            generic = xgb.XGBModel()
            generic.load_model(model_path)

    clf = xgb.XGBClassifier(booster="gblinear", early_stopping_rounds=1)
    clf.fit(X, y, eval_set=[(X, y)])
    best_iteration = clf.best_iteration
    best_score = clf.best_score
    predt_0 = clf.predict(X)

    # File round-trip on the same estimator instance.
    clf.save_model(model_path)
    clf.load_model(model_path)
    assert clf.booster == "gblinear"
    np.testing.assert_allclose(predt_0, clf.predict(X))
    assert clf.best_iteration == best_iteration
    assert clf.best_score == best_score

    # Pickle round-trip.
    clf = pickle.loads(pickle.dumps(clf))
    np.testing.assert_allclose(predt_0, clf.predict(X))
    assert clf.best_iteration == best_iteration
    assert clf.best_score == best_score
@pytest.mark.skipif(**tm.no_sklearn())
 def test_sklearn_model() -> None:
    """Model IO for the scikit-learn interface.

    Runs ``save_load_model`` with the deprecated and JSON file formats, then
    checks with a UBJSON file that a natively trained booster can be loaded
    into ``XGBClassifier``/``XGBModel`` and that a multi-class classifier keeps
    its attributes after a save/load round-trip.
    """
    from sklearn.datasets import load_digits
    from sklearn.model_selection import train_test_split
    # Saving with the ".deprecated" extension is expected to emit a warning.
    with tempfile.TemporaryDirectory() as tempdir:
        model_path = os.path.join(tempdir, "digits.deprecated")
        with pytest.warns(Warning, match="Model format is default to UBJSON"):
            save_load_model(model_path)
    with tempfile.TemporaryDirectory() as tempdir:
        model_path = os.path.join(tempdir, "digits.model.json")
        save_load_model(model_path)
    with tempfile.TemporaryDirectory() as tempdir:
        model_path = os.path.join(tempdir, "digits.model.ubj")
        digits = load_digits(n_class=2)
        y = digits["target"]
        X = digits["data"]
        # Train with the native interface, then load via the sklearn wrappers.
        booster = xgb.train(
            {"tree_method": "hist", "objective": "binary:logistic"},
            dtrain=xgb.DMatrix(X, y),
            num_boost_round=4,
        )
        predt_0 = booster.predict(xgb.DMatrix(X))
        booster.save_model(model_path)
        cls = xgb.XGBClassifier()
        cls.load_model(model_path)
        proba = cls.predict_proba(X)
        assert proba.shape[0] == X.shape[0]
        assert proba.shape[1] == 2  # binary
        # Column 1 of predict_proba is compared with the native prediction.
        predt_1 = cls.predict_proba(X)[:, 1]
        assert np.allclose(predt_0, predt_1)
        cls = xgb.XGBModel()
        cls.load_model(model_path)
        predt_1 = cls.predict(X)
        assert np.allclose(predt_0, predt_1)
        # mclass
        X, y = load_digits(n_class=10, return_X_y=True)
        # small test_size to force early stop
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.01, random_state=1
        )
        clf = xgb.XGBClassifier(
            n_estimators=64, tree_method="hist", early_stopping_rounds=2
        )
        clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])
        score = clf.best_score
        clf.save_model(model_path)
        # A fresh classifier must recover its attributes from the model file.
        clf = xgb.XGBClassifier()
        clf.load_model(model_path)
        assert clf.classes_.size == 10
        assert clf.objective == "multi:softprob"
        np.testing.assert_equal(clf.classes_, np.arange(10))
        assert clf.n_classes_ == 10
        # NOTE(review): hard-coded iteration; presumably tied to this split and
        # the current training defaults — confirm if it starts failing.
        assert clf.best_iteration == 27
        assert clf.best_score == score
@pytest.mark.skipif(**tm.no_sklearn())
 def test_with_sklearn_obj_metric() -> None:
    """Callable objective/metric survive pickling but not ``save_model``.

    After a pickle round-trip both ``objective`` and ``eval_metric`` are still
    callables.  After saving to JSON and loading into a fresh regressor neither
    is callable and ``eval_metric`` is ``None``.
    """
    from sklearn.metrics import mean_squared_error

    features, labels = tm.datasets.make_regression()
    fitted = xgb.XGBRegressor(objective=tm.ls_obj, eval_metric=mean_squared_error)
    fitted.fit(features, labels)

    unpickled = pickle.loads(pickle.dumps(fitted))
    assert callable(unpickled.objective)
    assert callable(unpickled.eval_metric)

    with tempfile.TemporaryDirectory() as tmpdir:
        path = os.path.join(tmpdir, "model.json")
        fitted.save_model(path)

        reloaded = xgb.XGBRegressor()
        reloaded.load_model(path)

    assert not callable(reloaded.objective)
    assert not callable(reloaded.eval_metric)
    assert reloaded.eval_metric is None
--- a/tests/python/test_pickling.py
+++ b/tests/python/test_pickling.py
@ -1,13 +1,10 @@
 import json
 import os
 import pickle
 import tempfile
 import numpy as np
 import pytest
 import xgboost as xgb
 from xgboost import testing as tm
 kRows = 100
 kCols = 10
@ -64,27 +61,3 @@ class TestPickling:
        params = {"nthread": 8, "tree_method": "exact", "subsample": 0.5}
        config = self.run_model_pickling(params)
        check(config)
    @pytest.mark.skipif(**tm.no_sklearn())
    def test_with_sklearn_obj_metric(self) -> None:
        """Callable objective/metric survive pickling but not ``save_model``."""
        from sklearn.metrics import mean_squared_error
        X, y = tm.datasets.make_regression()
        reg = xgb.XGBRegressor(objective=tm.ls_obj, eval_metric=mean_squared_error)
        reg.fit(X, y)
        # Pickle round-trip: both attributes are still callables afterwards.
        pkl = pickle.dumps(reg)
        reg_1 = pickle.loads(pkl)
        assert callable(reg_1.objective)
        assert callable(reg_1.eval_metric)
        # Model-file round-trip into a fresh estimator.
        with tempfile.TemporaryDirectory() as tmpdir:
            path = os.path.join(tmpdir, "model.json")
            reg.save_model(path)
            reg_2 = xgb.XGBRegressor()
            reg_2.load_model(path)
        # After loading from the file neither attribute is a callable.
        assert not callable(reg_2.objective)
        assert not callable(reg_2.eval_metric)
        assert reg_2.eval_metric is None
--- a/tests/python/test_shap.py
+++ b/tests/python/test_shap.py
@ -49,7 +49,7 @@ class TestSHAP:
        def fn(max_depth: int, num_rounds: int) -> None:
            # train
-            params = {"max_depth": max_depth, "eta": 1, "verbosity": 0}
+            params = {"max_depth": max_depth, "eta": 1}
            bst = xgb.train(params, dtrain, num_boost_round=num_rounds)
            # predict
--- a/tests/python/test_updaters.py
+++ b/tests/python/test_updaters.py
@ -117,7 +117,6 @@ class TestTreeMethod:
        ag_param = {'max_depth': 2,
                    'tree_method': 'hist',
                    'eta': 1,
                    'verbosity': 0,
                    'objective': 'binary:logistic',
                    'eval_metric': 'auc'}
        hist_res = {}
@ -340,6 +339,7 @@ class TestTreeMethod:
        assert get_score(config_0) == get_score(config_1)
        with pytest.warns(Warning, match="Model format is default to UBJSON"):
            raw_booster = booster_1.save_raw(raw_format="deprecated")
        booster_2 = xgb.Booster(model_file=raw_booster)
        config_2 = json.loads(booster_2.save_config())
--- a/tests/python/test_with_pandas.py
+++ b/tests/python/test_with_pandas.py
@ -341,7 +341,6 @@ class TestPandas:
        params = {
            "max_depth": 2,
            "eta": 1,
            "verbosity": 0,
            "objective": "binary:logistic",
            "eval_metric": "error",
        }
@ -372,7 +371,6 @@ class TestPandas:
        params = {
            "max_depth": 2,
            "eta": 1,
            "verbosity": 0,
            "objective": "binary:logistic",
            "eval_metric": "auc",
        }
@ -383,7 +381,6 @@ class TestPandas:
        params = {
            "max_depth": 2,
            "eta": 1,
            "verbosity": 0,
            "objective": "binary:logistic",
            "eval_metric": ["auc"],
        }
@ -394,7 +391,6 @@ class TestPandas:
        params = {
            "max_depth": 2,
            "eta": 1,
            "verbosity": 0,
            "objective": "binary:logistic",
            "eval_metric": ["auc"],
        }
@ -413,7 +409,6 @@ class TestPandas:
        params = {
            "max_depth": 2,
            "eta": 1,
            "verbosity": 0,
            "objective": "binary:logistic",
        }
        cv = xgb.cv(
@ -424,7 +419,6 @@ class TestPandas:
        params = {
            "max_depth": 2,
            "eta": 1,
            "verbosity": 0,
            "objective": "binary:logistic",
        }
        cv = xgb.cv(
@ -435,7 +429,6 @@ class TestPandas:
        params = {
            "max_depth": 2,
            "eta": 1,
            "verbosity": 0,
            "objective": "binary:logistic",
            "eval_metric": ["auc"],
        }
--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@ -678,7 +678,6 @@ def test_split_value_histograms():
    params = {
        "max_depth": 6,
        "eta": 0.01,
        "verbosity": 0,
        "objective": "binary:logistic",
        "base_score": 0.5,
    }
@ -897,128 +896,6 @@ def test_validation_weights():
    run_validation_weights(xgb.XGBClassifier)
 def save_load_model(model_path):
    """Round-trip scikit-learn wrapper models through ``model_path``.

    Fits ``XGBClassifier`` models on the two-class digits data, saves them to
    ``model_path``, loads them back and asserts that predictions and fitted
    attributes survive the round trip. A ``gblinear`` classifier trained with
    early stopping is additionally checked through both ``save_model`` /
    ``load_model`` and ``pickle``.

    Parameters
    ----------
    model_path :
        File path the models are written to and read back from. The same path
        is overwritten several times during the run.
    """
    from sklearn.datasets import load_digits
    from sklearn.model_selection import KFold
    digits = load_digits(n_class=2)
    y = digits['target']
    X = digits['data']
    # `rng` is not defined in this function; it comes from the enclosing module.
    kf = KFold(n_splits=2, shuffle=True, random_state=rng)
    for train_index, test_index in kf.split(X, y):
        # Train on the training fold and persist the fitted classifier.
        xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
        xgb_model.save_model(model_path)
        # Load into a brand-new, unfitted classifier so that everything checked
        # below must have come from the file.
        xgb_model = xgb.XGBClassifier()
        xgb_model.load_model(model_path)
        # The class labels and the underlying Booster must be restored.
        assert isinstance(xgb_model.classes_, np.ndarray)
        np.testing.assert_equal(xgb_model.classes_, np.array([0, 1]))
        assert isinstance(xgb_model._Booster, xgb.Booster)
        preds = xgb_model.predict(X[test_index])
        labels = y[test_index]
        # Fraction of held-out samples whose thresholded prediction differs
        # from the label.
        err = sum(1 for i in range(len(preds))
                  if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
        assert err < 0.1
        # The loaded booster must not carry a 'scikit_learn' attribute.
        assert xgb_model.get_booster().attr('scikit_learn') is None
        # test native booster: margin predictions from the wrapper must match
        # those of a plain Booster constructed from the same file.
        preds = xgb_model.predict(X[test_index], output_margin=True)
        booster = xgb.Booster(model_file=model_path)
        predt_1 = booster.predict(xgb.DMatrix(X[test_index]),
                                  output_margin=True)
        assert np.allclose(preds, predt_1)
        # Loading the classifier file through the generic XGBModel is expected
        # to fail with TypeError (presumably raised by load_model -- both
        # statements sit inside the `raises` block).
        with pytest.raises(TypeError):
            xgb_model = xgb.XGBModel()
            xgb_model.load_model(model_path)
    # Linear booster with early stopping: record the state before saving so it
    # can be compared after each round trip.
    clf = xgb.XGBClassifier(booster="gblinear", early_stopping_rounds=1)
    clf.fit(X, y, eval_set=[(X, y)])
    best_iteration = clf.best_iteration
    best_score = clf.best_score
    predt_0 = clf.predict(X)
    # Save and load back into the same estimator object.
    clf.save_model(model_path)
    clf.load_model(model_path)
    assert clf.booster == "gblinear"
    predt_1 = clf.predict(X)
    np.testing.assert_allclose(predt_0, predt_1)
    assert clf.best_iteration == best_iteration
    assert clf.best_score == best_score
    # The same properties must also survive a pickle round trip.
    clfpkl = pickle.dumps(clf)
    clf = pickle.loads(clfpkl)
    predt_2 = clf.predict(X)
    np.testing.assert_allclose(predt_0, predt_2)
    assert clf.best_iteration == best_iteration
    assert clf.best_score == best_score
 def test_save_load_model():
    """Exercise model save/load through the scikit-learn interface.

    Runs ``save_load_model`` against two file names (``digits.model`` and
    ``digits.model.json``), then uses a ``.ubj`` file to check that a model
    saved by the native ``Booster`` can be loaded by the wrappers, and that a
    multi-class classifier trained with early stopping restores its fitted
    attributes after loading.
    """
    # Each scenario gets its own temporary directory, removed on exit.
    with tempfile.TemporaryDirectory() as tempdir:
        model_path = os.path.join(tempdir, "digits.model")
        save_load_model(model_path)
    with tempfile.TemporaryDirectory() as tempdir:
        model_path = os.path.join(tempdir, "digits.model.json")
        save_load_model(model_path)
    from sklearn.datasets import load_digits
    from sklearn.model_selection import train_test_split
    with tempfile.TemporaryDirectory() as tempdir:
        model_path = os.path.join(tempdir, "digits.model.ubj")
        digits = load_digits(n_class=2)
        y = digits["target"]
        X = digits["data"]
        # Train with the native API (no scikit-learn wrapper involved) and
        # save the resulting Booster.
        booster = xgb.train(
            {"tree_method": "hist", "objective": "binary:logistic"},
            dtrain=xgb.DMatrix(X, y),
            num_boost_round=4,
        )
        predt_0 = booster.predict(xgb.DMatrix(X))
        booster.save_model(model_path)
        # A classifier wrapper must be able to load the native model file.
        cls = xgb.XGBClassifier()
        cls.load_model(model_path)
        proba = cls.predict_proba(X)
        assert proba.shape[0] == X.shape[0]
        assert proba.shape[1] == 2  # binary
        # Column 1 of predict_proba must equal the native booster's output.
        predt_1 = cls.predict_proba(X)[:, 1]
        assert np.allclose(predt_0, predt_1)
        # Unlike a file written by XGBClassifier (see save_load_model), this
        # natively saved file is expected to load into a plain XGBModel.
        cls = xgb.XGBModel()
        cls.load_model(model_path)
        predt_1 = cls.predict(X)
        assert np.allclose(predt_0, predt_1)
        # Multi-class case: all ten digit classes.
        X, y = load_digits(n_class=10, return_X_y=True)
        # small test_size to force early stop
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.01, random_state=1
        )
        clf = xgb.XGBClassifier(
            n_estimators=64, tree_method="hist", early_stopping_rounds=2
        )
        clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])
        score = clf.best_score
        # Overwrites the binary model saved above at the same path.
        clf.save_model(model_path)
        clf = xgb.XGBClassifier()
        clf.load_model(model_path)
        # Class metadata, objective and early-stopping results must all be
        # restored on the freshly constructed classifier.
        assert clf.classes_.size == 10
        assert clf.objective == "multi:softprob"
        np.testing.assert_equal(clf.classes_, np.arange(10))
        assert clf.n_classes_ == 10
        # NOTE(review): 27 is a hard-coded expectation; presumably stable only
        # for this exact split (random_state=1) and training setup -- confirm.
        assert clf.best_iteration == 27
        assert clf.best_score == score
 def test_RFECV():
    from sklearn.datasets import load_breast_cancer, load_diabetes, load_iris
    from sklearn.feature_selection import RFECV