Save model in ubj as the default. (#9947)

2024-01-05 17:53:36 +08:00
parent c03a4d5088
commit 38dd91f491
23 changed files with 598 additions and 550 deletions
--- a/jvm-packages/xgboost4j-spark/src/main/scala/org/apache/spark/ml/util/XGBoostReadWrite.scala
+++ b/jvm-packages/xgboost4j-spark/src/main/scala/org/apache/spark/ml/util/XGBoostReadWrite.scala
@@ -30,9 +30,6 @@ import org.apache.spark.ml.param.Params
 import org.apache.spark.ml.util.DefaultParamsReader.Metadata

 abstract class XGBoostWriter extends MLWriter {
-
-  /** Currently it's using the "deprecated" format as
-   * default, which will be changed into `ubj` in future releases. */
  def getModelFormat(): String = {
    optionMap.getOrElse("format", JBooster.DEFAULT_FORMAT)
  }
--- a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostClassifierSuite.scala
+++ b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostClassifierSuite.scala
@@ -1,5 +1,5 @@
 /*
- Copyright (c) 2014-2022 by Contributors
+ Copyright (c) 2014-2024 by Contributors

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -432,6 +432,7 @@ class XGBoostClassifierSuite extends AnyFunSuite with PerTest with TmpFolderPerS
    val xgb = new XGBoostClassifier(paramMap)
    val model = xgb.fit(trainingDF)

+    // test json
    val modelPath = new File(tempDir.toFile, "xgbc").getPath
    model.write.option("format", "json").save(modelPath)
    val nativeJsonModelPath = new File(tempDir.toFile, "nativeModel.json").getPath
@@ -439,21 +440,21 @@ class XGBoostClassifierSuite extends AnyFunSuite with PerTest with TmpFolderPerS
    assert(compareTwoFiles(new File(modelPath, "data/XGBoostClassificationModel").getPath,
      nativeJsonModelPath))

-    // test default "deprecated"
+    // test default "ubj"
    val modelUbjPath = new File(tempDir.toFile, "xgbcUbj").getPath
    model.write.save(modelUbjPath)
-    val nativeDeprecatedModelPath = new File(tempDir.toFile, "nativeModel").getPath
-    model.nativeBooster.saveModel(nativeDeprecatedModelPath)
+    val nativeUbjModelPath = new File(tempDir.toFile, "nativeModel.ubj").getPath
+    model.nativeBooster.saveModel(nativeUbjModelPath)
    assert(compareTwoFiles(new File(modelUbjPath, "data/XGBoostClassificationModel").getPath,
-      nativeDeprecatedModelPath))
+      nativeUbjModelPath))

    // json file should be indifferent with ubj file
    val modelJsonPath = new File(tempDir.toFile, "xgbcJson").getPath
    model.write.option("format", "json").save(modelJsonPath)
-    val nativeUbjModelPath = new File(tempDir.toFile, "nativeModel1.ubj").getPath
-    model.nativeBooster.saveModel(nativeUbjModelPath)
+    val nativeUbjModelPath1 = new File(tempDir.toFile, "nativeModel1.ubj").getPath
+    model.nativeBooster.saveModel(nativeUbjModelPath1)
    assert(!compareTwoFiles(new File(modelJsonPath, "data/XGBoostClassificationModel").getPath,
-      nativeUbjModelPath))
+      nativeUbjModelPath1))
  }

  test("native json model file should store feature_name and feature_type") {
--- a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostRegressorSuite.scala
+++ b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostRegressorSuite.scala
@@ -1,5 +1,5 @@
 /*
- Copyright (c) 2014-2022 by Contributors
+ Copyright (c) 2014-2024 by Contributors

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -333,21 +333,24 @@ class XGBoostRegressorSuite extends AnyFunSuite with PerTest with TmpFolderPerSu
    assert(compareTwoFiles(new File(modelPath, "data/XGBoostRegressionModel").getPath,
      nativeJsonModelPath))

-    // test default "deprecated"
+    // test default "ubj"
    val modelUbjPath = new File(tempDir.toFile, "xgbcUbj").getPath
    model.write.save(modelUbjPath)
-    val nativeDeprecatedModelPath = new File(tempDir.toFile, "nativeModel").getPath
-    model.nativeBooster.saveModel(nativeDeprecatedModelPath)
-    assert(compareTwoFiles(new File(modelUbjPath, "data/XGBoostRegressionModel").getPath,
-      nativeDeprecatedModelPath))

-    // json file should be indifferent with ubj file
-    val modelJsonPath = new File(tempDir.toFile, "xgbcJson").getPath
-    model.write.option("format", "json").save(modelJsonPath)
-    val nativeUbjModelPath = new File(tempDir.toFile, "nativeModel1.ubj").getPath
+    val nativeUbjModelPath = new File(tempDir.toFile, "nativeModel.ubj").getPath
    model.nativeBooster.saveModel(nativeUbjModelPath)
-    assert(!compareTwoFiles(new File(modelJsonPath, "data/XGBoostRegressionModel").getPath,
-      nativeUbjModelPath))
-  }

+    assert(compareTwoFiles(new File(modelUbjPath, "data/XGBoostRegressionModel").getPath,
+      nativeUbjModelPath))
+
+    // test the deprecated format
+    val modelDeprecatedPath = new File(tempDir.toFile, "modelDeprecated").getPath
+    model.write.option("format", "deprecated").save(modelDeprecatedPath)
+
+    val nativeDeprecatedModelPath = new File(tempDir.toFile, "nativeModel.deprecated").getPath
+    model.nativeBooster.saveModel(nativeDeprecatedModelPath)
+
+    assert(compareTwoFiles(new File(modelDeprecatedPath, "data/XGBoostRegressionModel").getPath,
+      nativeDeprecatedModelPath))
+  }
 }
--- a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java
+++ b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java
@@ -34,7 +34,7 @@ import org.apache.commons.logging.LogFactory;
 * Booster for xgboost, this is a model API that support interactive build of a XGBoost Model
 */
 public class Booster implements Serializable, KryoSerializable {
-  public static final String DEFAULT_FORMAT = "deprecated";
+  public static final String DEFAULT_FORMAT = "ubj";
  private static final Log logger = LogFactory.getLog(Booster.class);
  // handle to the booster.
  private long handle = 0;
@@ -788,8 +788,7 @@ public class Booster implements Serializable, KryoSerializable {
  }

  /**
-   * Save model into raw byte array. Currently it's using the deprecated format as
-   * default, which will be changed into `ubj` in future releases.
+   * Save model into raw byte array in the UBJSON ("ubj") format.
   *
   * @return the saved byte array
   * @throws XGBoostError native error
--- a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/Booster.scala
+++ b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/Booster.scala
@@ -337,8 +337,7 @@ class Booster private[xgboost4j](private[xgboost4j] var booster: JBooster)
  }

  /**
-    * Save model into a raw byte array. Currently it's using the deprecated format as
-   *  default, which will be changed into `ubj` in future releases.
+    * Save model into a raw byte array in the UBJSON ("ubj") format.
    */
  @throws(classOf[XGBoostError])
  def toByteArray: Array[Byte] = {
--- a/python-package/xgboost/core.py
+++ b/python-package/xgboost/core.py
@@ -2613,7 +2613,7 @@ class Booster:
        else:
            raise TypeError("fname must be a string or os PathLike")

-    def save_raw(self, raw_format: str = "deprecated") -> bytearray:
+    def save_raw(self, raw_format: str = "ubj") -> bytearray:
        """Save the model to a in memory buffer representation instead of file.

        Parameters
--- a/python-package/xgboost/testing/init.py
+++ b/python-package/xgboost/testing/init.py
@@ -630,7 +630,7 @@ sparse_datasets_strategy = strategies.sampled_from(

 def make_datasets_with_margin(
    unweighted_strategy: strategies.SearchStrategy,
-) -> Callable:
+) -> Callable[[], strategies.SearchStrategy[TestDataset]]:
    """Factory function for creating strategies that generates datasets with weight and
    base margin.

@@ -668,8 +668,7 @@ def make_datasets_with_margin(

 # A strategy for drawing from a set of example datasets. May add random weights to the
 # dataset
-@memory.cache
-def make_dataset_strategy() -> Callable:
+def make_dataset_strategy() -> strategies.SearchStrategy[TestDataset]:
    _unweighted_datasets_strategy = strategies.sampled_from(
        [
            TestDataset(
--- a/src/c_api/c_api.cc
+++ b/src/c_api/c_api.cc
@@ -1313,10 +1313,8 @@ XGB_DLL int XGBoosterLoadModel(BoosterHandle handle, const char* fname) {

 namespace {
 void WarnOldModel() {
-  if (XGBOOST_VER_MAJOR >= 2) {
-    LOG(WARNING) << "Saving into deprecated binary model format, please consider using `json` or "
-                    "`ubj`. Model format will default to JSON in XGBoost 2.2 if not specified.";
-  }
+  LOG(WARNING) << "Saving into deprecated binary model format, please consider using `json` or "
+                  "`ubj`. Model format is default to UBJSON in XGBoost 2.1 if not specified.";
 }
 }  // anonymous namespace

@@ -1339,14 +1337,14 @@ XGB_DLL int XGBoosterSaveModel(BoosterHandle handle, const char *fname) {
    save_json(std::ios::out);
  } else if (common::FileExtension(fname) == "ubj") {
    save_json(std::ios::binary);
-  } else if (XGBOOST_VER_MAJOR == 2 && XGBOOST_VER_MINOR >= 2) {
-    LOG(WARNING) << "Saving model to JSON as default.  You can use file extension `json`, `ubj` or "
-                    "`deprecated` to choose between formats.";
-    save_json(std::ios::out);
-  } else {
+  } else if (common::FileExtension(fname) == "deprecated") {
    WarnOldModel();
    auto *bst = static_cast<Learner *>(handle);
    bst->SaveModel(fo.get());
+  } else {
+    LOG(WARNING) << "Saving model in the UBJSON format as default.  You can use file extension:"
+                    " `json`, `ubj` or `deprecated` to choose between formats.";
+    save_json(std::ios::binary);
  }
  API_END();
 }
--- a/tests/ci_build/lint_python.py
+++ b/tests/ci_build/lint_python.py
@@ -27,6 +27,7 @@ class LintersPaths:
        "tests/python/test_quantile_dmatrix.py",
        "tests/python/test_tree_regularization.py",
        "tests/python/test_shap.py",
+        "tests/python/test_model_io.py",
        "tests/python/test_with_pandas.py",
        "tests/python-gpu/",
        "tests/python-sycl/",
@@ -83,6 +84,7 @@ class LintersPaths:
        "tests/python/test_multi_target.py",
        "tests/python-gpu/test_gpu_data_iterator.py",
        "tests/python-gpu/load_pickle.py",
+        "tests/python/test_model_io.py",
        "tests/test_distributed/test_with_spark/test_data.py",
        "tests/test_distributed/test_gpu_with_spark/test_data.py",
        "tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py",
--- a/tests/python/test_basic.py
+++ b/tests/python/test_basic.py
@@ -10,46 +10,48 @@ import pytest
 import xgboost as xgb
 from xgboost import testing as tm

-dpath = 'demo/data/'
+dpath = "demo/data/"
 rng = np.random.RandomState(1994)


 class TestBasic:
    def test_compat(self):
        from xgboost.compat import lazy_isinstance
+
        a = np.array([1, 2, 3])
-        assert lazy_isinstance(a, 'numpy', 'ndarray')
-        assert not lazy_isinstance(a, 'numpy', 'dataframe')
+        assert lazy_isinstance(a, "numpy", "ndarray")
+        assert not lazy_isinstance(a, "numpy", "dataframe")

    def test_basic(self):
        dtrain, dtest = tm.load_agaricus(__file__)
-        param = {'max_depth': 2, 'eta': 1,
-                 'objective': 'binary:logistic'}
+        param = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"}
        # specify validations set to watch performance
-        watchlist = [(dtrain, 'train')]
+        watchlist = [(dtrain, "train")]
        num_round = 2
-        bst = xgb.train(param, dtrain, num_round, watchlist, verbose_eval=True)
+        bst = xgb.train(param, dtrain, num_round, evals=watchlist, verbose_eval=True)

        preds = bst.predict(dtrain)
        labels = dtrain.get_label()
-        err = sum(1 for i in range(len(preds))
-                  if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
+        err = sum(
+            1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]
+        ) / float(len(preds))
        # error must be smaller than 10%
        assert err < 0.1

        preds = bst.predict(dtest)
        labels = dtest.get_label()
-        err = sum(1 for i in range(len(preds))
-                  if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
+        err = sum(
+            1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]
+        ) / float(len(preds))
        # error must be smaller than 10%
        assert err < 0.1

        with tempfile.TemporaryDirectory() as tmpdir:
-            dtest_path = os.path.join(tmpdir, 'dtest.dmatrix')
+            dtest_path = os.path.join(tmpdir, "dtest.dmatrix")
            # save dmatrix into binary buffer
            dtest.save_binary(dtest_path)
            # save model
-            model_path = os.path.join(tmpdir, 'model.booster')
+            model_path = os.path.join(tmpdir, "model.ubj")
            bst.save_model(model_path)
            # load model and data in
            bst2 = xgb.Booster(model_file=model_path)
@@ -59,17 +61,21 @@ class TestBasic:
            assert np.sum(np.abs(preds2 - preds)) == 0

    def test_metric_config(self):
-        # Make sure that the metric configuration happens in booster so the
-        # string `['error', 'auc']` doesn't get passed down to core.
+        # Make sure that the metric configuration happens in booster so the string
+        # `['error', 'auc']` doesn't get passed down to core.
        dtrain, dtest = tm.load_agaricus(__file__)
-        param = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
-                 'objective': 'binary:logistic', 'eval_metric': ['error', 'auc']}
-        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
+        param = {
+            "max_depth": 2,
+            "eta": 1,
+            "objective": "binary:logistic",
+            "eval_metric": ["error", "auc"],
+        }
+        watchlist = [(dtest, "eval"), (dtrain, "train")]
        num_round = 2
-        booster = xgb.train(param, dtrain, num_round, watchlist)
+        booster = xgb.train(param, dtrain, num_round, evals=watchlist)
        predt_0 = booster.predict(dtrain)
        with tempfile.TemporaryDirectory() as tmpdir:
-            path = os.path.join(tmpdir, 'model.json')
+            path = os.path.join(tmpdir, "model.json")
            booster.save_model(path)

            booster = xgb.Booster(params=param, model_file=path)
@@ -78,22 +84,23 @@ class TestBasic:

    def test_multiclass(self):
        dtrain, dtest = tm.load_agaricus(__file__)
-        param = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'num_class': 2}
+        param = {"max_depth": 2, "eta": 1, "num_class": 2}
        # specify validations set to watch performance
-        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
+        watchlist = [(dtest, "eval"), (dtrain, "train")]
        num_round = 2
-        bst = xgb.train(param, dtrain, num_round, watchlist)
+        bst = xgb.train(param, dtrain, num_round, evals=watchlist)
        # this is prediction
        preds = bst.predict(dtest)
        labels = dtest.get_label()
-        err = sum(1 for i in range(len(preds))
-                  if preds[i] != labels[i]) / float(len(preds))
+        err = sum(1 for i in range(len(preds)) if preds[i] != labels[i]) / float(
+            len(preds)
+        )
        # error must be smaller than 10%
        assert err < 0.1

        with tempfile.TemporaryDirectory() as tmpdir:
-            dtest_path = os.path.join(tmpdir, 'dtest.buffer')
-            model_path = os.path.join(tmpdir, 'xgb.model')
+            dtest_path = os.path.join(tmpdir, "dtest.buffer")
+            model_path = os.path.join(tmpdir, "model.ubj")
            # save dmatrix into binary buffer
            dtest.save_binary(dtest_path)
            # save model
@@ -108,33 +115,39 @@ class TestBasic:
    def test_dump(self):
        data = np.random.randn(100, 2)
        target = np.array([0, 1] * 50)
-        features = ['Feature1', 'Feature2']
+        features = ["Feature1", "Feature2"]

        dm = xgb.DMatrix(data, label=target, feature_names=features)
-        params = {'objective': 'binary:logistic',
-                  'eval_metric': 'logloss',
-                  'eta': 0.3,
-                  'max_depth': 1}
+        params = {
+            "objective": "binary:logistic",
+            "eval_metric": "logloss",
+            "eta": 0.3,
+            "max_depth": 1,
+        }

        bst = xgb.train(params, dm, num_boost_round=1)

        # number of feature importances should == number of features
        dump1 = bst.get_dump()
-        assert len(dump1) == 1, 'Expected only 1 tree to be dumped.'
-        len(dump1[0].splitlines()) == 3, 'Expected 1 root and 2 leaves - 3 lines in dump.'
+        assert len(dump1) == 1, "Expected only 1 tree to be dumped."
+        assert (
+            len(dump1[0].splitlines()) == 3
+        ), "Expected 1 root and 2 leaves - 3 lines in dump."

        dump2 = bst.get_dump(with_stats=True)
-        assert dump2[0].count('\n') == 3, 'Expected 1 root and 2 leaves - 3 lines in dump.'
-        msg = 'Expected more info when with_stats=True is given.'
-        assert dump2[0].find('\n') > dump1[0].find('\n'), msg
+        assert (
+            dump2[0].count("\n") == 3
+        ), "Expected 1 root and 2 leaves - 3 lines in dump."
+        msg = "Expected more info when with_stats=True is given."
+        assert dump2[0].find("\n") > dump1[0].find("\n"), msg

        dump3 = bst.get_dump(dump_format="json")
        dump3j = json.loads(dump3[0])
-        assert dump3j['nodeid'] == 0, 'Expected the root node on top.'
+        assert dump3j["nodeid"] == 0, "Expected the root node on top."

        dump4 = bst.get_dump(dump_format="json", with_stats=True)
        dump4j = json.loads(dump4[0])
-        assert 'gain' in dump4j, "Expected 'gain' to be dumped in JSON."
+        assert "gain" in dump4j, "Expected 'gain' to be dumped in JSON."

        with pytest.raises(ValueError):
            bst.get_dump(fmap="foo")
@@ -163,12 +176,14 @@ class TestBasic:

    def test_load_file_invalid(self):
        with pytest.raises(xgb.core.XGBoostError):
-            xgb.Booster(model_file='incorrect_path')
+            xgb.Booster(model_file="incorrect_path")

        with pytest.raises(xgb.core.XGBoostError):
-            xgb.Booster(model_file=u'不正なパス')
+            xgb.Booster(model_file="不正なパス")

-    @pytest.mark.parametrize("path", ["모델.ubj", "がうる・ぐら.json"], ids=["path-0", "path-1"])
+    @pytest.mark.parametrize(
+        "path", ["모델.ubj", "がうる・ぐら.json"], ids=["path-0", "path-1"]
+    )
    def test_unicode_path(self, tmpdir, path):
        model_path = pathlib.Path(tmpdir) / path
        dtrain, _ = tm.load_agaricus(__file__)
@@ -180,12 +195,11 @@ class TestBasic:
        assert bst.get_dump(dump_format="text") == bst2.get_dump(dump_format="text")

    def test_dmatrix_numpy_init_omp(self):
-
        rows = [1000, 11326, 15000]
        cols = 50
        for row in rows:
            X = np.random.randn(row, cols)
-            y = np.random.randn(row).astype('f')
+            y = np.random.randn(row).astype("f")
            dm = xgb.DMatrix(X, y, nthread=0)
            np.testing.assert_array_equal(dm.get_label(), y)
            assert dm.num_row() == row
@@ -198,8 +212,7 @@ class TestBasic:

    def test_cv(self):
        dm, _ = tm.load_agaricus(__file__)
-        params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
-                  'objective': 'binary:logistic'}
+        params = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"}

        # return np.ndarray
        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=False)
@@ -208,19 +221,18 @@ class TestBasic:

    def test_cv_no_shuffle(self):
        dm, _ = tm.load_agaricus(__file__)
-        params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
-                  'objective': 'binary:logistic'}
+        params = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"}

        # return np.ndarray
-        cv = xgb.cv(params, dm, num_boost_round=10, shuffle=False, nfold=10,
-                    as_pandas=False)
+        cv = xgb.cv(
+            params, dm, num_boost_round=10, shuffle=False, nfold=10, as_pandas=False
+        )
        assert isinstance(cv, dict)
        assert len(cv) == (4)

    def test_cv_explicit_fold_indices(self):
        dm, _ = tm.load_agaricus(__file__)
-        params = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'objective':
-                  'binary:logistic'}
+        params = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"}
        folds = [
            # Train        Test
            ([1, 3], [5, 8]),
@@ -228,15 +240,13 @@ class TestBasic:
        ]

        # return np.ndarray
-        cv = xgb.cv(params, dm, num_boost_round=10, folds=folds,
-                    as_pandas=False)
+        cv = xgb.cv(params, dm, num_boost_round=10, folds=folds, as_pandas=False)
        assert isinstance(cv, dict)
        assert len(cv) == (4)

    @pytest.mark.skipif(**tm.skip_s390x())
    def test_cv_explicit_fold_indices_labels(self):
-        params = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'objective':
-                  'reg:squarederror'}
+        params = {"max_depth": 2, "eta": 1, "objective": "reg:squarederror"}
        N = 100
        F = 3
        dm = xgb.DMatrix(data=np.random.randn(N, F), label=np.arange(N))
@@ -252,9 +262,10 @@ class TestBasic:
                super().__init__()

            def after_iteration(
-                self, model,
+                self,
+                model,
                epoch: int,
-                evals_log: xgb.callback.TrainingCallback.EvalsLog
+                evals_log: xgb.callback.TrainingCallback.EvalsLog,
            ):
                print([fold.dtest.get_label() for fold in model.cvfolds])

@@ -263,12 +274,18 @@ class TestBasic:
        # Run cross validation and capture standard out to test callback result
        with tm.captured_output() as (out, err):
            xgb.cv(
-                params, dm, num_boost_round=1, folds=folds, callbacks=[cb],
-                as_pandas=False
+                params,
+                dm,
+                num_boost_round=1,
+                folds=folds,
+                callbacks=[cb],
+                as_pandas=False,
            )
            output = out.getvalue().strip()
-        solution = ('[array([5., 8.], dtype=float32), array([23., 43., 11.],' +
-                    ' dtype=float32)]')
+        solution = (
+            "[array([5., 8.], dtype=float32), array([23., 43., 11.],"
+            + " dtype=float32)]"
+        )
        assert output == solution


@@ -285,7 +302,7 @@ class TestBasicPathLike:
        """Saving to a binary file using pathlib from a DMatrix."""
        data = np.random.randn(100, 2)
        target = np.array([0, 1] * 50)
-        features = ['Feature1', 'Feature2']
+        features = ["Feature1", "Feature2"]

        dm = xgb.DMatrix(data, label=target, feature_names=features)

@@ -299,42 +316,3 @@ class TestBasicPathLike:
        """An invalid model_file path should raise XGBoostError."""
        with pytest.raises(xgb.core.XGBoostError):
            xgb.Booster(model_file=Path("invalidpath"))
-
-    def test_Booster_save_and_load(self):
-        """Saving and loading model files from paths."""
-        save_path = Path("saveload.model")
-
-        data = np.random.randn(100, 2)
-        target = np.array([0, 1] * 50)
-        features = ['Feature1', 'Feature2']
-
-        dm = xgb.DMatrix(data, label=target, feature_names=features)
-        params = {'objective': 'binary:logistic',
-                  'eval_metric': 'logloss',
-                  'eta': 0.3,
-                  'max_depth': 1}
-
-        bst = xgb.train(params, dm, num_boost_round=1)
-
-        # save, assert exists
-        bst.save_model(save_path)
-        assert save_path.exists()
-
-        def dump_assertions(dump):
-            """Assertions for the expected dump from Booster"""
-            assert len(dump) == 1, 'Exepcted only 1 tree to be dumped.'
-            assert len(dump[0].splitlines()) == 3, 'Expected 1 root and 2 leaves - 3 lines.'
-
-        # load the model again using Path
-        bst2 = xgb.Booster(model_file=save_path)
-        dump2 = bst2.get_dump()
-        dump_assertions(dump2)
-
-        # load again using load_model
-        bst3 = xgb.Booster()
-        bst3.load_model(save_path)
-        dump3 = bst3.get_dump()
-        dump_assertions(dump3)
-
-        # remove file
-        Path.unlink(save_path)
--- a/tests/python/test_basic_models.py
+++ b/tests/python/test_basic_models.py
@@ -15,33 +15,9 @@ dpath = tm.data_dir(__file__)
 rng = np.random.RandomState(1994)


-def json_model(model_path: str, parameters: dict) -> dict:
-    datasets = pytest.importorskip("sklearn.datasets")
-
-    X, y = datasets.make_classification(64, n_features=8, n_classes=3, n_informative=6)
-    if parameters.get("objective", None) == "multi:softmax":
-        parameters["num_class"] = 3
-
-    dm1 = xgb.DMatrix(X, y)
-
-    bst = xgb.train(parameters, dm1)
-    bst.save_model(model_path)
-
-    if model_path.endswith("ubj"):
-        import ubjson
-
-        with open(model_path, "rb") as ubjfd:
-            model = ubjson.load(ubjfd)
-    else:
-        with open(model_path, "r") as fd:
-            model = json.load(fd)
-
-    return model
-
-
 class TestModels:
    def test_glm(self):
-        param = {'verbosity': 0, 'objective': 'binary:logistic',
+        param = {'objective': 'binary:logistic',
                 'booster': 'gblinear', 'alpha': 0.0001, 'lambda': 1,
                 'nthread': 1}
        dtrain, dtest = tm.load_agaricus(__file__)
@@ -73,7 +49,7 @@ class TestModels:

        with tempfile.TemporaryDirectory() as tmpdir:
            dtest_path = os.path.join(tmpdir, 'dtest.dmatrix')
-            model_path = os.path.join(tmpdir, 'xgboost.model.dart')
+            model_path = os.path.join(tmpdir, "xgboost.model.dart.ubj")
            # save dmatrix into binary buffer
            dtest.save_binary(dtest_path)
            model_path = model_path
@@ -101,7 +77,6 @@ class TestModels:

        # check whether sample_type and normalize_type work
        num_round = 50
-        param['verbosity'] = 0
        param['learning_rate'] = 0.1
        param['rate_drop'] = 0.1
        preds_list = []
@@ -214,8 +189,7 @@ class TestModels:
        assert set(evals_result['eval'].keys()) == {'auc', 'error', 'logloss'}

    def test_fpreproc(self):
-        param = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
-                 'objective': 'binary:logistic'}
+        param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
        num_round = 2

        def fpreproc(dtrain, dtest, param):
@@ -229,8 +203,7 @@ class TestModels:
               metrics={'auc'}, seed=0, fpreproc=fpreproc)

    def test_show_stdv(self):
-        param = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
-                 'objective': 'binary:logistic'}
+        param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
        num_round = 2
        dtrain, _ = tm.load_agaricus(__file__)
        xgb.cv(param, dtrain, num_round, nfold=5,
@@ -273,142 +246,6 @@ class TestModels:
        bst = xgb.train([], dm2)
        bst.predict(dm2)  # success

-    def test_model_binary_io(self):
-        model_path = 'test_model_binary_io.bin'
-        parameters = {'tree_method': 'hist', 'booster': 'gbtree',
-                      'scale_pos_weight': '0.5'}
-        X = np.random.random((10, 3))
-        y = np.random.random((10,))
-        dtrain = xgb.DMatrix(X, y)
-        bst = xgb.train(parameters, dtrain, num_boost_round=2)
-        bst.save_model(model_path)
-        bst = xgb.Booster(model_file=model_path)
-        os.remove(model_path)
-        config = json.loads(bst.save_config())
-        assert float(config['learner']['objective'][
-            'reg_loss_param']['scale_pos_weight']) == 0.5
-
-        buf = bst.save_raw()
-        from_raw = xgb.Booster()
-        from_raw.load_model(buf)
-
-        buf_from_raw = from_raw.save_raw()
-        assert buf == buf_from_raw
-
-    def run_model_json_io(self, parameters: dict, ext: str) -> None:
-        if ext == "ubj" and tm.no_ubjson()["condition"]:
-            pytest.skip(tm.no_ubjson()["reason"])
-
-        loc = locale.getpreferredencoding(False)
-        model_path = 'test_model_json_io.' + ext
-        j_model = json_model(model_path, parameters)
-        assert isinstance(j_model['learner'], dict)
-
-        bst = xgb.Booster(model_file=model_path)
-
-        bst.save_model(fname=model_path)
-        if ext == "ubj":
-            import ubjson
-            with open(model_path, "rb") as ubjfd:
-                j_model = ubjson.load(ubjfd)
-        else:
-            with open(model_path, 'r') as fd:
-                j_model = json.load(fd)
-
-        assert isinstance(j_model['learner'], dict)
-
-        os.remove(model_path)
-        assert locale.getpreferredencoding(False) == loc
-
-        json_raw = bst.save_raw(raw_format="json")
-        from_jraw = xgb.Booster()
-        from_jraw.load_model(json_raw)
-
-        ubj_raw = bst.save_raw(raw_format="ubj")
-        from_ubjraw = xgb.Booster()
-        from_ubjraw.load_model(ubj_raw)
-
-        if parameters.get("multi_strategy", None) != "multi_output_tree":
-            # old binary model is not supported.
-            old_from_json = from_jraw.save_raw(raw_format="deprecated")
-            old_from_ubj = from_ubjraw.save_raw(raw_format="deprecated")
-
-            assert old_from_json == old_from_ubj
-
-        raw_json = bst.save_raw(raw_format="json")
-        pretty = json.dumps(json.loads(raw_json), indent=2) + "\n\n"
-        bst.load_model(bytearray(pretty, encoding="ascii"))
-
-        if parameters.get("multi_strategy", None) != "multi_output_tree":
-            # old binary model is not supported.
-            old_from_json = from_jraw.save_raw(raw_format="deprecated")
-            old_from_ubj = from_ubjraw.save_raw(raw_format="deprecated")
-
-            assert old_from_json == old_from_ubj
-
-        rng = np.random.default_rng()
-        X = rng.random(size=from_jraw.num_features() * 10).reshape(
-            (10, from_jraw.num_features())
-        )
-        predt_from_jraw = from_jraw.predict(xgb.DMatrix(X))
-        predt_from_bst = bst.predict(xgb.DMatrix(X))
-        np.testing.assert_allclose(predt_from_jraw, predt_from_bst)
-
-    @pytest.mark.parametrize("ext", ["json", "ubj"])
-    def test_model_json_io(self, ext: str) -> None:
-        parameters = {"booster": "gbtree", "tree_method": "hist"}
-        self.run_model_json_io(parameters, ext)
-        parameters = {
-            "booster": "gbtree",
-            "tree_method": "hist",
-            "multi_strategy": "multi_output_tree",
-            "objective": "multi:softmax",
-        }
-        self.run_model_json_io(parameters, ext)
-        parameters = {"booster": "gblinear"}
-        self.run_model_json_io(parameters, ext)
-        parameters = {"booster": "dart", "tree_method": "hist"}
-        self.run_model_json_io(parameters, ext)
-
-    @pytest.mark.skipif(**tm.no_json_schema())
-    def test_json_io_schema(self):
-        import jsonschema
-        model_path = 'test_json_schema.json'
-        path = os.path.dirname(
-            os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-        doc = os.path.join(path, 'doc', 'model.schema')
-        with open(doc, 'r') as fd:
-            schema = json.load(fd)
-        parameters = {'tree_method': 'hist', 'booster': 'gbtree'}
-        jsonschema.validate(instance=json_model(model_path, parameters),
-                            schema=schema)
-        os.remove(model_path)
-
-        parameters = {'tree_method': 'hist', 'booster': 'dart'}
-        jsonschema.validate(instance=json_model(model_path, parameters),
-                            schema=schema)
-        os.remove(model_path)
-
-        try:
-            dtrain, _ = tm.load_agaricus(__file__)
-            xgb.train({'objective': 'foo'}, dtrain, num_boost_round=1)
-        except ValueError as e:
-            e_str = str(e)
-            beg = e_str.find('Objective candidate')
-            end = e_str.find('Stack trace')
-            e_str = e_str[beg: end]
-            e_str = e_str.strip()
-            splited = e_str.splitlines()
-            objectives = [s.split(': ')[1] for s in splited]
-            j_objectives = schema['properties']['learner']['properties'][
-                'objective']['oneOf']
-            objectives_from_schema = set()
-            for j_obj in j_objectives:
-                objectives_from_schema.add(
-                    j_obj['properties']['name']['const'])
-            objectives = set(objectives)
-            assert objectives == objectives_from_schema
-
    @pytest.mark.skipif(**tm.no_json_schema())
    def test_json_dump_schema(self):
        import jsonschema
@@ -470,29 +307,6 @@ class TestModels:
        for d in text_dump:
            assert d.find(r"feature \"2\"") != -1

-    def test_categorical_model_io(self):
-        X, y = tm.make_categorical(256, 16, 71, False)
-        Xy = xgb.DMatrix(X, y, enable_categorical=True)
-        booster = xgb.train({"tree_method": "approx"}, Xy, num_boost_round=16)
-        predt_0 = booster.predict(Xy)
-
-        with tempfile.TemporaryDirectory() as tempdir:
-            path = os.path.join(tempdir, "model.binary")
-            with pytest.raises(ValueError, match=r".*JSON/UBJSON.*"):
-                booster.save_model(path)
-
-            path = os.path.join(tempdir, "model.json")
-            booster.save_model(path)
-            booster = xgb.Booster(model_file=path)
-            predt_1 = booster.predict(Xy)
-            np.testing.assert_allclose(predt_0, predt_1)
-
-            path = os.path.join(tempdir, "model.ubj")
-            booster.save_model(path)
-            booster = xgb.Booster(model_file=path)
-            predt_1 = booster.predict(Xy)
-            np.testing.assert_allclose(predt_0, predt_1)
-
    @pytest.mark.skipif(**tm.no_sklearn())
    def test_attributes(self):
        from sklearn.datasets import load_iris
--- a/tests/python/test_callback.py
+++ b/tests/python/test_callback.py
@@ -278,14 +278,18 @@ class TestCallbacks:

        dtrain, dtest = tm.load_agaricus(__file__)

-        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
+        watchlist = [(dtest, "eval"), (dtrain, "train")]
        num_round = 4

        # learning_rates as a list
        # init eta with 0 to check whether learning_rates work
-        param = {'max_depth': 2, 'eta': 0, 'verbosity': 0,
-                 'objective': 'binary:logistic', 'eval_metric': 'error',
-                 'tree_method': tree_method}
+        param = {
+            "max_depth": 2,
+            "eta": 0,
+            "objective": "binary:logistic",
+            "eval_metric": "error",
+            "tree_method": tree_method,
+        }
        evals_result = {}
        bst = xgb.train(
            param,
@@ -295,15 +299,19 @@ class TestCallbacks:
            callbacks=[scheduler([0.8, 0.7, 0.6, 0.5])],
            evals_result=evals_result,
        )
-        eval_errors_0 = list(map(float, evals_result['eval']['error']))
+        eval_errors_0 = list(map(float, evals_result["eval"]["error"]))
        assert isinstance(bst, xgb.core.Booster)
        # validation error should decrease, if eta > 0
        assert eval_errors_0[0] > eval_errors_0[-1]

        # init learning_rate with 0 to check whether learning_rates work
-        param = {'max_depth': 2, 'learning_rate': 0, 'verbosity': 0,
-                 'objective': 'binary:logistic', 'eval_metric': 'error',
-                 'tree_method': tree_method}
+        param = {
+            "max_depth": 2,
+            "learning_rate": 0,
+            "objective": "binary:logistic",
+            "eval_metric": "error",
+            "tree_method": tree_method,
+        }
        evals_result = {}

        bst = xgb.train(
@@ -314,15 +322,17 @@ class TestCallbacks:
            callbacks=[scheduler([0.8, 0.7, 0.6, 0.5])],
            evals_result=evals_result,
        )
-        eval_errors_1 = list(map(float, evals_result['eval']['error']))
+        eval_errors_1 = list(map(float, evals_result["eval"]["error"]))
        assert isinstance(bst, xgb.core.Booster)
        # validation error should decrease, if learning_rate > 0
        assert eval_errors_1[0] > eval_errors_1[-1]

        # check if learning_rates override default value of eta/learning_rate
        param = {
-            'max_depth': 2, 'verbosity': 0, 'objective': 'binary:logistic',
-            'eval_metric': 'error', 'tree_method': tree_method
+            "max_depth": 2,
+            "objective": "binary:logistic",
+            "eval_metric": "error",
+            "tree_method": tree_method,
        }
        evals_result = {}
        bst = xgb.train(
--- a/tests/python/test_config.py
+++ b/tests/python/test_config.py
@@ -12,6 +12,7 @@ def test_global_config_verbosity(verbosity_level):
        return xgb.get_config()["verbosity"]

    old_verbosity = get_current_verbosity()
+    assert old_verbosity == 1
    with xgb.config_context(verbosity=verbosity_level):
        new_verbosity = get_current_verbosity()
        assert new_verbosity == verbosity_level
@@ -30,7 +31,10 @@ def test_global_config_use_rmm(use_rmm):
    assert old_use_rmm_flag == get_current_use_rmm_flag()


-def test_nested_config():
+def test_nested_config() -> None:
+    verbosity = xgb.get_config()["verbosity"]
+    assert verbosity == 1
+
    with xgb.config_context(verbosity=3):
        assert xgb.get_config()["verbosity"] == 3
        with xgb.config_context(verbosity=2):
@@ -45,13 +49,15 @@ def test_nested_config():
        with xgb.config_context(verbosity=None):
            assert xgb.get_config()["verbosity"] == 3  # None has no effect

-    verbosity = xgb.get_config()["verbosity"]
    xgb.set_config(verbosity=2)
    assert xgb.get_config()["verbosity"] == 2
    with xgb.config_context(verbosity=3):
        assert xgb.get_config()["verbosity"] == 3
    xgb.set_config(verbosity=verbosity)  # reset

+    verbosity = xgb.get_config()["verbosity"]
+    assert verbosity == 1
+

 def test_thread_safty():
    n_threads = multiprocessing.cpu_count()
--- a/tests/python/test_dmatrix.py
+++ b/tests/python/test_dmatrix.py
@@ -1,6 +1,7 @@
 import csv
 import os
 import tempfile
+import warnings

 import numpy as np
 import pytest
@@ -24,20 +25,18 @@ class TestDMatrix:
        with pytest.warns(UserWarning):
            data._warn_unused_missing("uri", 4)

-        with pytest.warns(None) as record:
+        with warnings.catch_warnings():
+            warnings.simplefilter("error")
            data._warn_unused_missing("uri", None)
            data._warn_unused_missing("uri", np.nan)

-            assert len(record) == 0
-
-        with pytest.warns(None) as record:
+        with warnings.catch_warnings():
+            warnings.simplefilter("error")
            x = rng.randn(10, 10)
            y = rng.randn(10)

            xgb.DMatrix(x, y, missing=4)

-            assert len(record) == 0
-
    def test_dmatrix_numpy_init(self):
        data = np.random.randn(5, 5)
        dm = xgb.DMatrix(data)
@@ -264,7 +263,7 @@ class TestDMatrix:
        dtrain = xgb.DMatrix(x, label=rng.binomial(1, 0.3, nrow))
        assert (dtrain.num_row(), dtrain.num_col()) == (nrow, ncol)
        watchlist = [(dtrain, "train")]
-        param = {"max_depth": 3, "objective": "binary:logistic", "verbosity": 0}
+        param = {"max_depth": 3, "objective": "binary:logistic"}
        bst = xgb.train(param, dtrain, 5, watchlist)
        bst.predict(dtrain)

@@ -302,7 +301,7 @@ class TestDMatrix:
        dtrain = xgb.DMatrix(x, label=rng.binomial(1, 0.3, nrow))
        assert (dtrain.num_row(), dtrain.num_col()) == (nrow, ncol)
        watchlist = [(dtrain, "train")]
-        param = {"max_depth": 3, "objective": "binary:logistic", "verbosity": 0}
+        param = {"max_depth": 3, "objective": "binary:logistic"}
        bst = xgb.train(param, dtrain, 5, watchlist)
        bst.predict(dtrain)

@@ -475,17 +474,19 @@ class TestDMatrixColumnSplit:
    def test_uri(self):
        def verify_uri():
            rank = xgb.collective.get_rank()
-            data = np.random.rand(5, 5)
-            filename = f"test_data_{rank}.csv"
-            with open(filename, mode="w", newline="") as file:
-                writer = csv.writer(file)
-                for row in data:
-                    writer.writerow(row)
-            dtrain = xgb.DMatrix(
-                f"{filename}?format=csv", data_split_mode=DataSplitMode.COL
-            )
-            assert dtrain.num_row() == 5
-            assert dtrain.num_col() == 5 * xgb.collective.get_world_size()
+            with tempfile.TemporaryDirectory() as tmpdir:
+                filename = os.path.join(tmpdir, f"test_data_{rank}.csv")
+
+                data = np.random.rand(5, 5)
+                with open(filename, mode="w", newline="") as file:
+                    writer = csv.writer(file)
+                    for row in data:
+                        writer.writerow(row)
+                dtrain = xgb.DMatrix(
+                    f"{filename}?format=csv", data_split_mode=DataSplitMode.COL
+                )
+                assert dtrain.num_row() == 5
+                assert dtrain.num_col() == 5 * xgb.collective.get_world_size()

        tm.run_with_rabit(world_size=3, test_fn=verify_uri)

--- a/tests/python/test_early_stopping.py
+++ b/tests/python/test_early_stopping.py
@@ -67,8 +67,10 @@ class TestEarlyStopping:
        X = digits['data']
        y = digits['target']
        dm = xgb.DMatrix(X, label=y)
-        params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
-                  'objective': 'binary:logistic', 'eval_metric': 'error'}
+        params = {
+            'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic',
+            'eval_metric': 'error'
+        }

        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
                    early_stopping_rounds=10)
--- a/tests/python/test_eval_metrics.py
+++ b/tests/python/test_eval_metrics.py
@@ -9,29 +9,13 @@ rng = np.random.RandomState(1337)


 class TestEvalMetrics:
-    xgb_params_01 = {
-        'verbosity': 0,
-        'nthread': 1,
-        'eval_metric': 'error'
-    }
+    xgb_params_01 = {'nthread': 1, 'eval_metric': 'error'}

-    xgb_params_02 = {
-        'verbosity': 0,
-        'nthread': 1,
-        'eval_metric': ['error']
-    }
+    xgb_params_02 = {'nthread': 1, 'eval_metric': ['error']}

-    xgb_params_03 = {
-        'verbosity': 0,
-        'nthread': 1,
-        'eval_metric': ['rmse', 'error']
-    }
+    xgb_params_03 = {'nthread': 1, 'eval_metric': ['rmse', 'error']}

-    xgb_params_04 = {
-        'verbosity': 0,
-        'nthread': 1,
-        'eval_metric': ['error', 'rmse']
-    }
+    xgb_params_04 = {'nthread': 1, 'eval_metric': ['error', 'rmse']}

    def evalerror_01(self, preds, dtrain):
        labels = dtrain.get_label()
--- a/tests/python/test_linear.py
+++ b/tests/python/test_linear.py
@@ -22,8 +22,14 @@ coord_strategy = strategies.fixed_dictionaries({

 def train_result(param, dmat, num_rounds):
    result = {}
-    xgb.train(param, dmat, num_rounds, [(dmat, 'train')], verbose_eval=False,
-              evals_result=result)
+    xgb.train(
+        param,
+        dmat,
+        num_rounds,
+        evals=[(dmat, "train")],
+        verbose_eval=False,
+        evals_result=result,
+    )
    return result


--- a/tests/python/test_model_io.py
+++ b/tests/python/test_model_io.py
@@ -0,0 +1,406 @@
+import json
+import locale
+import os
+import pickle
+import tempfile
+from pathlib import Path
+from typing import List
+
+import numpy as np
+import pytest
+
+import xgboost as xgb
+from xgboost import testing as tm
+
+
+def json_model(model_path: str, parameters: dict) -> dict:
+    datasets = pytest.importorskip("sklearn.datasets")
+
+    X, y = datasets.make_classification(64, n_features=8, n_classes=3, n_informative=6)
+    if parameters.get("objective", None) == "multi:softmax":
+        parameters["num_class"] = 3
+
+    dm1 = xgb.DMatrix(X, y)
+
+    bst = xgb.train(parameters, dm1)
+    bst.save_model(model_path)
+
+    if model_path.endswith("ubj"):
+        import ubjson
+
+        with open(model_path, "rb") as ubjfd:
+            model = ubjson.load(ubjfd)
+    else:
+        with open(model_path, "r") as fd:
+            model = json.load(fd)
+
+    return model
+
+
+class TestBoosterIO:
+    def run_model_json_io(self, parameters: dict, ext: str) -> None:
+        config = xgb.config.get_config()
+        assert config["verbosity"] == 1
+
+        if ext == "ubj" and tm.no_ubjson()["condition"]:
+            pytest.skip(tm.no_ubjson()["reason"])
+
+        loc = locale.getpreferredencoding(False)
+        model_path = "test_model_json_io." + ext
+        j_model = json_model(model_path, parameters)
+        assert isinstance(j_model["learner"], dict)
+
+        bst = xgb.Booster(model_file=model_path)
+
+        bst.save_model(fname=model_path)
+        if ext == "ubj":
+            import ubjson
+
+            with open(model_path, "rb") as ubjfd:
+                j_model = ubjson.load(ubjfd)
+        else:
+            with open(model_path, "r") as fd:
+                j_model = json.load(fd)
+
+        assert isinstance(j_model["learner"], dict)
+
+        os.remove(model_path)
+        assert locale.getpreferredencoding(False) == loc
+
+        json_raw = bst.save_raw(raw_format="json")
+        from_jraw = xgb.Booster()
+        from_jraw.load_model(json_raw)
+
+        ubj_raw = bst.save_raw(raw_format="ubj")
+        from_ubjraw = xgb.Booster()
+        from_ubjraw.load_model(ubj_raw)
+
+        if parameters.get("multi_strategy", None) != "multi_output_tree":
+            # Old binary model is not supported for vector leaf.
+            with pytest.warns(Warning, match="Model format is default to UBJSON"):
+                old_from_json = from_jraw.save_raw(raw_format="deprecated")
+                old_from_ubj = from_ubjraw.save_raw(raw_format="deprecated")
+
+            assert old_from_json == old_from_ubj
+
+        raw_json = bst.save_raw(raw_format="json")
+        pretty = json.dumps(json.loads(raw_json), indent=2) + "\n\n"
+        bst.load_model(bytearray(pretty, encoding="ascii"))
+
+        if parameters.get("multi_strategy", None) != "multi_output_tree":
+            # Old binary model is not supported for vector leaf.
+            with pytest.warns(Warning, match="Model format is default to UBJSON"):
+                old_from_json = from_jraw.save_raw(raw_format="deprecated")
+                old_from_ubj = from_ubjraw.save_raw(raw_format="deprecated")
+
+            assert old_from_json == old_from_ubj
+
+        rng = np.random.default_rng()
+        X = rng.random(size=from_jraw.num_features() * 10).reshape(
+            (10, from_jraw.num_features())
+        )
+        predt_from_jraw = from_jraw.predict(xgb.DMatrix(X))
+        predt_from_bst = bst.predict(xgb.DMatrix(X))
+        np.testing.assert_allclose(predt_from_jraw, predt_from_bst)
+
+    @pytest.mark.parametrize("ext", ["json", "ubj"])
+    def test_model_json_io(self, ext: str) -> None:
+        parameters = {"booster": "gbtree", "tree_method": "hist"}
+        self.run_model_json_io(parameters, ext)
+        parameters = {
+            "booster": "gbtree",
+            "tree_method": "hist",
+            "multi_strategy": "multi_output_tree",
+            "objective": "multi:softmax",
+        }
+        self.run_model_json_io(parameters, ext)
+        parameters = {"booster": "gblinear"}
+        self.run_model_json_io(parameters, ext)
+        parameters = {"booster": "dart", "tree_method": "hist"}
+        self.run_model_json_io(parameters, ext)
+
+    def test_categorical_model_io(self) -> None:
+        X, y = tm.make_categorical(256, 16, 71, False)
+        Xy = xgb.DMatrix(X, y, enable_categorical=True)
+        booster = xgb.train({"tree_method": "approx"}, Xy, num_boost_round=16)
+        predt_0 = booster.predict(Xy)
+
+        with tempfile.TemporaryDirectory() as tempdir:
+            path = os.path.join(tempdir, "model.deprecated")
+            with pytest.raises(ValueError, match=r".*JSON/UBJSON.*"):
+                with pytest.warns(Warning, match="Model format is default to UBJSON"):
+                    booster.save_model(path)
+
+            path = os.path.join(tempdir, "model.json")
+            booster.save_model(path)
+            booster = xgb.Booster(model_file=path)
+            predt_1 = booster.predict(Xy)
+            np.testing.assert_allclose(predt_0, predt_1)
+
+            path = os.path.join(tempdir, "model.ubj")
+            booster.save_model(path)
+            booster = xgb.Booster(model_file=path)
+            predt_1 = booster.predict(Xy)
+            np.testing.assert_allclose(predt_0, predt_1)
+
+    @pytest.mark.skipif(**tm.no_json_schema())
+    def test_json_io_schema(self) -> None:
+        import jsonschema
+
+        model_path = "test_json_schema.json"
+        path = os.path.dirname(
+            os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+        )
+        doc = os.path.join(path, "doc", "model.schema")
+        with open(doc, "r") as fd:
+            schema = json.load(fd)
+        parameters = {"tree_method": "hist", "booster": "gbtree"}
+        jsonschema.validate(instance=json_model(model_path, parameters), schema=schema)
+        os.remove(model_path)
+
+        parameters = {"tree_method": "hist", "booster": "dart"}
+        jsonschema.validate(instance=json_model(model_path, parameters), schema=schema)
+        os.remove(model_path)
+
+        try:
+            dtrain, _ = tm.load_agaricus(__file__)
+            xgb.train({"objective": "foo"}, dtrain, num_boost_round=1)
+        except ValueError as e:
+            e_str = str(e)
+            beg = e_str.find("Objective candidate")
+            end = e_str.find("Stack trace")
+            e_str = e_str[beg:end]
+            e_str = e_str.strip()
+            splited = e_str.splitlines()
+            objectives = [s.split(": ")[1] for s in splited]
+            j_objectives = schema["properties"]["learner"]["properties"]["objective"][
+                "oneOf"
+            ]
+            objectives_from_schema = set()
+            for j_obj in j_objectives:
+                objectives_from_schema.add(j_obj["properties"]["name"]["const"])
+            assert set(objectives) == objectives_from_schema
+
+    def test_model_binary_io(self) -> None:
+        model_path = "test_model_binary_io.deprecated"
+        parameters = {
+            "tree_method": "hist",
+            "booster": "gbtree",
+            "scale_pos_weight": "0.5",
+        }
+        X = np.random.random((10, 3))
+        y = np.random.random((10,))
+        dtrain = xgb.DMatrix(X, y)
+        bst = xgb.train(parameters, dtrain, num_boost_round=2)
+        with pytest.warns(Warning, match="Model format is default to UBJSON"):
+            bst.save_model(model_path)
+        bst = xgb.Booster(model_file=model_path)
+        os.remove(model_path)
+        config = json.loads(bst.save_config())
+        assert (
+            float(config["learner"]["objective"]["reg_loss_param"]["scale_pos_weight"])
+            == 0.5
+        )
+
+        buf = bst.save_raw()
+        from_raw = xgb.Booster()
+        from_raw.load_model(buf)
+
+        buf_from_raw = from_raw.save_raw()
+        assert buf == buf_from_raw
+
+    def test_with_pathlib(self) -> None:
+        """Saving and loading model files from paths."""
+        save_path = Path("model.ubj")
+
+        rng = np.random.default_rng(1994)
+
+        data = rng.normal(size=(100, 2))
+        target = np.array([0, 1] * 50)
+        features = ["Feature1", "Feature2"]
+
+        dm = xgb.DMatrix(data, label=target, feature_names=features)
+        params = {
+            "objective": "binary:logistic",
+            "eval_metric": "logloss",
+            "eta": 0.3,
+            "max_depth": 1,
+        }
+
+        bst = xgb.train(params, dm, num_boost_round=1)
+
+        # save, assert exists
+        bst.save_model(save_path)
+        assert save_path.exists()
+
+        def dump_assertions(dump: List[str]) -> None:
+            """Check that the dump holds one tree: a root split and two leaves."""
+            assert len(dump) == 1, "Expected only 1 tree to be dumped."
+            assert (
+                len(dump[0].splitlines()) == 3
+            ), "Expected 1 root and 2 leaves - 3 lines."
+
+        # load the model again using Path
+        bst2 = xgb.Booster(model_file=save_path)
+        dump2 = bst2.get_dump()
+        dump_assertions(dump2)
+
+        # load again using load_model
+        bst3 = xgb.Booster()
+        bst3.load_model(save_path)
+        dump3 = bst3.get_dump()
+        dump_assertions(dump3)
+
+        # remove file
+        Path.unlink(save_path)
+
+
+def save_load_model(model_path: str) -> None:
+    from sklearn.datasets import load_digits
+    from sklearn.model_selection import KFold
+
+    rng = np.random.RandomState(1994)
+
+    digits = load_digits(n_class=2)
+    y = digits["target"]
+    X = digits["data"]
+    kf = KFold(n_splits=2, shuffle=True, random_state=rng)
+    for train_index, test_index in kf.split(X, y):
+        xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
+        xgb_model.save_model(model_path)
+
+        xgb_model = xgb.XGBClassifier()
+        xgb_model.load_model(model_path)
+
+        assert isinstance(xgb_model.classes_, np.ndarray)
+        np.testing.assert_equal(xgb_model.classes_, np.array([0, 1]))
+        assert isinstance(xgb_model._Booster, xgb.Booster)
+
+        preds = xgb_model.predict(X[test_index])
+        labels = y[test_index]
+        err = sum(
+            1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]
+        ) / float(len(preds))
+        assert err < 0.1
+        assert xgb_model.get_booster().attr("scikit_learn") is None
+
+        # test native booster
+        preds = xgb_model.predict(X[test_index], output_margin=True)
+        booster = xgb.Booster(model_file=model_path)
+        predt_1 = booster.predict(xgb.DMatrix(X[test_index]), output_margin=True)
+        assert np.allclose(preds, predt_1)
+
+        with pytest.raises(TypeError):
+            xgb_model = xgb.XGBModel()
+            xgb_model.load_model(model_path)
+
+    clf = xgb.XGBClassifier(booster="gblinear", early_stopping_rounds=1)
+    clf.fit(X, y, eval_set=[(X, y)])
+    best_iteration = clf.best_iteration
+    best_score = clf.best_score
+    predt_0 = clf.predict(X)
+    clf.save_model(model_path)
+    clf.load_model(model_path)
+    assert clf.booster == "gblinear"
+    predt_1 = clf.predict(X)
+    np.testing.assert_allclose(predt_0, predt_1)
+    assert clf.best_iteration == best_iteration
+    assert clf.best_score == best_score
+
+    clfpkl = pickle.dumps(clf)
+    clf = pickle.loads(clfpkl)
+    predt_2 = clf.predict(X)
+    np.testing.assert_allclose(predt_0, predt_2)
+    assert clf.best_iteration == best_iteration
+    assert clf.best_score == best_score
+
+
+@pytest.mark.skipif(**tm.no_sklearn())
+def test_sklearn_model() -> None:
+    from sklearn.datasets import load_digits
+    from sklearn.model_selection import train_test_split
+
+    with tempfile.TemporaryDirectory() as tempdir:
+        model_path = os.path.join(tempdir, "digits.deprecated")
+        with pytest.warns(Warning, match="Model format is default to UBJSON"):
+            save_load_model(model_path)
+
+    with tempfile.TemporaryDirectory() as tempdir:
+        model_path = os.path.join(tempdir, "digits.model.json")
+        save_load_model(model_path)
+
+    with tempfile.TemporaryDirectory() as tempdir:
+        model_path = os.path.join(tempdir, "digits.model.ubj")
+        digits = load_digits(n_class=2)
+        y = digits["target"]
+        X = digits["data"]
+        booster = xgb.train(
+            {"tree_method": "hist", "objective": "binary:logistic"},
+            dtrain=xgb.DMatrix(X, y),
+            num_boost_round=4,
+        )
+        predt_0 = booster.predict(xgb.DMatrix(X))
+        booster.save_model(model_path)
+        cls = xgb.XGBClassifier()
+        cls.load_model(model_path)
+
+        proba = cls.predict_proba(X)
+        assert proba.shape[0] == X.shape[0]
+        assert proba.shape[1] == 2  # binary
+
+        predt_1 = cls.predict_proba(X)[:, 1]
+        assert np.allclose(predt_0, predt_1)
+
+        cls = xgb.XGBModel()
+        cls.load_model(model_path)
+        predt_1 = cls.predict(X)
+        assert np.allclose(predt_0, predt_1)
+
+        # multi-class
+        X, y = load_digits(n_class=10, return_X_y=True)
+        # small test_size to force early stop
+        X_train, X_test, y_train, y_test = train_test_split(
+            X, y, test_size=0.01, random_state=1
+        )
+        clf = xgb.XGBClassifier(
+            n_estimators=64, tree_method="hist", early_stopping_rounds=2
+        )
+        clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])
+        score = clf.best_score
+        clf.save_model(model_path)
+
+        clf = xgb.XGBClassifier()
+        clf.load_model(model_path)
+        assert clf.classes_.size == 10
+        assert clf.objective == "multi:softprob"
+
+        np.testing.assert_equal(clf.classes_, np.arange(10))
+        assert clf.n_classes_ == 10
+
+        assert clf.best_iteration == 27
+        assert clf.best_score == score
+
+
+@pytest.mark.skipif(**tm.no_sklearn())
+def test_with_sklearn_obj_metric() -> None:
+    from sklearn.metrics import mean_squared_error
+
+    X, y = tm.datasets.make_regression()
+    reg = xgb.XGBRegressor(objective=tm.ls_obj, eval_metric=mean_squared_error)
+    reg.fit(X, y)
+
+    pkl = pickle.dumps(reg)
+    reg_1 = pickle.loads(pkl)
+    assert callable(reg_1.objective)
+    assert callable(reg_1.eval_metric)
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        path = os.path.join(tmpdir, "model.json")
+        reg.save_model(path)
+
+        reg_2 = xgb.XGBRegressor()
+        reg_2.load_model(path)
+
+    assert not callable(reg_2.objective)
+    assert not callable(reg_2.eval_metric)
+    assert reg_2.eval_metric is None
--- a/tests/python/test_pickling.py
+++ b/tests/python/test_pickling.py
@@ -1,13 +1,10 @@
 import json
 import os
 import pickle
-import tempfile

 import numpy as np
-import pytest

 import xgboost as xgb
-from xgboost import testing as tm

 kRows = 100
 kCols = 10
@@ -64,27 +61,3 @@ class TestPickling:
        params = {"nthread": 8, "tree_method": "exact", "subsample": 0.5}
        config = self.run_model_pickling(params)
        check(config)
-
-    @pytest.mark.skipif(**tm.no_sklearn())
-    def test_with_sklearn_obj_metric(self) -> None:
-        from sklearn.metrics import mean_squared_error
-
-        X, y = tm.datasets.make_regression()
-        reg = xgb.XGBRegressor(objective=tm.ls_obj, eval_metric=mean_squared_error)
-        reg.fit(X, y)
-
-        pkl = pickle.dumps(reg)
-        reg_1 = pickle.loads(pkl)
-        assert callable(reg_1.objective)
-        assert callable(reg_1.eval_metric)
-
-        with tempfile.TemporaryDirectory() as tmpdir:
-            path = os.path.join(tmpdir, "model.json")
-            reg.save_model(path)
-
-            reg_2 = xgb.XGBRegressor()
-            reg_2.load_model(path)
-
-        assert not callable(reg_2.objective)
-        assert not callable(reg_2.eval_metric)
-        assert reg_2.eval_metric is None
--- a/tests/python/test_shap.py
+++ b/tests/python/test_shap.py
@@ -49,7 +49,7 @@ class TestSHAP:

        def fn(max_depth: int, num_rounds: int) -> None:
            # train
-            params = {"max_depth": max_depth, "eta": 1, "verbosity": 0}
+            params = {"max_depth": max_depth, "eta": 1}
            bst = xgb.train(params, dtrain, num_boost_round=num_rounds)

            # predict
--- a/tests/python/test_updaters.py
+++ b/tests/python/test_updaters.py
@@ -117,7 +117,6 @@ class TestTreeMethod:
        ag_param = {'max_depth': 2,
                    'tree_method': 'hist',
                    'eta': 1,
-                    'verbosity': 0,
                    'objective': 'binary:logistic',
                    'eval_metric': 'auc'}
        hist_res = {}
@@ -340,7 +339,8 @@ class TestTreeMethod:

        assert get_score(config_0) == get_score(config_1)

-        raw_booster = booster_1.save_raw(raw_format="deprecated")
+        with pytest.warns(Warning, match="Model format is default to UBJSON"):
+            raw_booster = booster_1.save_raw(raw_format="deprecated")
        booster_2 = xgb.Booster(model_file=raw_booster)
        config_2 = json.loads(booster_2.save_config())
        assert get_score(config_1) == get_score(config_2)
--- a/tests/python/test_with_pandas.py
+++ b/tests/python/test_with_pandas.py
@@ -341,7 +341,6 @@ class TestPandas:
        params = {
            "max_depth": 2,
            "eta": 1,
-            "verbosity": 0,
            "objective": "binary:logistic",
            "eval_metric": "error",
        }
@@ -372,7 +371,6 @@ class TestPandas:
        params = {
            "max_depth": 2,
            "eta": 1,
-            "verbosity": 0,
            "objective": "binary:logistic",
            "eval_metric": "auc",
        }
@@ -383,7 +381,6 @@ class TestPandas:
        params = {
            "max_depth": 2,
            "eta": 1,
-            "verbosity": 0,
            "objective": "binary:logistic",
            "eval_metric": ["auc"],
        }
@@ -394,7 +391,6 @@ class TestPandas:
        params = {
            "max_depth": 2,
            "eta": 1,
-            "verbosity": 0,
            "objective": "binary:logistic",
            "eval_metric": ["auc"],
        }
@@ -413,7 +409,6 @@ class TestPandas:
        params = {
            "max_depth": 2,
            "eta": 1,
-            "verbosity": 0,
            "objective": "binary:logistic",
        }
        cv = xgb.cv(
@@ -424,7 +419,6 @@ class TestPandas:
        params = {
            "max_depth": 2,
            "eta": 1,
-            "verbosity": 0,
            "objective": "binary:logistic",
        }
        cv = xgb.cv(
@@ -435,7 +429,6 @@ class TestPandas:
        params = {
            "max_depth": 2,
            "eta": 1,
-            "verbosity": 0,
            "objective": "binary:logistic",
            "eval_metric": ["auc"],
        }
--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@@ -678,7 +678,6 @@ def test_split_value_histograms():
    params = {
        "max_depth": 6,
        "eta": 0.01,
-        "verbosity": 0,
        "objective": "binary:logistic",
        "base_score": 0.5,
    }
@@ -897,128 +896,6 @@ def test_validation_weights():
    run_validation_weights(xgb.XGBClassifier)


-def save_load_model(model_path):
-    from sklearn.datasets import load_digits
-    from sklearn.model_selection import KFold
-
-    digits = load_digits(n_class=2)
-    y = digits['target']
-    X = digits['data']
-    kf = KFold(n_splits=2, shuffle=True, random_state=rng)
-    for train_index, test_index in kf.split(X, y):
-        xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
-        xgb_model.save_model(model_path)
-
-        xgb_model = xgb.XGBClassifier()
-        xgb_model.load_model(model_path)
-
-        assert isinstance(xgb_model.classes_, np.ndarray)
-        np.testing.assert_equal(xgb_model.classes_, np.array([0, 1]))
-        assert isinstance(xgb_model._Booster, xgb.Booster)
-
-        preds = xgb_model.predict(X[test_index])
-        labels = y[test_index]
-        err = sum(1 for i in range(len(preds))
-                  if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
-        assert err < 0.1
-        assert xgb_model.get_booster().attr('scikit_learn') is None
-
-        # test native booster
-        preds = xgb_model.predict(X[test_index], output_margin=True)
-        booster = xgb.Booster(model_file=model_path)
-        predt_1 = booster.predict(xgb.DMatrix(X[test_index]),
-                                  output_margin=True)
-        assert np.allclose(preds, predt_1)
-
-        with pytest.raises(TypeError):
-            xgb_model = xgb.XGBModel()
-            xgb_model.load_model(model_path)
-
-    clf = xgb.XGBClassifier(booster="gblinear", early_stopping_rounds=1)
-    clf.fit(X, y, eval_set=[(X, y)])
-    best_iteration = clf.best_iteration
-    best_score = clf.best_score
-    predt_0 = clf.predict(X)
-    clf.save_model(model_path)
-    clf.load_model(model_path)
-    assert clf.booster == "gblinear"
-    predt_1 = clf.predict(X)
-    np.testing.assert_allclose(predt_0, predt_1)
-    assert clf.best_iteration == best_iteration
-    assert clf.best_score == best_score
-
-    clfpkl = pickle.dumps(clf)
-    clf = pickle.loads(clfpkl)
-    predt_2 = clf.predict(X)
-    np.testing.assert_allclose(predt_0, predt_2)
-    assert clf.best_iteration == best_iteration
-    assert clf.best_score == best_score
-
-
-def test_save_load_model():
-    with tempfile.TemporaryDirectory() as tempdir:
-        model_path = os.path.join(tempdir, "digits.model")
-        save_load_model(model_path)
-
-    with tempfile.TemporaryDirectory() as tempdir:
-        model_path = os.path.join(tempdir, "digits.model.json")
-        save_load_model(model_path)
-
-    from sklearn.datasets import load_digits
-    from sklearn.model_selection import train_test_split
-
-    with tempfile.TemporaryDirectory() as tempdir:
-        model_path = os.path.join(tempdir, "digits.model.ubj")
-        digits = load_digits(n_class=2)
-        y = digits["target"]
-        X = digits["data"]
-        booster = xgb.train(
-            {"tree_method": "hist", "objective": "binary:logistic"},
-            dtrain=xgb.DMatrix(X, y),
-            num_boost_round=4,
-        )
-        predt_0 = booster.predict(xgb.DMatrix(X))
-        booster.save_model(model_path)
-        cls = xgb.XGBClassifier()
-        cls.load_model(model_path)
-
-        proba = cls.predict_proba(X)
-        assert proba.shape[0] == X.shape[0]
-        assert proba.shape[1] == 2  # binary
-
-        predt_1 = cls.predict_proba(X)[:, 1]
-        assert np.allclose(predt_0, predt_1)
-
-        cls = xgb.XGBModel()
-        cls.load_model(model_path)
-        predt_1 = cls.predict(X)
-        assert np.allclose(predt_0, predt_1)
-
-        # mclass
-        X, y = load_digits(n_class=10, return_X_y=True)
-        # small test_size to force early stop
-        X_train, X_test, y_train, y_test = train_test_split(
-            X, y, test_size=0.01, random_state=1
-        )
-        clf = xgb.XGBClassifier(
-            n_estimators=64, tree_method="hist", early_stopping_rounds=2
-        )
-        clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])
-        score = clf.best_score
-        clf.save_model(model_path)
-
-        clf = xgb.XGBClassifier()
-        clf.load_model(model_path)
-        assert clf.classes_.size == 10
-        assert clf.objective == "multi:softprob"
-
-        np.testing.assert_equal(clf.classes_, np.arange(10))
-        assert clf.n_classes_ == 10
-
-        assert clf.best_iteration == 27
-        assert clf.best_score == score
-
-
 def test_RFECV():
    from sklearn.datasets import load_breast_cancer, load_diabetes, load_iris
    from sklearn.feature_selection import RFECV