Save model in ubj as the default. (#9947)

2024-01-05 17:53:36 +08:00
parent c03a4d5088
commit 38dd91f491
23 changed files with 598 additions and 550 deletions
--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@@ -678,7 +678,6 @@ def test_split_value_histograms():
    params = {
        "max_depth": 6,
        "eta": 0.01,
-        "verbosity": 0,
        "objective": "binary:logistic",
        "base_score": 0.5,
    }
@@ -897,128 +896,6 @@ def test_validation_weights():
    run_validation_weights(xgb.XGBClassifier)


-def save_load_model(model_path):
-    from sklearn.datasets import load_digits
-    from sklearn.model_selection import KFold
-
-    digits = load_digits(n_class=2)
-    y = digits['target']
-    X = digits['data']
-    kf = KFold(n_splits=2, shuffle=True, random_state=rng)
-    for train_index, test_index in kf.split(X, y):
-        xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
-        xgb_model.save_model(model_path)
-
-        xgb_model = xgb.XGBClassifier()
-        xgb_model.load_model(model_path)
-
-        assert isinstance(xgb_model.classes_, np.ndarray)
-        np.testing.assert_equal(xgb_model.classes_, np.array([0, 1]))
-        assert isinstance(xgb_model._Booster, xgb.Booster)
-
-        preds = xgb_model.predict(X[test_index])
-        labels = y[test_index]
-        err = sum(1 for i in range(len(preds))
-                  if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
-        assert err < 0.1
-        assert xgb_model.get_booster().attr('scikit_learn') is None
-
-        # test native booster
-        preds = xgb_model.predict(X[test_index], output_margin=True)
-        booster = xgb.Booster(model_file=model_path)
-        predt_1 = booster.predict(xgb.DMatrix(X[test_index]),
-                                  output_margin=True)
-        assert np.allclose(preds, predt_1)
-
-        with pytest.raises(TypeError):
-            xgb_model = xgb.XGBModel()
-            xgb_model.load_model(model_path)
-
-    clf = xgb.XGBClassifier(booster="gblinear", early_stopping_rounds=1)
-    clf.fit(X, y, eval_set=[(X, y)])
-    best_iteration = clf.best_iteration
-    best_score = clf.best_score
-    predt_0 = clf.predict(X)
-    clf.save_model(model_path)
-    clf.load_model(model_path)
-    assert clf.booster == "gblinear"
-    predt_1 = clf.predict(X)
-    np.testing.assert_allclose(predt_0, predt_1)
-    assert clf.best_iteration == best_iteration
-    assert clf.best_score == best_score
-
-    clfpkl = pickle.dumps(clf)
-    clf = pickle.loads(clfpkl)
-    predt_2 = clf.predict(X)
-    np.testing.assert_allclose(predt_0, predt_2)
-    assert clf.best_iteration == best_iteration
-    assert clf.best_score == best_score
-
-
-def test_save_load_model():
-    with tempfile.TemporaryDirectory() as tempdir:
-        model_path = os.path.join(tempdir, "digits.model")
-        save_load_model(model_path)
-
-    with tempfile.TemporaryDirectory() as tempdir:
-        model_path = os.path.join(tempdir, "digits.model.json")
-        save_load_model(model_path)
-
-    from sklearn.datasets import load_digits
-    from sklearn.model_selection import train_test_split
-
-    with tempfile.TemporaryDirectory() as tempdir:
-        model_path = os.path.join(tempdir, "digits.model.ubj")
-        digits = load_digits(n_class=2)
-        y = digits["target"]
-        X = digits["data"]
-        booster = xgb.train(
-            {"tree_method": "hist", "objective": "binary:logistic"},
-            dtrain=xgb.DMatrix(X, y),
-            num_boost_round=4,
-        )
-        predt_0 = booster.predict(xgb.DMatrix(X))
-        booster.save_model(model_path)
-        cls = xgb.XGBClassifier()
-        cls.load_model(model_path)
-
-        proba = cls.predict_proba(X)
-        assert proba.shape[0] == X.shape[0]
-        assert proba.shape[1] == 2  # binary
-
-        predt_1 = cls.predict_proba(X)[:, 1]
-        assert np.allclose(predt_0, predt_1)
-
-        cls = xgb.XGBModel()
-        cls.load_model(model_path)
-        predt_1 = cls.predict(X)
-        assert np.allclose(predt_0, predt_1)
-
-        # mclass
-        X, y = load_digits(n_class=10, return_X_y=True)
-        # small test_size to force early stop
-        X_train, X_test, y_train, y_test = train_test_split(
-            X, y, test_size=0.01, random_state=1
-        )
-        clf = xgb.XGBClassifier(
-            n_estimators=64, tree_method="hist", early_stopping_rounds=2
-        )
-        clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])
-        score = clf.best_score
-        clf.save_model(model_path)
-
-        clf = xgb.XGBClassifier()
-        clf.load_model(model_path)
-        assert clf.classes_.size == 10
-        assert clf.objective == "multi:softprob"
-
-        np.testing.assert_equal(clf.classes_, np.arange(10))
-        assert clf.n_classes_ == 10
-
-        assert clf.best_iteration == 27
-        assert clf.best_score == score
-
-
 def test_RFECV():
    from sklearn.datasets import load_breast_cancer, load_diabetes, load_iris
    from sklearn.feature_selection import RFECV