Init estimation for regression. (#8272)

2023-01-11 02:04:56 +08:00
parent 1b58d81315
commit badeff1d74
29 changed files with 466 additions and 132 deletions
--- a/tests/ci_build/Dockerfile.cpu
+++ b/tests/ci_build/Dockerfile.cpu
@@ -36,10 +36,10 @@ RUN git clone -b v1.49.1 https://github.com/grpc/grpc.git \
    rm -rf grpc

 # Create new Conda environment
-COPY conda_env/cpu_test.yml /scripts/
-RUN mamba env create -n cpu_test --file=/scripts/cpu_test.yml && \
+COPY conda_env/linux_cpu_test.yml /scripts/
+RUN mamba env create -n linux_cpu_test --file=/scripts/linux_cpu_test.yml && \
    mamba clean --all && \
-    conda run --no-capture-output -n cpu_test pip install buildkite-test-collector
+    conda run --no-capture-output -n linux_cpu_test pip install buildkite-test-collector

 # Install lightweight sudo (not bound to TTY)
 RUN set -ex; \
--- a/tests/ci_build/conda_env/linux_cpu_test.yml
+++ b/tests/ci_build/conda_env/linux_cpu_test.yml
@@ -1,8 +1,12 @@
-name: cpu_test
+name: linux_cpu_test
 channels:
 - conda-forge
 dependencies:
 - python=3.8
+- cmake
+- c-compiler
+- cxx-compiler
+- ninja
 - pip
 - wheel
 - pyyaml
@@ -33,7 +37,7 @@ dependencies:
 - pyarrow
 - protobuf
 - cloudpickle
- shap
+- shap>=0.41
 - modin
 - pip:
  - datatable
--- a/tests/ci_build/lint_python.py
+++ b/tests/ci_build/lint_python.py
@@ -146,13 +146,17 @@ def main(args: argparse.Namespace) -> None:
                "tests/python/test_data_iterator.py",
                "tests/python/test_dt.py",
                "tests/python/test_quantile_dmatrix.py",
+                "tests/python/test_tree_regularization.py",
                "tests/python-gpu/test_gpu_data_iterator.py",
+                "tests/ci_build/lint_python.py",
                "tests/test_distributed/test_with_spark/",
                "tests/test_distributed/test_gpu_with_spark/",
                # demo
                "demo/json-model/json_parser.py",
                "demo/guide-python/cat_in_the_dat.py",
                "demo/guide-python/categorical.py",
+                "demo/guide-python/feature_weights.py",
+                "demo/guide-python/sklearn_parallel.py",
                "demo/guide-python/spark_estimator_examples.py",
                # CI
                "tests/ci_build/lint_python.py",
@@ -194,6 +198,7 @@ def main(args: argparse.Namespace) -> None:
                "demo/json-model/json_parser.py",
                "demo/guide-python/external_memory.py",
                "demo/guide-python/cat_in_the_dat.py",
+                "demo/guide-python/feature_weights.py",
                # tests
                "tests/python/test_dt.py",
                "tests/python/test_data_iterator.py",
--- a/tests/ci_build/test_python.sh
+++ b/tests/ci_build/test_python.sh
@@ -76,7 +76,7 @@ case "$suite" in
    ;;

  cpu)
-    source activate cpu_test
+    source activate linux_cpu_test
    set -x
    install_xgboost
    export RAY_OBJECT_STORE_ALLOW_SLOW_STORAGE=1
--- a/tests/python-gpu/test_from_cupy.py
+++ b/tests/python-gpu/test_from_cupy.py
@@ -224,5 +224,6 @@ Arrow specification.'''
        dtrain = dmatrix_from_cupy(
            np.float32, xgb.DeviceQuantileDMatrix, np.nan)
        with pytest.raises(xgb.core.XGBoostError):
-            xgb.train({'tree_method': 'gpu_hist', 'gpu_id': 1},
-                      dtrain, num_boost_round=10)
+            xgb.train(
+                {'tree_method': 'gpu_hist', 'gpu_id': 1}, dtrain, num_boost_round=10
+            )
--- a/tests/python-gpu/test_gpu_updaters.py
+++ b/tests/python-gpu/test_gpu_updaters.py
@@ -5,6 +5,7 @@ import numpy as np
 import pytest
 from hypothesis import assume, given, note, settings, strategies
 from xgboost.testing.params import cat_parameter_strategy, hist_parameter_strategy
+from xgboost.testing.updater import check_init_estimation

 import xgboost as xgb
 from xgboost import testing as tm
@@ -172,24 +173,25 @@ class TestGPUUpdaters:
        kCols = 100

        X = np.empty((kRows, kCols))
-        y = np.empty((kRows))
+        y = np.empty((kRows,))

        dtrain = xgb.DMatrix(X, y)

-        bst = xgb.train({'verbosity': 2,
-                         'tree_method': 'gpu_hist',
-                         'gpu_id': 0},
-                        dtrain,
-                        verbose_eval=True,
-                        num_boost_round=6,
-                        evals=[(dtrain, 'Train')])
+        bst = xgb.train(
+            {"verbosity": 2, "tree_method": "gpu_hist", "gpu_id": 0},
+            dtrain,
+            verbose_eval=True,
+            num_boost_round=6,
+            evals=[(dtrain, 'Train')]
+        )

        kRows = 100
        X = np.random.randn(kRows, kCols)

        dtest = xgb.DMatrix(X)
        predictions = bst.predict(dtest)
-        np.testing.assert_allclose(predictions, 0.5, 1e-6)
+        # non-distributed, 0.0 is returned due to base_score estimation with 0 gradient.
+        np.testing.assert_allclose(predictions, 0.0, 1e-6)

    @pytest.mark.mgpu
    @given(tm.dataset_strategy, strategies.integers(0, 10))
@@ -204,3 +206,6 @@ class TestGPUUpdaters:
    @pytest.mark.parametrize("weighted", [True, False])
    def test_adaptive(self, weighted) -> None:
        self.cputest.run_adaptive("gpu_hist", weighted)
+
+    def test_init_estimation(self) -> None:
+        check_init_estimation("gpu_hist")
--- a/tests/python/test_early_stopping.py
+++ b/tests/python/test_early_stopping.py
@@ -1,5 +1,6 @@
 import numpy as np
 import pytest
+from xgboost.testing.updater import get_basescore

 import xgboost as xgb
 from xgboost import testing as tm
@@ -11,16 +12,12 @@ class TestEarlyStopping:
    @pytest.mark.skipif(**tm.no_sklearn())
    def test_early_stopping_nonparallel(self):
        from sklearn.datasets import load_digits
-        try:
-            from sklearn.model_selection import train_test_split
-        except ImportError:
-            from sklearn.cross_validation import train_test_split
+        from sklearn.model_selection import train_test_split

        digits = load_digits(n_class=2)
        X = digits['data']
        y = digits['target']
-        X_train, X_test, y_train, y_test = train_test_split(X, y,
-                                                            random_state=0)
+        X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
        clf1 = xgb.XGBClassifier(learning_rate=0.1)
        clf1.fit(X_train, y_train, early_stopping_rounds=5, eval_metric="auc",
                 eval_set=[(X_test, y_test)])
@@ -31,9 +28,23 @@ class TestEarlyStopping:
        assert clf1.best_score == clf2.best_score
        assert clf1.best_score != 1
        # check overfit
-        clf3 = xgb.XGBClassifier(learning_rate=0.1)
-        clf3.fit(X_train, y_train, early_stopping_rounds=10, eval_metric="auc",
-                 eval_set=[(X_test, y_test)])
+        clf3 = xgb.XGBClassifier(
+            learning_rate=0.1,
+            eval_metric="auc",
+            early_stopping_rounds=10
+        )
+        clf3.fit(X_train, y_train, eval_set=[(X_test, y_test)])
+        base_score = get_basescore(clf3)
+        assert 0.53 > base_score > 0.5
+
+        clf3 = xgb.XGBClassifier(
+            learning_rate=0.1,
+            base_score=.5,
+            eval_metric="auc",
+            early_stopping_rounds=10
+        )
+        clf3.fit(X_train, y_train, eval_set=[(X_test, y_test)])
+
        assert clf3.best_score == 1

    def evalerror(self, preds, dtrain):
--- a/tests/python/test_tree_regularization.py
+++ b/tests/python/test_tree_regularization.py
@@ -9,11 +9,13 @@ train_data = xgb.DMatrix(np.array([[1]]), label=np.array([1]))
 class TestTreeRegularization:
    def test_alpha(self):
        params = {
-            'tree_method': 'exact', 'verbosity': 0,
-            'objective': 'reg:squarederror',
-            'eta': 1,
-            'lambda': 0,
-            'alpha': 0.1
+            "tree_method": "exact",
+            "verbosity": 0,
+            "objective": "reg:squarederror",
+            "eta": 1,
+            "lambda": 0,
+            "alpha": 0.1,
+            "base_score": 0.5,
        }

        model = xgb.train(params, train_data, 1)
@@ -27,11 +29,13 @@ class TestTreeRegularization:

    def test_lambda(self):
        params = {
-            'tree_method': 'exact', 'verbosity': 0,
-            'objective': 'reg:squarederror',
-            'eta': 1,
-            'lambda': 1,
-            'alpha': 0
+            "tree_method": "exact",
+            "verbosity": 0,
+            "objective": "reg:squarederror",
+            "eta": 1,
+            "lambda": 1,
+            "alpha": 0,
+            "base_score": 0.5,
        }

        model = xgb.train(params, train_data, 1)
@@ -45,11 +49,13 @@ class TestTreeRegularization:

    def test_alpha_and_lambda(self):
        params = {
-            'tree_method': 'exact', 'verbosity': 1,
-            'objective': 'reg:squarederror',
-            'eta': 1,
-            'lambda': 1,
-            'alpha': 0.1
+            "tree_method": "exact",
+            "verbosity": 1,
+            "objective": "reg:squarederror",
+            "eta": 1,
+            "lambda": 1,
+            "alpha": 0.1,
+            "base_score": 0.5,
        }

        model = xgb.train(params, train_data, 1)
--- a/tests/python/test_updaters.py
+++ b/tests/python/test_updaters.py
@@ -10,6 +10,7 @@ from xgboost.testing.params import (
    exact_parameter_strategy,
    hist_parameter_strategy,
 )
+from xgboost.testing.updater import check_init_estimation

 import xgboost as xgb
 from xgboost import testing as tm
@@ -449,3 +450,6 @@ class TestTreeMethod:
    )
    def test_adaptive(self, tree_method, weighted) -> None:
        self.run_adaptive(tree_method, weighted)
+
+    def test_init_estimation(self) -> None:
+        check_init_estimation("hist")
--- a/tests/python/test_with_shap.py
+++ b/tests/python/test_with_shap.py
@@ -9,6 +9,7 @@ except Exception:
    shap = None
    pass

+
 pytestmark = pytest.mark.skipif(shap is None, reason="Requires shap package")


@@ -16,11 +17,16 @@ pytestmark = pytest.mark.skipif(shap is None, reason="Requires shap package")
 # Changes in binary format may cause problems
 def test_with_shap():
    from sklearn.datasets import fetch_california_housing
+
    X, y = fetch_california_housing(return_X_y=True)
    dtrain = xgb.DMatrix(X, label=y)
    model = xgb.train({"learning_rate": 0.01}, dtrain, 10)
    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X)
    margin = model.predict(dtrain, output_margin=True)
-    assert np.allclose(np.sum(shap_values, axis=len(shap_values.shape) - 1),
-                       margin - explainer.expected_value, 1e-3, 1e-3)
+    assert np.allclose(
+        np.sum(shap_values, axis=len(shap_values.shape) - 1),
+        margin - explainer.expected_value,
+        1e-3,
+        1e-3,
+    )
--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@@ -9,6 +9,7 @@ import numpy as np
 import pytest
 from sklearn.utils.estimator_checks import parametrize_with_checks
 from xgboost.testing.shared import get_feature_weights, validate_data_initialization
+from xgboost.testing.updater import get_basescore

 import xgboost as xgb
 from xgboost import testing as tm
@@ -196,19 +197,22 @@ def test_stacking_classification():
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
    clf.fit(X_train, y_train).score(X_test, y_test)

-
@pytest.mark.skipif(**tm.no_pandas())
 def test_feature_importances_weight():
    from sklearn.datasets import load_digits

    digits = load_digits(n_class=2)
-    y = digits['target']
-    X = digits['data']
+    y = digits["target"]
+    X = digits["data"]
+
+    xgb_model = xgb.XGBClassifier(
+        random_state=0,
+        tree_method="exact",
+        learning_rate=0.1,
+        importance_type="weight",
+        base_score=0.5,
+    ).fit(X, y)

-    xgb_model = xgb.XGBClassifier(random_state=0,
-                                  tree_method="exact",
-                                  learning_rate=0.1,
-                                  importance_type="weight").fit(X, y)
    exp = np.array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.00833333, 0.,
                    0., 0., 0., 0., 0., 0., 0., 0.025, 0.14166667, 0., 0., 0.,
                    0., 0., 0., 0.00833333, 0.25833333, 0., 0., 0., 0.,
@@ -223,16 +227,22 @@ def test_feature_importances_weight():
    import pandas as pd
    y = pd.Series(digits['target'])
    X = pd.DataFrame(digits['data'])
-    xgb_model = xgb.XGBClassifier(random_state=0,
-                                  tree_method="exact",
-                                  learning_rate=0.1,
-                                  importance_type="weight").fit(X, y)
+    xgb_model = xgb.XGBClassifier(
+        random_state=0,
+        tree_method="exact",
+        learning_rate=0.1,
+        base_score=.5,
+        importance_type="weight"
+    ).fit(X, y)
    np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)

-    xgb_model = xgb.XGBClassifier(random_state=0,
-                                  tree_method="exact",
-                                  learning_rate=0.1,
-                                  importance_type="weight").fit(X, y)
+    xgb_model = xgb.XGBClassifier(
+        random_state=0,
+        tree_method="exact",
+        learning_rate=0.1,
+        importance_type="weight",
+        base_score=.5,
+    ).fit(X, y)
    np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)

    with pytest.raises(ValueError):
@@ -274,6 +284,7 @@ def test_feature_importances_gain():
        random_state=0, tree_method="exact",
        learning_rate=0.1,
        importance_type="gain",
+        base_score=0.5,
    ).fit(X, y)

    exp = np.array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
@@ -296,6 +307,7 @@ def test_feature_importances_gain():
        tree_method="exact",
        learning_rate=0.1,
        importance_type="gain",
+        base_score=0.5,
    ).fit(X, y)
    np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)

@@ -304,6 +316,7 @@ def test_feature_importances_gain():
        tree_method="exact",
        learning_rate=0.1,
        importance_type="gain",
+        base_score=0.5,
    ).fit(X, y)
    np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)

@@ -593,18 +606,21 @@ def test_split_value_histograms():

    digits_2class = load_digits(n_class=2)

-    X = digits_2class['data']
-    y = digits_2class['target']
+    X = digits_2class["data"]
+    y = digits_2class["target"]

    dm = xgb.DMatrix(X, label=y)
-    params = {'max_depth': 6, 'eta': 0.01, 'verbosity': 0,
-              'objective': 'binary:logistic'}
+    params = {
+        "max_depth": 6,
+        "eta": 0.01,
+        "verbosity": 0,
+        "objective": "binary:logistic",
+        "base_score": 0.5,
+    }

    gbdt = xgb.train(params, dm, num_boost_round=10)
-    assert gbdt.get_split_value_histogram("not_there",
-                                          as_pandas=True).shape[0] == 0
-    assert gbdt.get_split_value_histogram("not_there",
-                                          as_pandas=False).shape[0] == 0
+    assert gbdt.get_split_value_histogram("not_there", as_pandas=True).shape[0] == 0
+    assert gbdt.get_split_value_histogram("not_there", as_pandas=False).shape[0] == 0
    assert gbdt.get_split_value_histogram("f28", bins=0).shape[0] == 1
    assert gbdt.get_split_value_histogram("f28", bins=1).shape[0] == 1
    assert gbdt.get_split_value_histogram("f28", bins=2).shape[0] == 2
@@ -748,11 +764,7 @@ def test_sklearn_get_default_params():
    cls = xgb.XGBClassifier()
    assert cls.get_params()["base_score"] is None
    cls.fit(X[:4, ...], y[:4, ...])
-    base_score = float(
-        json.loads(cls.get_booster().save_config())["learner"]["learner_model_param"][
-            "base_score"
-        ]
-    )
+    base_score = get_basescore(cls)
    np.testing.assert_equal(base_score, 0.5)


--- a/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py
+++ b/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py
@@ -42,6 +42,7 @@ try:
    from dask import array as da
    from dask.distributed import Client
    from dask_cuda import LocalCUDACluster
+    from xgboost.testing.dask import check_init_estimation

    from xgboost import dask as dxgb
 except ImportError:
@@ -220,6 +221,9 @@ class TestDistributedGPU:
        y = dd.from_array(y_, chunksize=50).map_partitions(cudf.from_pandas)
        run_boost_from_prediction_multi_class(X, y, "gpu_hist", local_cuda_client)

+    def test_init_estimation(self, local_cuda_client: Client) -> None:
+        check_init_estimation("gpu_hist", local_cuda_client)
+
    @pytest.mark.skipif(**tm.no_dask_cudf())
    def test_dask_dataframe(self, local_cuda_client: Client) -> None:
        run_with_dask_dataframe(dxgb.DaskDMatrix, local_cuda_client)
--- a/tests/test_distributed/test_with_dask/test_with_dask.py
+++ b/tests/test_distributed/test_with_dask/test_with_dask.py
@@ -12,7 +12,7 @@ from itertools import starmap
 from math import ceil
 from operator import attrgetter, getitem
 from pathlib import Path
-from typing import Any, Dict, Generator, Optional, Tuple, Type, Union
+from typing import Any, Dict, Generator, Optional, Tuple, Type, TypeVar, Union

 import hypothesis
 import numpy as np
@@ -32,7 +32,7 @@ from xgboost.testing.shared import (
 import xgboost as xgb
 from xgboost import testing as tm

-pytestmark = [tm.timeout(30), pytest.mark.skipif(**tm.no_dask())]
+pytestmark = [tm.timeout(60), pytest.mark.skipif(**tm.no_dask())]

 import dask
 import dask.array as da
@@ -40,6 +40,7 @@ import dask.dataframe as dd
 from distributed import Client, LocalCluster
 from toolz import sliding_window  # dependency of dask
 from xgboost.dask import DaskDMatrix
+from xgboost.testing.dask import check_init_estimation

 dask.config.set({"distributed.scheduler.allowed-failures": False})

@@ -52,8 +53,10 @@ else:

@pytest.fixture(scope="module")
 def cluster() -> Generator:
+    n_threads = os.cpu_count()
+    assert n_threads is not None
    with LocalCluster(
-        n_workers=2, threads_per_worker=2, dashboard_address=":0"
+        n_workers=2, threads_per_worker=n_threads // 2, dashboard_address=":0"
    ) as dask_cluster:
        yield dask_cluster

@@ -151,12 +154,15 @@ def deterministic_persist_per_worker(df: dd.DataFrame, client: "Client") -> dd.D
    return df2


+Margin = TypeVar("Margin", dd.DataFrame, dd.Series, None)
+
+
 def deterministic_repartition(
    client: Client,
    X: dd.DataFrame,
    y: dd.Series,
-    m: Optional[Union[dd.DataFrame, dd.Series]],
-) -> Tuple[dd.DataFrame, dd.Series, Optional[Union[dd.DataFrame, dd.Series]]]:
+    m: Margin,
+) -> Tuple[dd.DataFrame, dd.Series, Margin]:
    # force repartition the data to avoid non-deterministic result
    if any(X.map_partitions(lambda x: _is_cudf_df(x)).compute()):
        # dask_cudf seems to be doing fine for now
@@ -474,14 +480,20 @@ def run_boost_from_prediction(
    X, y, margin = deterministic_repartition(client, X, y, margin)
    predictions_1: dd.Series = model_1.predict(X, base_margin=margin)

-    cls_2 = xgb.dask.DaskXGBClassifier(
+    model_2 = xgb.dask.DaskXGBClassifier(
        learning_rate=0.3, n_estimators=8, tree_method=tree_method, max_bin=512
    )
    X, y, _ = deterministic_repartition(client, X, y, None)
-    cls_2.fit(X=X, y=y)
-    predictions_2: dd.Series = cls_2.predict(X)
+    model_2.fit(X=X, y=y)
+    predictions_2: dd.Series = model_2.predict(X)

-    assert np.all(predictions_1.compute() == predictions_2.compute())
+    predt_1 = predictions_1.compute()
+    predt_2 = predictions_2.compute()
+    if hasattr(predt_1, "to_numpy"):
+        predt_1 = predt_1.to_numpy()
+    if hasattr(predt_2, "to_numpy"):
+        predt_2 = predt_2.to_numpy()
+    np.testing.assert_allclose(predt_1, predt_2, atol=1e-5)

    margined = xgb.dask.DaskXGBClassifier(n_estimators=4)
    X, y, margin = deterministic_repartition(client, X, y, margin)
@@ -706,6 +718,7 @@ def run_dask_classifier(
 def test_dask_classifier(model: str, client: "Client") -> None:
    X, y, w = generate_array(with_weights=True)
    y = (y * 10).astype(np.int32)
+    assert w is not None
    run_dask_classifier(X, y, w, model, None, client, 10)

    y_bin = y.copy()
@@ -1386,16 +1399,22 @@ class TestWithDask:
        else:
            w = None

-        m = xgb.dask.DaskDMatrix(
-            client, data=X, label=y, weight=w)
-        history = xgb.dask.train(client, params=params, dtrain=m,
-                                 num_boost_round=num_rounds,
-                                 evals=[(m, 'train')])['history']
+        m = xgb.dask.DaskDMatrix(client, data=X, label=y, weight=w)
+        history = xgb.dask.train(
+            client,
+            params=params,
+            dtrain=m,
+            num_boost_round=num_rounds,
+            evals=[(m, "train")],
+        )["history"]
        note(history)
-        history = history['train'][dataset.metric]
+        history = history["train"][dataset.metric]

-        def is_stump() -> bool:
-            return params["max_depth"] == 1 or params["max_leaves"] == 1
+        def is_stump():
+            return (
+                params.get("max_depth", None) == 1
+                or params.get("max_leaves", None) == 1
+            )

        def minimum_bin() -> bool:
            return "max_bin" in params and params["max_bin"] == 2
@@ -1410,7 +1429,11 @@ class TestWithDask:
        else:
            assert tm.non_increasing(history)
        # Make sure that it's decreasing
-        assert history[-1] < history[0]
+        if is_stump():
+            # we might have already got the best score with base_score.
+            assert history[-1] <= history[0]
+        else:
+            assert history[-1] < history[0]

    @given(params=hist_parameter_strategy,
           dataset=tm.dataset_strategy)
@@ -1646,13 +1669,17 @@ class TestWithDask:

            results_custom = reg.evals_result()

-            reg = xgb.dask.DaskXGBRegressor(n_estimators=rounds, tree_method='hist')
+            reg = xgb.dask.DaskXGBRegressor(
+                n_estimators=rounds, tree_method="hist", base_score=0.5
+            )
            reg.fit(X, y, eval_set=[(X, y)])
            results_native = reg.evals_result()

-            np.testing.assert_allclose(results_custom['validation_0']['rmse'],
-                                       results_native['validation_0']['rmse'])
-            tm.non_increasing(results_native['validation_0']['rmse'])
+            np.testing.assert_allclose(
+                results_custom["validation_0"]["rmse"],
+                results_native["validation_0"]["rmse"],
+            )
+            tm.non_increasing(results_native["validation_0"]["rmse"])

    def test_no_duplicated_partition(self) -> None:
        '''Assert each worker has the correct amount of data, and DMatrix initialization doesn't
@@ -1994,6 +2021,10 @@ def test_parallel_submit_multi_clients() -> None:
            assert f.result().get_booster().num_boosted_rounds() == i + 1


+def test_init_estimation(client: Client) -> None:
+    check_init_estimation("hist", client)
+
+
 class TestDaskCallbacks:
    @pytest.mark.skipif(**tm.no_sklearn())
    def test_early_stopping(self, client: "Client") -> None:
--- a/tests/test_distributed/test_with_spark/test_spark_local_cluster.py
+++ b/tests/test_distributed/test_with_spark/test_spark_local_cluster.py
@@ -1,7 +1,6 @@
 import json
 import os
 import random
-import sys
 import uuid

 import numpy as np
@@ -216,7 +215,7 @@ class XgboostLocalClusterTestCase(SparkLocalClusterTestCase):
            ],
        )
        self.reg_best_score_eval = 5.239e-05
-        self.reg_best_score_weight_and_eval = 4.810e-05
+        self.reg_best_score_weight_and_eval = 4.850e-05

    def test_regressor_basic_with_params(self):
        regressor = SparkXGBRegressor(**self.reg_params)
--- a/tests/test_distributed/test_with_spark/utils.py
+++ b/tests/test_distributed/test_with_spark/utils.py
@@ -4,16 +4,15 @@ import shutil
 import sys
 import tempfile
 import unittest
+from io import StringIO

 import pytest
-from six import StringIO

 from xgboost import testing as tm

 pytestmark = [pytest.mark.skipif(**tm.no_spark())]

-
-from pyspark.sql import SparkSession, SQLContext
+from pyspark.sql import SparkSession
 from xgboost.spark.utils import _get_default_params_from_func