Initial GPU support for the approx tree method. (#9414)

2023-07-31 15:50:28 +08:00
parent 8f0efb4ab3
commit 912e341d57
23 changed files with 639 additions and 360 deletions
--- a/tests/python-gpu/test_gpu_updaters.py
+++ b/tests/python-gpu/test_gpu_updaters.py
@@ -7,11 +7,18 @@ from hypothesis import assume, given, note, settings, strategies

 import xgboost as xgb
 from xgboost import testing as tm
-from xgboost.testing.params import cat_parameter_strategy, hist_parameter_strategy
+from xgboost.testing.params import (
+    cat_parameter_strategy,
+    exact_parameter_strategy,
+    hist_parameter_strategy,
+)
 from xgboost.testing.updater import (
+    check_categorical_missing,
+    check_categorical_ohe,
    check_get_quantile_cut,
    check_init_estimation,
    check_quantile_loss,
+    train_result,
 )

 sys.path.append("tests/python")
@@ -20,22 +27,6 @@ import test_updaters as test_up
 pytestmark = tm.timeout(30)


-def train_result(param, dmat: xgb.DMatrix, num_rounds: int) -> dict:
-    result: xgb.callback.TrainingCallback.EvalsLog = {}
-    booster = xgb.train(
-        param,
-        dmat,
-        num_rounds,
-        [(dmat, "train")],
-        verbose_eval=False,
-        evals_result=result,
-    )
-    assert booster.num_features() == dmat.num_col()
-    assert booster.num_boosted_rounds() == num_rounds
-
-    return result
-
-
 class TestGPUUpdatersMulti:
    @given(
        hist_parameter_strategy, strategies.integers(1, 20), tm.multi_dataset_strategy
@@ -53,14 +44,45 @@ class TestGPUUpdaters:
    cputest = test_up.TestTreeMethod()

    @given(
-        hist_parameter_strategy, strategies.integers(1, 20), tm.make_dataset_strategy()
+        exact_parameter_strategy,
+        hist_parameter_strategy,
+        strategies.integers(1, 20),
+        tm.make_dataset_strategy(),
    )
    @settings(deadline=None, max_examples=50, print_blob=True)
-    def test_gpu_hist(self, param, num_rounds, dataset):
-        param["tree_method"] = "gpu_hist"
+    def test_gpu_hist(
+        self,
+        param: Dict[str, Any],
+        hist_param: Dict[str, Any],
+        num_rounds: int,
+        dataset: tm.TestDataset,
+    ) -> None:
+        param.update({"tree_method": "hist", "device": "cuda"})
+        param.update(hist_param)
        param = dataset.set_params(param)
        result = train_result(param, dataset.get_dmat(), num_rounds)
-        note(result)
+        note(str(result))
+        assert tm.non_increasing(result["train"][dataset.metric])
+
+    @given(
+        exact_parameter_strategy,
+        hist_parameter_strategy,
+        strategies.integers(1, 20),
+        tm.make_dataset_strategy(),
+    )
+    @settings(deadline=None, print_blob=True)
+    def test_gpu_approx(
+        self,
+        param: Dict[str, Any],
+        hist_param: Dict[str, Any],
+        num_rounds: int,
+        dataset: tm.TestDataset,
+    ) -> None:
+        param.update({"tree_method": "approx", "device": "cuda"})
+        param.update(hist_param)
+        param = dataset.set_params(param)
+        result = train_result(param, dataset.get_dmat(), num_rounds)
+        note(str(result))
        assert tm.non_increasing(result["train"][dataset.metric])

    @given(tm.sparse_datasets_strategy)
@@ -69,23 +91,27 @@ class TestGPUUpdaters:
        param = {"tree_method": "hist", "max_bin": 64}
        hist_result = train_result(param, dataset.get_dmat(), 16)
        note(hist_result)
-        assert tm.non_increasing(hist_result['train'][dataset.metric])
+        assert tm.non_increasing(hist_result["train"][dataset.metric])

        param = {"tree_method": "gpu_hist", "max_bin": 64}
        gpu_hist_result = train_result(param, dataset.get_dmat(), 16)
        note(gpu_hist_result)
-        assert tm.non_increasing(gpu_hist_result['train'][dataset.metric])
+        assert tm.non_increasing(gpu_hist_result["train"][dataset.metric])

        np.testing.assert_allclose(
            hist_result["train"]["rmse"], gpu_hist_result["train"]["rmse"], rtol=1e-2
        )

-    @given(strategies.integers(10, 400), strategies.integers(3, 8),
-           strategies.integers(1, 2), strategies.integers(4, 7))
+    @given(
+        strategies.integers(10, 400),
+        strategies.integers(3, 8),
+        strategies.integers(1, 2),
+        strategies.integers(4, 7),
+    )
    @settings(deadline=None, max_examples=20, print_blob=True)
    @pytest.mark.skipif(**tm.no_pandas())
    def test_categorical_ohe(self, rows, cols, rounds, cats):
-        self.cputest.run_categorical_ohe(rows, cols, rounds, cats, "gpu_hist")
+        check_categorical_ohe(rows, cols, rounds, cats, "cuda", "hist")

    @given(
        tm.categorical_dataset_strategy,
@@ -95,7 +121,7 @@ class TestGPUUpdaters:
    )
    @settings(deadline=None, max_examples=20, print_blob=True)
    @pytest.mark.skipif(**tm.no_pandas())
-    def test_categorical(
+    def test_categorical_hist(
        self,
        dataset: tm.TestDataset,
        hist_parameters: Dict[str, Any],
@@ -103,7 +129,30 @@ class TestGPUUpdaters:
        n_rounds: int,
    ) -> None:
        cat_parameters.update(hist_parameters)
-        cat_parameters["tree_method"] = "gpu_hist"
+        cat_parameters["tree_method"] = "hist"
+        cat_parameters["device"] = "cuda"
+
+        results = train_result(cat_parameters, dataset.get_dmat(), n_rounds)
+        tm.non_increasing(results["train"]["rmse"])
+
+    @given(
+        tm.categorical_dataset_strategy,
+        hist_parameter_strategy,
+        cat_parameter_strategy,
+        strategies.integers(4, 32),
+    )
+    @settings(deadline=None, max_examples=20, print_blob=True)
+    @pytest.mark.skipif(**tm.no_pandas())
+    def test_categorical_approx(
+        self,
+        dataset: tm.TestDataset,
+        hist_parameters: Dict[str, Any],
+        cat_parameters: Dict[str, Any],
+        n_rounds: int,
+    ) -> None:
+        cat_parameters.update(hist_parameters)
+        cat_parameters["tree_method"] = "approx"
+        cat_parameters["device"] = "cuda"

        results = train_result(cat_parameters, dataset.get_dmat(), n_rounds)
        tm.non_increasing(results["train"]["rmse"])
@@ -129,24 +178,25 @@ class TestGPUUpdaters:
    @given(
        strategies.integers(10, 400),
        strategies.integers(3, 8),
-        strategies.integers(4, 7)
+        strategies.integers(4, 7),
    )
    @settings(deadline=None, max_examples=20, print_blob=True)
    @pytest.mark.skipif(**tm.no_pandas())
    def test_categorical_missing(self, rows, cols, cats):
-        self.cputest.run_categorical_missing(rows, cols, cats, "gpu_hist")
+        check_categorical_missing(rows, cols, cats, "cuda", "approx")
+        check_categorical_missing(rows, cols, cats, "cuda", "hist")

    @pytest.mark.skipif(**tm.no_pandas())
    def test_max_cat(self) -> None:
        self.cputest.run_max_cat("gpu_hist")

    def test_categorical_32_cat(self):
-        '''32 hits the bound of integer bitset, so special test'''
+        """32 hits the bound of integer bitset, so special test"""
        rows = 1000
        cols = 10
        cats = 32
        rounds = 4
-        self.cputest.run_categorical_ohe(rows, cols, rounds, cats, "gpu_hist")
+        check_categorical_ohe(rows, cols, rounds, cats, "cuda", "hist")

    @pytest.mark.skipif(**tm.no_cupy())
    def test_invalid_category(self):
@@ -164,15 +214,15 @@ class TestGPUUpdaters:
    ) -> None:
        # We cannot handle empty dataset yet
        assume(len(dataset.y) > 0)
-        param['tree_method'] = 'gpu_hist'
+        param["tree_method"] = "gpu_hist"
        param = dataset.set_params(param)
        result = train_result(
            param,
            dataset.get_device_dmat(max_bin=param.get("max_bin", None)),
-            num_rounds
+            num_rounds,
        )
        note(result)
-        assert tm.non_increasing(result['train'][dataset.metric], tolerance=1e-3)
+        assert tm.non_increasing(result["train"][dataset.metric], tolerance=1e-3)

    @given(
        hist_parameter_strategy,
@@ -185,12 +235,12 @@ class TestGPUUpdaters:
            return
        # We cannot handle empty dataset yet
        assume(len(dataset.y) > 0)
-        param['tree_method'] = 'gpu_hist'
+        param["tree_method"] = "gpu_hist"
        param = dataset.set_params(param)
        m = dataset.get_external_dmat()
        external_result = train_result(param, m, num_rounds)
        del m
-        assert tm.non_increasing(external_result['train'][dataset.metric])
+        assert tm.non_increasing(external_result["train"][dataset.metric])

    def test_empty_dmatrix_prediction(self):
        # FIXME(trivialfis): This should be done with all updaters
@@ -207,7 +257,7 @@ class TestGPUUpdaters:
            dtrain,
            verbose_eval=True,
            num_boost_round=6,
-            evals=[(dtrain, 'Train')]
+            evals=[(dtrain, "Train")],
        )

        kRows = 100
@@ -222,10 +272,10 @@ class TestGPUUpdaters:
    @given(tm.make_dataset_strategy(), strategies.integers(0, 10))
    @settings(deadline=None, max_examples=10, print_blob=True)
    def test_specified_gpu_id_gpu_update(self, dataset, gpu_id):
-        param = {'tree_method': 'gpu_hist', 'gpu_id': gpu_id}
+        param = {"tree_method": "gpu_hist", "gpu_id": gpu_id}
        param = dataset.set_params(param)
        result = train_result(param, dataset.get_dmat(), 10)
-        assert tm.non_increasing(result['train'][dataset.metric])
+        assert tm.non_increasing(result["train"][dataset.metric])

    @pytest.mark.skipif(**tm.no_sklearn())
    @pytest.mark.parametrize("weighted", [True, False])