Handle the new device parameter in dask and demos. (#9386)

* Handle the new `device` parameter in dask and demos.
  - Check no ordinal is specified in the dask interface.
  - Update demos.
  - Update dask doc.
  - Update the condition for QDM.
2023-07-15 19:11:20 +08:00
parent 9da5050643
commit 16eb41936d
31 changed files with 631 additions and 450 deletions
--- a/demo/guide-python/callbacks.py
+++ b/demo/guide-python/callbacks.py
@@ -71,7 +71,8 @@ def custom_callback():
        {
            'objective': 'binary:logistic',
            'eval_metric': ['error', 'rmse'],
-            'tree_method': 'gpu_hist'
+            'tree_method': 'hist',
+            "device": "cuda",
        },
        D_train,
        evals=[(D_train, 'Train'), (D_valid, 'Valid')],
--- a/demo/guide-python/cat_in_the_dat.py
+++ b/demo/guide-python/cat_in_the_dat.py
@@ -63,7 +63,8 @@ def load_cat_in_the_dat() -> tuple[pd.DataFrame, pd.Series]:


 params = {
-    "tree_method": "gpu_hist",
+    "tree_method": "hist",
+    "device": "cuda",
    "n_estimators": 32,
    "colsample_bylevel": 0.7,
 }
--- a/demo/guide-python/categorical.py
+++ b/demo/guide-python/categorical.py
@@ -58,13 +58,13 @@ def main() -> None:
    # Specify `enable_categorical` to True, also we use onehot encoding based split
    # here for demonstration. For details see the document of `max_cat_to_onehot`.
    reg = xgb.XGBRegressor(
-        tree_method="gpu_hist", enable_categorical=True, max_cat_to_onehot=5
+        tree_method="hist", enable_categorical=True, max_cat_to_onehot=5, device="cuda"
    )
    reg.fit(X, y, eval_set=[(X, y)])

    # Pass in already encoded data
    X_enc, y_enc = make_categorical(100, 10, 4, True)
-    reg_enc = xgb.XGBRegressor(tree_method="gpu_hist")
+    reg_enc = xgb.XGBRegressor(tree_method="hist", device="cuda")
    reg_enc.fit(X_enc, y_enc, eval_set=[(X_enc, y_enc)])

    reg_results = np.array(reg.evals_result()["validation_0"]["rmse"])
--- a/demo/guide-python/external_memory.py
+++ b/demo/guide-python/external_memory.py
@@ -82,8 +82,9 @@ def main(tmpdir: str) -> xgboost.Booster:
    missing = np.NaN
    Xy = xgboost.DMatrix(it, missing=missing, enable_categorical=False)

-    # Other tree methods including ``approx``, and ``gpu_hist`` are supported. GPU
-    # behaves differently than CPU tree methods. See tutorial in doc for details.
+    # ``approx`` is also supported, but less efficient due to sketching. GPU behaves
+    # differently than CPU tree methods as it uses a hybrid approach. See tutorial in
+    # doc for details.
    booster = xgboost.train(
        {"tree_method": "hist", "max_depth": 4},
        Xy,
--- a/demo/guide-python/learning_to_rank.py
+++ b/demo/guide-python/learning_to_rank.py
@@ -104,7 +104,8 @@ def ranking_demo(args: argparse.Namespace) -> None:
    qid_test = qid_test[sorted_idx]

    ranker = xgb.XGBRanker(
-        tree_method="gpu_hist",
+        tree_method="hist",
+        device="cuda",
        lambdarank_pair_method="topk",
        lambdarank_num_pair_per_sample=13,
        eval_metric=["ndcg@1", "ndcg@8"],
@@ -161,7 +162,8 @@ def click_data_demo(args: argparse.Namespace) -> None:

    ranker = xgb.XGBRanker(
        n_estimators=512,
-        tree_method="gpu_hist",
+        tree_method="hist",
+        device="cuda",
        learning_rate=0.01,
        reg_lambda=1.5,
        subsample=0.8,
--- a/demo/guide-python/quantile_data_iterator.py
+++ b/demo/guide-python/quantile_data_iterator.py
@@ -23,22 +23,23 @@ import numpy
 import xgboost

 COLS = 64
-ROWS_PER_BATCH = 1000            # data is splited by rows
+ROWS_PER_BATCH = 1000  # data is split by rows
 BATCHES = 32


 class IterForDMatrixDemo(xgboost.core.DataIter):
-    '''A data iterator for XGBoost DMatrix.
+    """A data iterator for XGBoost DMatrix.

    `reset` and `next` are required for any data iterator, other functions here
    are utilites for demonstration's purpose.

-    '''
+    """
+
    def __init__(self):
-        '''Generate some random data for demostration.
+        """Generate some random data for demonstration.

        Actual data can be anything that is currently supported by XGBoost.
-        '''
+        """
        self.rows = ROWS_PER_BATCH
        self.cols = COLS
        rng = cupy.random.RandomState(1994)
@@ -46,7 +47,7 @@ class IterForDMatrixDemo(xgboost.core.DataIter):
        self._labels = [rng.randn(self.rows)] * BATCHES
        self._weights = [rng.uniform(size=self.rows)] * BATCHES

-        self.it = 0             # set iterator to 0
+        self.it = 0  # set iterator to 0
        super().__init__()

    def as_array(self):
@@ -59,27 +60,26 @@ class IterForDMatrixDemo(xgboost.core.DataIter):
        return cupy.concatenate(self._weights)

    def data(self):
-        '''Utility function for obtaining current batch of data.'''
+        """Utility function for obtaining current batch of data."""
        return self._data[self.it]

    def labels(self):
-        '''Utility function for obtaining current batch of label.'''
+        """Utility function for obtaining current batch of label."""
        return self._labels[self.it]

    def weights(self):
        return self._weights[self.it]

    def reset(self):
-        '''Reset the iterator'''
+        """Reset the iterator"""
        self.it = 0

    def next(self, input_data):
-        '''Yield next batch of data.'''
+        """Yield next batch of data."""
        if self.it == len(self._data):
            # Return 0 when there's no more batch.
            return 0
-        input_data(data=self.data(), label=self.labels(),
-                   weight=self.weights())
+        input_data(data=self.data(), label=self.labels(), weight=self.weights())
        self.it += 1
        return 1

@@ -103,18 +103,19 @@ def main():

    assert m_with_it.num_col() == m.num_col()
    assert m_with_it.num_row() == m.num_row()
-    # Tree meethod must be one of the `hist` or `gpu_hist`. We use `gpu_hist` for GPU
-    # input here.
+    # Tree method must be `hist`.
    reg_with_it = xgboost.train(
-        {"tree_method": "gpu_hist"}, m_with_it, num_boost_round=rounds
+        {"tree_method": "hist", "device": "cuda"}, m_with_it, num_boost_round=rounds
    )
    predict_with_it = reg_with_it.predict(m_with_it)

-    reg = xgboost.train({"tree_method": "gpu_hist"}, m, num_boost_round=rounds)
+    reg = xgboost.train(
+        {"tree_method": "hist", "device": "cuda"}, m, num_boost_round=rounds
+    )
    predict = reg.predict(m)

    numpy.testing.assert_allclose(predict_with_it, predict, rtol=1e6)


-if __name__ == '__main__':
+if __name__ == "__main__":
    main()
--- a/demo/guide-python/update_process.py
+++ b/demo/guide-python/update_process.py
@@ -24,7 +24,7 @@ def main():
    Xy = xgb.DMatrix(X_train, y_train)
    evals_result: xgb.callback.EvaluationMonitor.EvalsLog = {}
    booster = xgb.train(
-        {"tree_method": "gpu_hist", "max_depth": 6},
+        {"tree_method": "hist", "max_depth": 6, "device": "cuda"},
        Xy,
        num_boost_round=n_rounds,
        evals=[(Xy, "Train")],
@@ -33,8 +33,8 @@ def main():
    SHAP = booster.predict(Xy, pred_contribs=True)

    # Refresh the leaf value and tree statistic
-    X_refresh = X[X.shape[0] // 2:]
-    y_refresh = y[y.shape[0] // 2:]
+    X_refresh = X[X.shape[0] // 2 :]
+    y_refresh = y[y.shape[0] // 2 :]
    Xy_refresh = xgb.DMatrix(X_refresh, y_refresh)
    # The model will adapt to other half of the data by changing leaf value (no change in
    # split condition) with refresh_leaf set to True.
@@ -87,7 +87,7 @@ def main():
    np.testing.assert_allclose(
        np.array(prune_result["Original"]["rmse"]),
        np.array(prune_result["Train"]["rmse"]),
-        atol=1e-5
+        atol=1e-5,
    )