[doc] Add typing to dask demos. (#10207)

Jiaming Yuan, 2024-04-23 00:57:05 +08:00, committed by GitHub
parent 3fbb221fec, commit 59d7b8dc72
6 changed files with 17 additions and 9 deletions

File 1 of 6 (a demo under demo/dask/): survival model training with Dask on CPU

@@ -6,6 +6,7 @@ Example of training survival model with Dask on CPU
import os
+import dask.array as da
import dask.dataframe as dd
from dask.distributed import Client, LocalCluster
@@ -13,7 +14,7 @@ from xgboost import dask as dxgb
from xgboost.dask import DaskDMatrix
-def main(client):
+def main(client: Client) -> da.Array:
    # Load an example survival data from CSV into a Dask data frame.
    # The Veterans' Administration Lung Cancer Trial
    # The Statistical Analysis of Failure Time Data by Kalbfleisch J. and Prentice R (1980)
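Annotating the entrypoint as main(client: Client) -> da.Array documents that the demo hands back a lazy dask array of predictions rather than materialized results. A minimal sketch of how such a typed entrypoint is driven; the stub body and cluster size below are illustrative, not part of this commit:

import dask.array as da
from dask.distributed import Client, LocalCluster


def main(client: Client) -> da.Array:
    # Stand-in for the demo's body: the real demo trains an AFT survival
    # model and returns its predictions, which arrive as a lazy dask array.
    return da.zeros(10)


if __name__ == "__main__":
    # LocalCluster starts local worker processes; Client connects to them.
    with LocalCluster(n_workers=2) as cluster:
        with Client(cluster) as client:
            predictions = main(client)
            print(predictions.compute())  # materialize the lazy array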

File 2 of 6 (a demo under demo/dask/): training demo on CPU

@@ -11,7 +11,7 @@ from xgboost import dask as dxgb
from xgboost.dask import DaskDMatrix
-def main(client):
+def main(client: Client) -> None:
    # generate some random data for demonstration
    m = 100000
    n = 100
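For reference, the body these annotations describe follows the usual pattern of the dask demos; a hedged sketch in which the chunk sizes and training parameters are illustrative:

import dask.array as da
from dask.distributed import Client

from xgboost import dask as dxgb
from xgboost.dask import DaskDMatrix


def main(client: Client) -> None:
    # generate some random data for demonstration
    m = 100000
    n = 100
    X = da.random.random(size=(m, n), chunks=100)
    y = da.random.random(size=(m,), chunks=100)

    # DaskDMatrix forwards the lazy collections to the workers.
    dtrain = DaskDMatrix(client, X, y)
    output = dxgb.train(
        client,
        {"verbosity": 1, "tree_method": "hist"},
        dtrain,
        num_boost_round=4,
        evals=[(dtrain, "train")],
    )
    # The demo only reports results and returns nothing, hence `-> None`.
    print("Evaluation history:", output["history"])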

File 3 of 6 (a demo under demo/dask/): example of using callbacks with Dask

@@ -3,6 +3,8 @@ Example of using callbacks with Dask
====================================
"""
+from typing import Any
+
import numpy as np
from dask.distributed import Client, LocalCluster
from dask_ml.datasets import make_regression
@@ -13,7 +15,7 @@ import xgboost.dask as dxgb
from xgboost.dask import DaskDMatrix
-def probability_for_going_backward(epoch):
+def probability_for_going_backward(epoch: int) -> float:
    return 0.999 / (1.0 + 0.05 * np.log(1.0 + epoch))
@@ -23,7 +25,9 @@ class CustomEarlyStopping(xgb.callback.TrainingCallback):
    In the beginning, allow the metric to become worse with a probability of 0.999.
    As boosting progresses, the probability should be adjusted downward"""
-    def __init__(self, *, validation_set, target_metric, maximize, seed):
+    def __init__(
+        self, *, validation_set: str, target_metric: str, maximize: bool, seed: int
+    ) -> None:
        self.validation_set = validation_set
        self.target_metric = target_metric
        self.maximize = maximize
@@ -34,7 +38,9 @@ class CustomEarlyStopping(xgb.callback.TrainingCallback):
        else:
            self.better = lambda x, y: x < y
-    def after_iteration(self, model, epoch, evals_log):
+    def after_iteration(
+        self, model: Any, epoch: int, evals_log: xgb.callback.TrainingCallback.EvalsLog
+    ) -> bool:
        metric_history = evals_log[self.validation_set][self.target_metric]
        if len(metric_history) < 2 or self.better(
            metric_history[-1], metric_history[-2]
@@ -42,7 +48,7 @@ class CustomEarlyStopping(xgb.callback.TrainingCallback):
            return False  # continue training
        p = probability_for_going_backward(epoch)
        go_backward = self.rng.choice(2, size=(1,), replace=True, p=[1 - p, p]).astype(
-            np.bool
+            np.bool_
        )[0]
        print(
            "The validation metric went into the wrong direction. "
@@ -54,7 +60,7 @@ class CustomEarlyStopping(xgb.callback.TrainingCallback):
            return True  # stop training
-def main(client):
+def main(client: Client) -> None:
    m = 100000
    n = 100
    X, y = make_regression(n_samples=m, n_features=n, chunks=200, random_state=0)
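Two notes on this file. The np.bool to np.bool_ change is required because the np.bool alias was deprecated in NumPy 1.20 and removed in 1.24; np.bool_ is the actual scalar type. And EvalsLog is the alias xgboost.callback.TrainingCallback exposes for the evaluation history, a mapping of data name to metric name to recorded values, which is what the typed after_iteration consumes. A hedged sketch of wiring the callback into training, using the CustomEarlyStopping from the diff above; dataset sizes and parameters are illustrative:

import numpy as np
from dask.distributed import Client
from dask_ml.datasets import make_regression

import xgboost as xgb
import xgboost.dask as dxgb
from xgboost.dask import DaskDMatrix


def run(client: Client) -> None:
    X, y = make_regression(n_samples=10_000, n_features=20, chunks=200, random_state=0)
    dtrain = DaskDMatrix(client, X, y)

    # probability_for_going_backward(0) == 0.999; by epoch 100 it has decayed
    # to roughly 0.81, so late regressions stop training more often.
    dxgb.train(
        client,
        {"objective": "reg:squarederror"},
        dtrain,
        num_boost_round=100,
        evals=[(dtrain, "Train")],
        callbacks=[
            CustomEarlyStopping(
                validation_set="Train", target_metric="rmse", maximize=False, seed=0
            )
        ],
    )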

File 4 of 6 (a demo under demo/dask/): training demo returning the booster

@@ -9,7 +9,7 @@ from dask.distributed import Client, LocalCluster
from xgboost import dask as dxgb
-def main(client):
+def main(client: Client) -> dxgb.Booster:
    # generate some random data for demonstration
    n = 100
    m = 10000
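Here the return annotation is dxgb.Booster: the dask module re-exports the regular Booster class, and dxgb.train hands one back inside its result dict. A short sketch of the pattern the annotation describes; data shapes come from the diff, the rest is illustrative:

import dask.array as da
from dask.distributed import Client

from xgboost import dask as dxgb
from xgboost.dask import DaskDMatrix


def main(client: Client) -> dxgb.Booster:
    n = 100
    m = 10000
    X = da.random.random(size=(m, n), chunks=(1000, n))
    y = da.random.random(size=(m,), chunks=1000)
    dtrain = DaskDMatrix(client, X, y)

    output = dxgb.train(client, {"tree_method": "hist"}, dtrain, num_boost_round=4)
    # output is {"booster": Booster, "history": {...}}; returning the booster
    # is what makes `-> dxgb.Booster` the accurate annotation.
    return output["booster"]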

File 5 of 6 (a demo under demo/dask/): GPU training demo using dask_cuda

@@ -12,7 +12,7 @@ from dask_cuda import LocalCUDACluster
from xgboost import dask as dxgb
-def main(client):
+def main(client: Client) -> dxgb.Booster:
    # generate some random data for demonstration
    n = 100
    m = 1000000
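The GPU variant differs mainly in the cluster type: dask_cuda's LocalCUDACluster assigns one GPU per worker. A sketch of the typed driver, assuming a recent xgboost where device="cuda" selects GPU training; the body below is a stand-in for the demo's:

import dask.array as da
from dask.distributed import Client
from dask_cuda import LocalCUDACluster

from xgboost import dask as dxgb
from xgboost.dask import DaskDMatrix


def main(client: Client) -> dxgb.Booster:
    X = da.random.random(size=(1_000_000, 100), chunks=10_000)
    y = da.random.random(size=(1_000_000,), chunks=10_000)
    dtrain = DaskDMatrix(client, X, y)
    output = dxgb.train(
        client, {"tree_method": "hist", "device": "cuda"}, dtrain, num_boost_round=4
    )
    return output["booster"]


if __name__ == "__main__":
    # Each dask-cuda worker manages a single GPU.
    with LocalCUDACluster(n_workers=1) as cluster:
        with Client(cluster) as client:
            main(client)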

File 6 of 6: linter path configuration (LintersPaths)

@@ -98,6 +98,7 @@ class LintersPaths:
        "tests/test_distributed/test_gpu_with_spark/test_data.py",
        "tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py",
        # demo
+        "demo/dask/",
        "demo/json-model/json_parser.py",
        "demo/guide-python/external_memory.py",
        "demo/guide-python/sklearn_examples.py",