[doc] Add typing to dask demos. (#10207)

Jiaming Yuan 2024-04-23 00:57:05 +08:00 committed by GitHub
parent 3fbb221fec
commit 59d7b8dc72
6 changed files with 17 additions and 9 deletions

View File

@@ -6,6 +6,7 @@ Example of training survival model with Dask on CPU
 import os
 
+import dask.array as da
 import dask.dataframe as dd
 from dask.distributed import Client, LocalCluster
@@ -13,7 +14,7 @@ from xgboost import dask as dxgb
 from xgboost.dask import DaskDMatrix
 
 
-def main(client):
+def main(client: Client) -> da.Array:
     # Load an example survival data from CSV into a Dask data frame.
     # The Veterans' Administration Lung Cancer Trial
     # The Statistical Analysis of Failure Time Data by Kalbfleisch J. and Prentice R (1980)
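As a rough guide to what the annotated demo looks like end to end, here is a minimal sketch of a typed survival-training entry point. Only the `Client` parameter and the `da.Array` return annotation come from this diff; the data and parameters below are illustrative stand-ins for the demo's Veterans' lung cancer CSV.

import dask.array as da
import dask.dataframe as dd
from dask.distributed import Client, LocalCluster

from xgboost import dask as dxgb
from xgboost.dask import DaskDMatrix


def main(client: Client) -> da.Array:
    # Stand-in data; the real demo reads the Veterans' lung cancer CSV.
    df = dd.from_dict(
        {"x": range(100), "lower": range(1, 101), "upper": range(2, 102)},
        npartitions=2,
    )
    X = df[["x"]]
    # AFT survival training takes label bounds instead of a single label.
    dtrain = DaskDMatrix(
        client, X, label_lower_bound=df["lower"], label_upper_bound=df["upper"]
    )
    params = {"objective": "survival:aft", "aft_loss_distribution": "normal"}
    output = dxgb.train(client, params, dtrain, num_boost_round=10)
    # Predicting from a DaskDMatrix yields a dask array, matching the annotation.
    return dxgb.predict(client, output["booster"], dtrain)


if __name__ == "__main__":
    with LocalCluster(n_workers=2) as cluster:
        with Client(cluster) as client:
            main(client)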

View File

@@ -11,7 +11,7 @@ from xgboost import dask as dxgb
 from xgboost.dask import DaskDMatrix
 
 
-def main(client):
+def main(client: Client) -> None:
     # generate some random data for demonstration
     m = 100000
     n = 100
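A self-contained sketch of the same typed pattern for the CPU-training demo follows; the annotations match the hunk above, while the training parameters and chunk sizes are assumptions chosen for illustration.

import dask.array as da
from dask.distributed import Client, LocalCluster

from xgboost import dask as dxgb
from xgboost.dask import DaskDMatrix


def main(client: Client) -> None:
    # Random data for demonstration, mirroring the demo's m-by-n shape.
    m, n = 100000, 100
    X = da.random.random(size=(m, n), chunks=(10000, n))
    y = da.random.random(size=(m,), chunks=(10000,))

    dtrain = DaskDMatrix(client, X, y)
    output = dxgb.train(
        client,
        {"tree_method": "hist", "objective": "reg:squarederror"},
        dtrain,
        num_boost_round=4,
        evals=[(dtrain, "train")],
    )
    print("Evaluation history:", output["history"])


if __name__ == "__main__":
    with LocalCluster(n_workers=2, threads_per_worker=1) as cluster:
        with Client(cluster) as client:
            main(client)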

View File

@@ -3,6 +3,8 @@ Example of using callbacks with Dask
 ====================================
 """
 
+from typing import Any
+
 import numpy as np
 from dask.distributed import Client, LocalCluster
 from dask_ml.datasets import make_regression
@@ -13,7 +15,7 @@ import xgboost.dask as dxgb
 from xgboost.dask import DaskDMatrix
 
 
-def probability_for_going_backward(epoch):
+def probability_for_going_backward(epoch: int) -> float:
     return 0.999 / (1.0 + 0.05 * np.log(1.0 + epoch))
@@ -23,7 +25,9 @@ class CustomEarlyStopping(xgb.callback.TrainingCallback):
     In the beginning, allow the metric to become worse with a probability of 0.999.
     As boosting progresses, the probability should be adjusted downward"""
 
-    def __init__(self, *, validation_set, target_metric, maximize, seed):
+    def __init__(
+        self, *, validation_set: str, target_metric: str, maximize: bool, seed: int
+    ) -> None:
         self.validation_set = validation_set
         self.target_metric = target_metric
         self.maximize = maximize
@@ -34,7 +38,9 @@ class CustomEarlyStopping(xgb.callback.TrainingCallback):
         else:
             self.better = lambda x, y: x < y
 
-    def after_iteration(self, model, epoch, evals_log):
+    def after_iteration(
+        self, model: Any, epoch: int, evals_log: xgb.callback.TrainingCallback.EvalsLog
+    ) -> bool:
         metric_history = evals_log[self.validation_set][self.target_metric]
         if len(metric_history) < 2 or self.better(
             metric_history[-1], metric_history[-2]
@@ -42,7 +48,7 @@ class CustomEarlyStopping(xgb.callback.TrainingCallback):
             return False  # continue training
         p = probability_for_going_backward(epoch)
         go_backward = self.rng.choice(2, size=(1,), replace=True, p=[1 - p, p]).astype(
-            np.bool
+            np.bool_
         )[0]
         print(
             "The validation metric went into the wrong direction. "
@@ -54,7 +60,7 @@ class CustomEarlyStopping(xgb.callback.TrainingCallback):
             return True  # stop training
 
 
-def main(client):
+def main(client: Client) -> None:
     m = 100000
     n = 100
     X, y = make_regression(n_samples=m, n_features=n, chunks=200, random_state=0)
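Pieced together, the annotated callback from the hunks above reads roughly as follows. The signatures, the `EvalsLog` annotation, and the `np.bool_` fix are taken from the diff; the constructor details and the final decision logic are paraphrased and should be treated as assumptions.

from typing import Any

import numpy as np
import xgboost as xgb


def probability_for_going_backward(epoch: int) -> float:
    return 0.999 / (1.0 + 0.05 * np.log(1.0 + epoch))


class CustomEarlyStopping(xgb.callback.TrainingCallback):
    """Stop training once the target metric worsens, with decaying tolerance."""

    def __init__(
        self, *, validation_set: str, target_metric: str, maximize: bool, seed: int
    ) -> None:
        super().__init__()
        self.validation_set = validation_set
        self.target_metric = target_metric
        self.maximize = maximize
        self.rng = np.random.default_rng(seed)
        if maximize:
            self.better = lambda x, y: x > y
        else:
            self.better = lambda x, y: x < y

    def after_iteration(
        self, model: Any, epoch: int, evals_log: xgb.callback.TrainingCallback.EvalsLog
    ) -> bool:
        metric_history = evals_log[self.validation_set][self.target_metric]
        if len(metric_history) < 2 or self.better(
            metric_history[-1], metric_history[-2]
        ):
            return False  # continue training
        p = probability_for_going_backward(epoch)
        go_backward = self.rng.choice(2, size=(1,), replace=True, p=[1 - p, p]).astype(
            np.bool_
        )[0]
        # Returning True stops training; allow a worse metric with probability p.
        return not go_backward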

View File

@@ -9,7 +9,7 @@ from dask.distributed import Client, LocalCluster
 from xgboost import dask as dxgb
 
 
-def main(client):
+def main(client: Client) -> dxgb.Booster:
     # generate some random data for demonstration
     n = 100
     m = 10000

View File

@@ -12,7 +12,7 @@ from dask_cuda import LocalCUDACluster
 from xgboost import dask as dxgb
 
 
-def main(client):
+def main(client: Client) -> dxgb.Booster:
     # generate some random data for demonstration
     n = 100
     m = 1000000
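For the GPU variant, a minimal sketch of a typed entry point returning a booster is shown below. The `Client`/`dxgb.Booster` annotations mirror the hunk above, while the use of `DaskQuantileDMatrix`, the `device="cuda"` parameter, and the cluster size are assumptions made for illustration.

import dask.array as da
from dask.distributed import Client
from dask_cuda import LocalCUDACluster

from xgboost import dask as dxgb


def main(client: Client) -> dxgb.Booster:
    # Random data for demonstration; the demo uses a much larger m.
    m, n = 100000, 100
    X = da.random.random(size=(m, n), chunks=(10000, n))
    y = da.random.random(size=(m,), chunks=(10000,))

    # QuantileDMatrix avoids materializing a full DMatrix on the GPU.
    dtrain = dxgb.DaskQuantileDMatrix(client, X, y)
    output = dxgb.train(
        client,
        {"tree_method": "hist", "device": "cuda"},
        dtrain,
        num_boost_round=4,
        evals=[(dtrain, "train")],
    )
    return output["booster"]


if __name__ == "__main__":
    with LocalCUDACluster(n_workers=1) as cluster:
        with Client(cluster) as client:
            booster = main(client)
            print("Boosted rounds:", booster.num_boosted_rounds())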

View File

@@ -98,6 +98,7 @@ class LintersPaths:
         "tests/test_distributed/test_gpu_with_spark/test_data.py",
         "tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py",
         # demo
+        "demo/dask/",
         "demo/json-model/json_parser.py",
         "demo/guide-python/external_memory.py",
         "demo/guide-python/sklearn_examples.py",