Merge branch 'master' into sync-2024Jan24

2024-02-01 14:41:48 -08:00
parent 2cb579ff3c 662854c7d7
commit 44db1cef54
99 changed files with 2476 additions and 283 deletions
--- a/demo/aft_survival/aft_survival_viz_demo.py
+++ b/demo/aft_survival/aft_survival_viz_demo.py
@@ -6,6 +6,7 @@ This demo uses 1D toy data and visualizes how XGBoost fits a tree ensemble. The
 model starts out as a flat line and evolves into a step function in order to account for
 all ranged labels.
 """
+
 import matplotlib.pyplot as plt
 import numpy as np

--- a/demo/dask/cpu_training.py
+++ b/demo/dask/cpu_training.py
@@ -3,6 +3,7 @@ Example of training with Dask on CPU
 ====================================

 """
+
 from dask import array as da
 from dask.distributed import Client, LocalCluster

@@ -14,8 +15,9 @@ def main(client):
    # generate some random data for demonstration
    m = 100000
    n = 100
-    X = da.random.random(size=(m, n), chunks=100)
-    y = da.random.random(size=(m,), chunks=100)
+    rng = da.random.default_rng(1)
+    X = rng.normal(size=(m, n))
+    y = X.sum(axis=1)

    # DaskDMatrix acts like normal DMatrix, works as a proxy for local
    # DMatrix scatter around workers.
--- a/demo/dask/dask_callbacks.py
+++ b/demo/dask/dask_callbacks.py
@@ -2,6 +2,7 @@
 Example of using callbacks with Dask
 ====================================
 """
+
 import numpy as np
 from dask.distributed import Client, LocalCluster
 from dask_ml.datasets import make_regression
--- a/demo/dask/gpu_training.py
+++ b/demo/dask/gpu_training.py
@@ -2,6 +2,8 @@
 Example of training with Dask on GPU
 ====================================
 """
+
+import cupy as cp
 import dask_cudf
 from dask import array as da
 from dask import dataframe as dd
@@ -72,10 +74,12 @@ if __name__ == "__main__":
    with LocalCUDACluster(n_workers=2, threads_per_worker=4) as cluster:
        with Client(cluster) as client:
            # generate some random data for demonstration
+            rng = da.random.default_rng(1)
+
            m = 100000
            n = 100
-            X = da.random.random(size=(m, n), chunks=10000)
-            y = da.random.random(size=(m,), chunks=10000)
+            X = rng.normal(size=(m, n))
+            y = X.sum(axis=1)

            print("Using DaskQuantileDMatrix")
            from_ddqdm = using_quantile_device_dmatrix(client, X, y)
--- a/demo/dask/sklearn_cpu_training.py
+++ b/demo/dask/sklearn_cpu_training.py
@@ -2,6 +2,7 @@
 Use scikit-learn regressor interface with CPU histogram tree method
 ===================================================================
 """
+
 from dask import array as da
 from dask.distributed import Client, LocalCluster

--- a/demo/guide-python/callbacks.py
+++ b/demo/guide-python/callbacks.py
@@ -4,6 +4,7 @@ Demo for using and defining callback functions

    .. versionadded:: 1.3.0
 """
+
 import argparse
 import os
 import tempfile
--- a/demo/guide-python/cat_pipeline.py
+++ b/demo/guide-python/cat_pipeline.py
@@ -13,6 +13,7 @@ See Also
 - :ref:`sphx_glr_python_examples_cat_in_the_dat.py`

 """
+
 from typing import List, Tuple

 import numpy as np
--- a/demo/guide-python/categorical.py
+++ b/demo/guide-python/categorical.py
@@ -17,6 +17,7 @@ See Also
 - :ref:`sphx_glr_python_examples_cat_pipeline.py`

 """
+
 from typing import Tuple

 import numpy as np
--- a/demo/guide-python/external_memory.py
+++ b/demo/guide-python/external_memory.py
@@ -11,6 +11,7 @@ instead of Quantile DMatrix.  The feature is not ready for production use yet.
 See :doc:`the tutorial </tutorials/external_memory>` for more details.

 """
+
 import os
 import tempfile
 from typing import Callable, List, Tuple
--- a/demo/guide-python/individual_trees.py
+++ b/demo/guide-python/individual_trees.py
@@ -2,6 +2,7 @@
 Demo for prediction using individual trees and model slices
 ===========================================================
 """
+
 import os

 import numpy as np
--- a/demo/guide-python/learning_to_rank.py
+++ b/demo/guide-python/learning_to_rank.py
@@ -15,6 +15,7 @@ position debiasing training.
 For an overview of learning to rank in XGBoost, please see
 :doc:`Learning to Rank </tutorials/learning_to_rank>`.
 """
+
 from __future__ import annotations

 import argparse
--- a/demo/guide-python/quantile_regression.py
+++ b/demo/guide-python/quantile_regression.py
@@ -13,6 +13,7 @@ https://scikit-learn.org/stable/auto_examples/ensemble/plot_gradient_boosting_qu
    crossing can happen due to limitation in the algorithm.

 """
+
 import argparse
 from typing import Dict

--- a/demo/guide-python/sklearn_examples.py
+++ b/demo/guide-python/sklearn_examples.py
@@ -9,6 +9,7 @@ Created on 1 Apr 2015

@author: Jamie Hall
 """
+
 import pickle

 import numpy as np
--- a/demo/guide-python/sklearn_parallel.py
+++ b/demo/guide-python/sklearn_parallel.py
@@ -2,6 +2,7 @@
 Demo for using xgboost with sklearn
 ===================================
 """
+
 import multiprocessing

 from sklearn.datasets import fetch_california_housing
--- a/demo/guide-python/spark_estimator_examples.py
+++ b/demo/guide-python/spark_estimator_examples.py
@@ -4,6 +4,7 @@ Collection of examples for using xgboost.spark estimator interface

@author: Weichen Xu
 """
+
 import sklearn.datasets
 from pyspark.ml.evaluation import MulticlassClassificationEvaluator, RegressionEvaluator
 from pyspark.ml.linalg import Vectors
--- a/demo/rmm_plugin/rmm_mgpu_with_dask.py
+++ b/demo/rmm_plugin/rmm_mgpu_with_dask.py
@@ -2,6 +2,7 @@
 Using rmm with Dask
 ===================
 """
+
 import dask
 from dask.distributed import Client
 from dask_cuda import LocalCUDACluster
--- a/demo/rmm_plugin/rmm_singlegpu.py
+++ b/demo/rmm_plugin/rmm_singlegpu.py
@@ -2,6 +2,7 @@
 Using rmm on a single node device
 =================================
 """
+
 import rmm
 from sklearn.datasets import make_classification