""" Use scikit-learn regressor interface with GPU histogram tree method =================================================================== """ from dask import array as da from dask.distributed import Client # It's recommended to use dask_cuda for GPU assignment from dask_cuda import LocalCUDACluster from xgboost import dask as dxgb def main(client: Client) -> dxgb.Booster: # generate some random data for demonstration n = 100 m = 1000000 partition_size = 10000 X = da.random.random((m, n), partition_size) y = da.random.random(m, partition_size) regressor = dxgb.DaskXGBRegressor(verbosity=1) # set the device to CUDA regressor.set_params(tree_method="hist", device="cuda") # assigning client here is optional regressor.client = client regressor.fit(X, y, eval_set=[(X, y)]) prediction = regressor.predict(X) bst = regressor.get_booster() history = regressor.evals_result() print("Evaluation history:", history) # returned prediction is always a dask array. assert isinstance(prediction, da.Array) return bst # returning the trained model if __name__ == "__main__": # With dask cuda, one can scale up XGBoost to arbitrary GPU clusters. # `LocalCUDACluster` used here is only for demonstration purpose. with LocalCUDACluster() as cluster: with Client(cluster) as client: main(client)