diff --git a/demo/CLI/binary_classification/mknfold.py b/demo/CLI/binary_classification/mknfold.py index f5e237e36..3f178e055 100755 --- a/demo/CLI/binary_classification/mknfold.py +++ b/demo/CLI/binary_classification/mknfold.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 -import sys import random +import sys if len(sys.argv) < 2: print ('Usage:<filename> [nfold = 5]') diff --git a/demo/CLI/regression/mknfold.py b/demo/CLI/regression/mknfold.py index 3e11934d8..14b5ab4e9 100755 --- a/demo/CLI/regression/mknfold.py +++ b/demo/CLI/regression/mknfold.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 -import sys import random +import sys if len(sys.argv) < 2: print('Usage:<filename> [nfold = 5]') diff --git a/demo/CLI/yearpredMSD/csv2libsvm.py b/demo/CLI/yearpredMSD/csv2libsvm.py index 0f763501c..ead362ae2 100755 --- a/demo/CLI/yearpredMSD/csv2libsvm.py +++ b/demo/CLI/yearpredMSD/csv2libsvm.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import sys + fo = open(sys.argv[2], 'w') for l in open(sys.argv[1]): diff --git a/demo/aft_survival/aft_survival_demo.py b/demo/aft_survival/aft_survival_demo.py index 7046548b3..93359c835 100644 --- a/demo/aft_survival/aft_survival_demo.py +++ b/demo/aft_survival/aft_survival_demo.py @@ -6,9 +6,11 @@ Demo for survival analysis (regression). using Accelerated Failure Time (AFT) mo """ import os -from sklearn.model_selection import ShuffleSplit -import pandas as pd + import numpy as np +import pandas as pd +from sklearn.model_selection import ShuffleSplit + import xgboost as xgb # The Veterans' Administration Lung Cancer Trial diff --git a/demo/aft_survival/aft_survival_demo_with_optuna.py b/demo/aft_survival/aft_survival_demo_with_optuna.py index a6cf2aaf6..11c1d32f6 100644 --- a/demo/aft_survival/aft_survival_demo_with_optuna.py +++ b/demo/aft_survival/aft_survival_demo_with_optuna.py @@ -6,11 +6,12 @@ Demo for survival analysis (regression) using Accelerated Failure Time (AFT) mod using Optuna to tune hyperparameters """ -from sklearn.model_selection import ShuffleSplit -import pandas as pd import numpy as np -import xgboost as xgb import optuna +import pandas as pd +from sklearn.model_selection import ShuffleSplit + +import xgboost as xgb # The Veterans' Administration Lung Cancer Trial # The Statistical Analysis of Failure Time Data by Kalbfleisch J. and Prentice R (1980) diff --git a/demo/aft_survival/aft_survival_viz_demo.py b/demo/aft_survival/aft_survival_viz_demo.py index beb0db40c..a17c55edf 100644 --- a/demo/aft_survival/aft_survival_viz_demo.py +++ b/demo/aft_survival/aft_survival_viz_demo.py @@ -6,9 +6,10 @@ This demo uses 1D toy data and visualizes how XGBoost fits a tree ensemble. The model starts out as a flat line and evolves into a step function in order to account for all ranged labels. """ -import numpy as np -import xgboost as xgb import matplotlib.pyplot as plt +import numpy as np + +import xgboost as xgb plt.rcParams.update({'font.size': 13}) diff --git a/demo/dask/cpu_survival.py b/demo/dask/cpu_survival.py index c79f7d96c..629667b12 100644 --- a/demo/dask/cpu_survival.py +++ b/demo/dask/cpu_survival.py @@ -4,12 +4,14 @@ Example of training survival model with Dask on CPU """ -import xgboost as xgb import os -from xgboost.dask import DaskDMatrix + import dask.dataframe as dd -from dask.distributed import Client -from dask.distributed import LocalCluster +from dask.distributed import Client, LocalCluster +from xgboost.dask import DaskDMatrix + +import xgboost as xgb + def main(client): # Load an example survival data from CSV into a Dask data frame.
diff --git a/demo/dask/cpu_training.py b/demo/dask/cpu_training.py index 6ee91dafa..7fc5d2d1c 100644 --- a/demo/dask/cpu_training.py +++ b/demo/dask/cpu_training.py @@ -3,11 +3,11 @@ Example of training with Dask on CPU ==================================== """ -import xgboost as xgb -from xgboost.dask import DaskDMatrix -from dask.distributed import Client -from dask.distributed import LocalCluster from dask import array as da +from dask.distributed import Client, LocalCluster +from xgboost.dask import DaskDMatrix + +import xgboost as xgb def main(client): diff --git a/demo/dask/dask_callbacks.py b/demo/dask/dask_callbacks.py index 64d7b0f28..a80ede01f 100644 --- a/demo/dask/dask_callbacks.py +++ b/demo/dask/dask_callbacks.py @@ -3,12 +3,12 @@ Example of using callbacks with Dask ==================================== """ import numpy as np -import xgboost as xgb -from xgboost.dask import DaskDMatrix -from dask.distributed import Client -from dask.distributed import LocalCluster +from dask.distributed import Client, LocalCluster from dask_ml.datasets import make_regression from dask_ml.model_selection import train_test_split +from xgboost.dask import DaskDMatrix + +import xgboost as xgb def probability_for_going_backward(epoch): diff --git a/demo/dask/gpu_training.py b/demo/dask/gpu_training.py index 34c6e824f..a3effb801 100644 --- a/demo/dask/gpu_training.py +++ b/demo/dask/gpu_training.py @@ -2,14 +2,15 @@ Example of training with Dask on GPU ==================================== """ -from dask_cuda import LocalCUDACluster import dask_cudf -from dask.distributed import Client from dask import array as da from dask import dataframe as dd +from dask.distributed import Client +from dask_cuda import LocalCUDACluster +from xgboost.dask import DaskDMatrix + import xgboost as xgb from xgboost import dask as dxgb -from xgboost.dask import DaskDMatrix def using_dask_matrix(client: Client, X, y): diff --git a/demo/dask/sklearn_cpu_training.py b/demo/dask/sklearn_cpu_training.py index 69f5dc788..12d55493c 100644 --- a/demo/dask/sklearn_cpu_training.py +++ b/demo/dask/sklearn_cpu_training.py @@ -2,9 +2,9 @@ Use scikit-learn regressor interface with CPU histogram tree method =================================================================== """ -from dask.distributed import Client -from dask.distributed import LocalCluster from dask import array as da +from dask.distributed import Client, LocalCluster + import xgboost diff --git a/demo/dask/sklearn_gpu_training.py b/demo/dask/sklearn_gpu_training.py index 3031d9705..4c544e4e8 100644 --- a/demo/dask/sklearn_gpu_training.py +++ b/demo/dask/sklearn_gpu_training.py @@ -3,10 +3,12 @@ Use scikit-learn regressor interface with GPU histogram tree method =================================================================== """ +from dask import array as da from dask.distributed import Client + # It's recommended to use dask_cuda for GPU assignment from dask_cuda import LocalCUDACluster -from dask import array as da + import xgboost diff --git a/demo/gpu_acceleration/cover_type.py b/demo/gpu_acceleration/cover_type.py index 8e44a3ddc..1f2322d05 100644 --- a/demo/gpu_acceleration/cover_type.py +++ b/demo/gpu_acceleration/cover_type.py @@ -1,7 +1,9 @@ -import xgboost as xgb +import time + from sklearn.datasets import fetch_covtype from sklearn.model_selection import train_test_split -import time + +import xgboost as xgb # Fetch dataset using sklearn cov = fetch_covtype() diff --git a/demo/guide-python/basic_walkthrough.py b/demo/guide-python/basic_walkthrough.py 
index 06c9fac60..90318f5fe 100644 --- a/demo/guide-python/basic_walkthrough.py +++ b/demo/guide-python/basic_walkthrough.py @@ -9,13 +9,14 @@ interfaces in the Python package like scikit-learn interface and Dask interface. See :doc:`/python/python_intro` and :doc:`/tutorials/index` for other references. """ -import numpy as np -import pickle -import xgboost as xgb import os +import pickle +import numpy as np from sklearn.datasets import load_svmlight_file +import xgboost as xgb + # Make sure the demo knows where to load the data. CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) XGBOOST_ROOT_DIR = os.path.dirname(os.path.dirname(CURRENT_DIR)) diff --git a/demo/guide-python/boost_from_prediction.py b/demo/guide-python/boost_from_prediction.py index 0be021725..53a45549a 100644 --- a/demo/guide-python/boost_from_prediction.py +++ b/demo/guide-python/boost_from_prediction.py @@ -3,8 +3,8 @@ Demo for boosting from prediction ================================= """ import os -import xgboost as xgb +import xgboost as xgb CURRENT_DIR = os.path.dirname(__file__) dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train')) diff --git a/demo/guide-python/callbacks.py b/demo/guide-python/callbacks.py index b2d1afb74..817a65939 100644 --- a/demo/guide-python/callbacks.py +++ b/demo/guide-python/callbacks.py @@ -4,14 +4,16 @@ Demo for using and defining callback functions .. versionadded:: 1.3.0 ''' -import xgboost as xgb -import tempfile +import argparse import os +import tempfile + import numpy as np +from matplotlib import pyplot as plt from sklearn.datasets import load_breast_cancer from sklearn.model_selection import train_test_split -from matplotlib import pyplot as plt -import argparse + +import xgboost as xgb class Plotting(xgb.callback.TrainingCallback): diff --git a/demo/guide-python/continuation.py b/demo/guide-python/continuation.py index 5cddc3108..84afc3710 100644 --- a/demo/guide-python/continuation.py +++ b/demo/guide-python/continuation.py @@ -3,11 +3,13 @@ Demo for training continuation ============================== """ -from sklearn.datasets import load_breast_cancer -import xgboost +import os import pickle import tempfile -import os + +from sklearn.datasets import load_breast_cancer + +import xgboost def training_continuation(tmpdir: str, use_pickle: bool) -> None: diff --git a/demo/guide-python/cross_validation.py b/demo/guide-python/cross_validation.py index 2ca3f0201..2565b02c9 100644 --- a/demo/guide-python/cross_validation.py +++ b/demo/guide-python/cross_validation.py @@ -3,7 +3,9 @@ Demo for using cross validation =============================== """ import os + import numpy as np + import xgboost as xgb # load data in do training diff --git a/demo/guide-python/custom_rmsle.py b/demo/guide-python/custom_rmsle.py index bc21f9022..b4a7d94ec 100644 --- a/demo/guide-python/custom_rmsle.py +++ b/demo/guide-python/custom_rmsle.py @@ -14,14 +14,16 @@ The `SLE` objective reduces impact of outliers in training dataset, hence here w compare its performance with standard squared error. """ -import numpy as np -import xgboost as xgb -from typing import Tuple, Dict, List -from time import time import argparse +from time import time +from typing import Dict, List, Tuple + import matplotlib +import numpy as np from matplotlib import pyplot as plt +import xgboost as xgb + # shape of generated data. 
kRows = 4096 kCols = 16 diff --git a/demo/guide-python/custom_softmax.py b/demo/guide-python/custom_softmax.py index e7064f463..153c5d43b 100644 --- a/demo/guide-python/custom_softmax.py +++ b/demo/guide-python/custom_softmax.py @@ -10,11 +10,13 @@ See :doc:`/tutorials/custom_metric_obj` for detailed tutorial and notes. ''' -import numpy as np -import xgboost as xgb -from matplotlib import pyplot as plt import argparse +import numpy as np +from matplotlib import pyplot as plt + +import xgboost as xgb + np.random.seed(1994) kRows = 100 diff --git a/demo/guide-python/evals_result.py b/demo/guide-python/evals_result.py index bb4f44a9f..bba8862f5 100644 --- a/demo/guide-python/evals_result.py +++ b/demo/guide-python/evals_result.py @@ -3,6 +3,7 @@ This script demonstrate how to access the eval metrics ====================================================== """ import os + import xgboost as xgb CURRENT_DIR = os.path.dirname(__file__) diff --git a/demo/guide-python/external_memory.py b/demo/guide-python/external_memory.py index 703ee8f6c..cc5527611 100644 --- a/demo/guide-python/external_memory.py +++ b/demo/guide-python/external_memory.py @@ -12,11 +12,13 @@ See :doc:`the tutorial </tutorials/external_memory>` for more details. """ import os -import xgboost -from typing import Callable, List, Tuple -from sklearn.datasets import make_regression import tempfile +from typing import Callable, List, Tuple + import numpy as np +from sklearn.datasets import make_regression + +import xgboost def make_batches( diff --git a/demo/guide-python/feature_weights.py b/demo/guide-python/feature_weights.py index 34c8ed440..6e761d300 100644 --- a/demo/guide-python/feature_weights.py +++ b/demo/guide-python/feature_weights.py @@ -5,11 +5,13 @@ Demo for using feature weight to change column sampling .. versionadded:: 1.3.0 ''' -import numpy as np -import xgboost -from matplotlib import pyplot as plt import argparse +import numpy as np +from matplotlib import pyplot as plt + +import xgboost + def main(args): rng = np.random.RandomState(1994) diff --git a/demo/guide-python/gamma_regression.py b/demo/guide-python/gamma_regression.py index 28b71a5d0..74d256990 100644 --- a/demo/guide-python/gamma_regression.py +++ b/demo/guide-python/gamma_regression.py @@ -2,9 +2,10 @@ Demo for gamma regression ========================= """ -import xgboost as xgb import numpy as np +import xgboost as xgb + # this script demonstrates how to fit gamma regression model (with log link function) # in xgboost, before running the demo you need to generate the autoclaims dataset # by running gen_autoclaims.R located in xgboost/demo/data. diff --git a/demo/guide-python/generalized_linear_model.py b/demo/guide-python/generalized_linear_model.py index f409fb960..976428f13 100644 --- a/demo/guide-python/generalized_linear_model.py +++ b/demo/guide-python/generalized_linear_model.py @@ -3,7 +3,9 @@ Demo for GLM ============ """ import os + import xgboost as xgb + ## # this script demonstrate how to fit generalized linear model in xgboost # basically, we are using linear model, instead of tree for our boosters diff --git a/demo/guide-python/multioutput_regression.py b/demo/guide-python/multioutput_regression.py index 0de03fb12..375377e4e 100644 --- a/demo/guide-python/multioutput_regression.py +++ b/demo/guide-python/multioutput_regression.py @@ -10,10 +10,11 @@ See :doc:`/tutorials/multioutput` for more information.
""" import argparse -from typing import Dict, Tuple, List +from typing import Dict, List, Tuple import numpy as np from matplotlib import pyplot as plt + import xgboost as xgb diff --git a/demo/guide-python/predict_first_ntree.py b/demo/guide-python/predict_first_ntree.py index fb7837728..55f7c61af 100644 --- a/demo/guide-python/predict_first_ntree.py +++ b/demo/guide-python/predict_first_ntree.py @@ -3,10 +3,12 @@ Demo for prediction using number of trees ========================================= """ import os + import numpy as np -import xgboost as xgb from sklearn.datasets import load_svmlight_file +import xgboost as xgb + CURRENT_DIR = os.path.dirname(__file__) train = os.path.join(CURRENT_DIR, "../data/agaricus.txt.train") test = os.path.join(CURRENT_DIR, "../data/agaricus.txt.test") diff --git a/demo/guide-python/predict_leaf_indices.py b/demo/guide-python/predict_leaf_indices.py index 23b96a752..45cc8fa7f 100644 --- a/demo/guide-python/predict_leaf_indices.py +++ b/demo/guide-python/predict_leaf_indices.py @@ -3,6 +3,7 @@ Demo for obtaining leaf index ============================= """ import os + import xgboost as xgb # load data in do training diff --git a/demo/guide-python/quantile_data_iterator.py b/demo/guide-python/quantile_data_iterator.py index 292cd127e..68daa999d 100644 --- a/demo/guide-python/quantile_data_iterator.py +++ b/demo/guide-python/quantile_data_iterator.py @@ -17,10 +17,11 @@ using `itertools.tee` might incur significant memory usage according to: ''' -import xgboost import cupy import numpy +import xgboost + COLS = 64 ROWS_PER_BATCH = 1000 # data is splited by rows BATCHES = 32 diff --git a/demo/guide-python/sklearn_evals_result.py b/demo/guide-python/sklearn_evals_result.py index c20328adb..9aed58500 100644 --- a/demo/guide-python/sklearn_evals_result.py +++ b/demo/guide-python/sklearn_evals_result.py @@ -3,10 +3,11 @@ Demo for accessing the xgboost eval metrics by using sklearn interface ====================================================================== """ -import xgboost as xgb import numpy as np from sklearn.datasets import make_hastie_10_2 +import xgboost as xgb + X, y = make_hastie_10_2(n_samples=2000, random_state=42) # Map labels from {-1, 1} to {0, 1} diff --git a/demo/guide-python/sklearn_examples.py b/demo/guide-python/sklearn_examples.py index b234da175..5890987f9 100644 --- a/demo/guide-python/sklearn_examples.py +++ b/demo/guide-python/sklearn_examples.py @@ -7,12 +7,13 @@ Created on 1 Apr 2015 @author: Jamie Hall ''' import pickle -import xgboost as xgb import numpy as np -from sklearn.model_selection import KFold, train_test_split, GridSearchCV +from sklearn.datasets import fetch_california_housing, load_digits, load_iris from sklearn.metrics import confusion_matrix, mean_squared_error -from sklearn.datasets import load_iris, load_digits, fetch_california_housing +from sklearn.model_selection import GridSearchCV, KFold, train_test_split + +import xgboost as xgb rng = np.random.RandomState(31337) diff --git a/demo/guide-python/sklearn_parallel.py b/demo/guide-python/sklearn_parallel.py index c65fd7c22..b0fc49d81 100644 --- a/demo/guide-python/sklearn_parallel.py +++ b/demo/guide-python/sklearn_parallel.py @@ -2,11 +2,13 @@ Demo for using xgboost with sklearn =================================== """ -from sklearn.model_selection import GridSearchCV -from sklearn.datasets import fetch_california_housing -import xgboost as xgb import multiprocessing +from sklearn.datasets import fetch_california_housing +from sklearn.model_selection import 
GridSearchCV + +import xgboost as xgb + if __name__ == "__main__": print("Parallel Parameter optimization") X, y = fetch_california_housing(return_X_y=True) diff --git a/demo/guide-python/update_process.py b/demo/guide-python/update_process.py index 907399fcf..77e0dc870 100644 --- a/demo/guide-python/update_process.py +++ b/demo/guide-python/update_process.py @@ -7,9 +7,10 @@ experiment. """ -import xgboost as xgb -from sklearn.datasets import fetch_california_housing import numpy as np +from sklearn.datasets import fetch_california_housing + +import xgboost as xgb def main(): diff --git a/demo/kaggle-higgs/higgs-cv.py b/demo/kaggle-higgs/higgs-cv.py index fe954e256..75b8202df 100755 --- a/demo/kaggle-higgs/higgs-cv.py +++ b/demo/kaggle-higgs/higgs-cv.py @@ -1,5 +1,6 @@ #!/usr/bin/python import numpy as np + import xgboost as xgb ### load data in do training diff --git a/demo/kaggle-higgs/higgs-pred.py b/demo/kaggle-higgs/higgs-pred.py index 4da3427d9..c14a8a94c 100755 --- a/demo/kaggle-higgs/higgs-pred.py +++ b/demo/kaggle-higgs/higgs-pred.py @@ -1,6 +1,7 @@ #!/usr/bin/python # make prediction import numpy as np + import xgboost as xgb # path to where the data lies diff --git a/demo/kaggle-higgs/speedtest.py b/demo/kaggle-higgs/speedtest.py index 04f45ab89..be101e8b5 100755 --- a/demo/kaggle-higgs/speedtest.py +++ b/demo/kaggle-higgs/speedtest.py @@ -1,9 +1,12 @@ #!/usr/bin/python # this is the example script to use xgboost to train -import numpy as np -import xgboost as xgb -from sklearn.ensemble import GradientBoostingClassifier import time + +import numpy as np +from sklearn.ensemble import GradientBoostingClassifier + +import xgboost as xgb + test_size = 550000 # path to where the data lies diff --git a/demo/multiclass_classification/train.py b/demo/multiclass_classification/train.py index 9f1721dfc..a261c20a2 100755 --- a/demo/multiclass_classification/train.py +++ b/demo/multiclass_classification/train.py @@ -3,6 +3,7 @@ from __future__ import division import numpy as np + import xgboost as xgb # label need to be 0 to num_class -1 diff --git a/demo/nvflare/custom/controller.py b/demo/nvflare/custom/controller.py index 989a405bb..ae2933ad8 100644 --- a/demo/nvflare/custom/controller.py +++ b/demo/nvflare/custom/controller.py @@ -10,7 +10,6 @@ from nvflare.apis.fl_context import FLContext from nvflare.apis.impl.controller import Controller, Task from nvflare.apis.shareable import Shareable from nvflare.apis.signal import Signal - from trainer import SupportedTasks diff --git a/demo/nvflare/custom/trainer.py b/demo/nvflare/custom/trainer.py index fd93ae3a4..4c6dedc90 100644 --- a/demo/nvflare/custom/trainer.py +++ b/demo/nvflare/custom/trainer.py @@ -1,7 +1,7 @@ import os from nvflare.apis.executor import Executor -from nvflare.apis.fl_constant import ReturnCode, FLContextKey +from nvflare.apis.fl_constant import FLContextKey, ReturnCode from nvflare.apis.fl_context import FLContext from nvflare.apis.shareable import Shareable, make_reply from nvflare.apis.signal import Signal diff --git a/demo/rank/rank.py b/demo/rank/rank.py index d19b2c528..57cf04245 100644 --- a/demo/rank/rank.py +++ b/demo/rank/rank.py @@ -1,8 +1,8 @@ #!/usr/bin/python -import xgboost as xgb -from xgboost import DMatrix from sklearn.datasets import load_svmlight_file +import xgboost as xgb +from xgboost import DMatrix # This script demonstrate how to do ranking with xgboost.train x_train, y_train = load_svmlight_file("mq2008.train") diff --git a/demo/rank/rank_sklearn.py b/demo/rank/rank_sklearn.py index 
723b8c7d9..fe2635f37 100644 --- a/demo/rank/rank_sklearn.py +++ b/demo/rank/rank_sklearn.py @@ -1,7 +1,8 @@ #!/usr/bin/python -import xgboost as xgb from sklearn.datasets import load_svmlight_file +import xgboost as xgb + # This script demonstrate how to do ranking with XGBRanker x_train, y_train = load_svmlight_file("mq2008.train") x_valid, y_valid = load_svmlight_file("mq2008.vali") diff --git a/demo/rank/trans_data.py b/demo/rank/trans_data.py index aa72276c0..a93cf48ca 100644 --- a/demo/rank/trans_data.py +++ b/demo/rank/trans_data.py @@ -1,5 +1,6 @@ import sys + def save_data(group_data,output_feature,output_group): if len(group_data) == 0: return diff --git a/demo/rmm_plugin/rmm_mgpu_with_dask.py b/demo/rmm_plugin/rmm_mgpu_with_dask.py index 23c1f794e..be2aa83a7 100644 --- a/demo/rmm_plugin/rmm_mgpu_with_dask.py +++ b/demo/rmm_plugin/rmm_mgpu_with_dask.py @@ -1,8 +1,9 @@ -import xgboost as xgb -from sklearn.datasets import make_classification import dask from dask.distributed import Client from dask_cuda import LocalCUDACluster +from sklearn.datasets import make_classification + +import xgboost as xgb def main(client): diff --git a/demo/rmm_plugin/rmm_singlegpu.py b/demo/rmm_plugin/rmm_singlegpu.py index 6b7d1b58c..50d4a7ea3 100644 --- a/demo/rmm_plugin/rmm_singlegpu.py +++ b/demo/rmm_plugin/rmm_singlegpu.py @@ -1,7 +1,8 @@ -import xgboost as xgb import rmm from sklearn.datasets import make_classification +import xgboost as xgb + # Initialize RMM pool allocator rmm.reinitialize(pool_allocator=True) # Optionally force XGBoost to use RMM for all GPU memory allocation, see ./README.md diff --git a/dev/prepare_jvm_release.py b/dev/prepare_jvm_release.py index 371be762c..07769b411 100644 --- a/dev/prepare_jvm_release.py +++ b/dev/prepare_jvm_release.py @@ -1,16 +1,17 @@ -import re -import os -import sys -import platform -import errno import argparse -import subprocess +import errno import glob +import os +import platform +import re import shutil +import subprocess +import sys import tempfile import zipfile -from urllib.request import urlretrieve from contextlib import contextmanager +from urllib.request import urlretrieve + def normpath(path): """Normalize UNIX path to a native path.""" diff --git a/dev/query_contributors.py b/dev/query_contributors.py index 9adb72c97..d57ad3f7c 100644 --- a/dev/query_contributors.py +++ b/dev/query_contributors.py @@ -1,10 +1,11 @@ """Query list of all contributors and reviewers in a release""" -from sh.contrib import git -import sys -import re -import requests import json +import re +import sys + +import requests +from sh.contrib import git if len(sys.argv) != 5: print(f'Usage: {sys.argv[0]} [starting commit/tag] [ending commit/tag] [GitHub username] ' + diff --git a/dev/release-py-r.py b/dev/release-py-r.py index a59b48032..11524927d 100644 --- a/dev/release-py-r.py +++ b/dev/release-py-r.py @@ -2,14 +2,15 @@ tqdm, sh are required to run this script. """ -from urllib.request import urlretrieve import argparse -from typing import List, Optional -from sh.contrib import git -from packaging import version -import subprocess -import tqdm import os +import subprocess +from typing import List, Optional +from urllib.request import urlretrieve + +import tqdm +from packaging import version +from sh.contrib import git # The package building is managed by Jenkins CI. 
PREFIX = "https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds/release_" diff --git a/doc/conf.py b/doc/conf.py index bb51a7f8f..65d5ae7e5 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -11,14 +11,15 @@ # # All configuration values have a default; values that are commented out # serve to show the default. -from subprocess import call -from sh.contrib import git -import urllib.request -from urllib.error import HTTPError -import sys -import re import os +import re import subprocess +import sys +import urllib.request +from subprocess import call +from urllib.error import HTTPError + +from sh.contrib import git git_branch = os.getenv('SPHINX_GIT_BRANCH', default=None) if not git_branch: diff --git a/doc/sphinx_util.py b/doc/sphinx_util.py index f557bd9ac..720cd33e5 100644 --- a/doc/sphinx_util.py +++ b/doc/sphinx_util.py @@ -1,8 +1,8 @@ # -*- coding: utf-8 -*- """Helper utility function for customization.""" -import sys import os import subprocess +import sys READTHEDOCS_BUILD = (os.environ.get('READTHEDOCS', None) is not None) diff --git a/python-package/setup.py b/python-package/setup.py index 8a1b1b709..7e4bb7f7d 100644 --- a/python-package/setup.py +++ b/python-package/setup.py @@ -1,13 +1,14 @@ """Setup xgboost package.""" +import logging import os import shutil import subprocess -import logging -from typing import Optional, List import sys from platform import system -from setuptools import setup, find_packages, Extension -from setuptools.command import build_ext, sdist, install_lib, install +from typing import List, Optional + +from setuptools import Extension, find_packages, setup +from setuptools.command import build_ext, install, install_lib, sdist # You can't use `pip install .` as pip copies setup.py to a temporary # directory, parent directory is no longer reachable (isolated build) . diff --git a/python-package/xgboost/callback.py b/python-package/xgboost/callback.py index b1bf882b0..fb04b8038 100644 --- a/python-package/xgboost/callback.py +++ b/python-package/xgboost/callback.py @@ -6,17 +6,28 @@ """ -from abc import ABC import collections import os import pickle -from typing import Callable, List, Optional, Union, Dict, Tuple, TypeVar, cast, Sequence, Any +from abc import ABC +from typing import ( + Any, + Callable, + Dict, + List, + Optional, + Sequence, + Tuple, + TypeVar, + Union, + cast, +) + import numpy from . import collective from .core import Booster, DMatrix, XGBoostError, _get_booster_layer_trees - __all__ = [ "TrainingCallback", "LearningRateScheduler", diff --git a/python-package/xgboost/collective.py b/python-package/xgboost/collective.py index f040b8e72..8021316e8 100644 --- a/python-package/xgboost/collective.py +++ b/python-package/xgboost/collective.py @@ -4,12 +4,12 @@ import json import logging import pickle from enum import IntEnum, unique -from typing import Any, List, Dict +from typing import Any, Dict, List import numpy as np from ._typing import _T -from .core import _LIB, _check_call, c_str, py_str, from_pystr_to_cstr +from .core import _LIB, _check_call, c_str, from_pystr_to_cstr, py_str LOGGER = logging.getLogger("[xgboost.collective]") diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index d0ffb7957..ad99ed17c 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -282,7 +282,7 @@ def _has_categorical(booster: "Booster", data: DataType) -> bool: """Check whether the booster and input data for prediction contain categorical data. 
""" - from .data import _is_pandas_df, _is_cudf_df + from .data import _is_cudf_df, _is_pandas_df if _is_pandas_df(data) or _is_cudf_df(data): ft = booster.feature_types if ft is None: @@ -355,8 +355,7 @@ def ctypes2cupy(cptr: CNumericPtr, length: int, dtype: Type[np.number]) -> CupyT """Convert a ctypes pointer array to a cupy array.""" # pylint: disable=import-error import cupy - from cupy.cuda.memory import MemoryPointer - from cupy.cuda.memory import UnownedMemory + from cupy.cuda.memory import MemoryPointer, UnownedMemory CUPY_TO_CTYPES_MAPPING: Dict[Type[np.number], Type[CNumeric]] = { cupy.float32: ctypes.c_float, @@ -512,8 +511,7 @@ class DataIter(ABC): # pylint: disable=too-many-instance-attributes feature_types: Optional[FeatureTypes] = None, **kwargs: Any, ) -> None: - from .data import dispatch_proxy_set_data - from .data import _proxy_transform + from .data import _proxy_transform, dispatch_proxy_set_data new, cat_codes, feature_names, feature_types = _proxy_transform( data, @@ -732,7 +730,7 @@ class DMatrix: # pylint: disable=too-many-instance-attributes,too-many-public-m self.handle: Optional[ctypes.c_void_p] = None return - from .data import dispatch_data_backend, _is_iter + from .data import _is_iter, dispatch_data_backend if _is_iter(data): self._init_from_iter(data, enable_categorical) @@ -1406,10 +1404,10 @@ class QuantileDMatrix(DMatrix): **meta: Any, ) -> None: from .data import ( - _is_dlpack, - _transform_dlpack, - _is_iter, SingleBatchInternalIter, + _is_dlpack, + _is_iter, + _transform_dlpack, ) if _is_dlpack(data): diff --git a/python-package/xgboost/data.py b/python-package/xgboost/data.py index 775eedd57..7347835d0 100644 --- a/python-package/xgboost/data.py +++ b/python-package/xgboost/data.py @@ -278,10 +278,7 @@ def _pandas_feature_info( enable_categorical: bool, ) -> Tuple[Optional[FeatureNames], Optional[FeatureTypes]]: import pandas as pd - from pandas.api.types import ( - is_sparse, - is_categorical_dtype, - ) + from pandas.api.types import is_categorical_dtype, is_sparse # handle feature names if feature_names is None and meta is None: @@ -308,10 +305,10 @@ def _pandas_feature_info( def is_nullable_dtype(dtype: PandasDType) -> bool: """Wether dtype is a pandas nullable type.""" from pandas.api.types import ( - is_integer_dtype, is_bool_dtype, - is_float_dtype, is_categorical_dtype, + is_float_dtype, + is_integer_dtype, ) # dtype: pd.core.arrays.numeric.NumericDtype @@ -325,6 +322,7 @@ def is_nullable_dtype(dtype: PandasDType) -> bool: def _pandas_cat_null(data: DataFrame) -> DataFrame: from pandas.api.types import is_categorical_dtype + # handle category codes and nullable. 
cat_columns = [ col @@ -363,10 +361,7 @@ def _transform_pandas_df( meta: Optional[str] = None, meta_type: Optional[NumpyDType] = None, ) -> Tuple[np.ndarray, Optional[FeatureNames], Optional[FeatureTypes]]: - from pandas.api.types import ( - is_sparse, - is_categorical_dtype, - ) + from pandas.api.types import is_categorical_dtype, is_sparse if not all( dtype.name in _pandas_dtype_mapper @@ -533,8 +528,9 @@ def _from_dt_df( ptrs[icol] = ctypes.c_void_p(ptr) else: # datatable<=0.8.0 - from datatable.internal import \ - frame_column_data_r # pylint: disable=no-name-in-module + from datatable.internal import ( + frame_column_data_r, # pylint: disable=no-name-in-module + ) for icol in range(data.ncols): ptrs[icol] = frame_column_data_r(data, icol) diff --git a/python-package/xgboost/libpath.py b/python-package/xgboost/libpath.py index 1ae77556e..2ac36ef0f 100644 --- a/python-package/xgboost/libpath.py +++ b/python-package/xgboost/libpath.py @@ -3,8 +3,8 @@ import os import platform -from typing import List import sys +from typing import List class XGBoostLibraryNotFound(Exception): diff --git a/python-package/xgboost/plotting.py b/python-package/xgboost/plotting.py index 559578a89..6dc2a8802 100644 --- a/python-package/xgboost/plotting.py +++ b/python-package/xgboost/plotting.py @@ -2,9 +2,9 @@ # pylint: disable=too-many-branches # coding: utf-8 """Plotting Library.""" -from io import BytesIO import json -from typing import Optional, Any +from io import BytesIO +from typing import Any, Optional import numpy as np @@ -269,8 +269,8 @@ def plot_tree( """ try: - from matplotlib import pyplot as plt from matplotlib import image + from matplotlib import pyplot as plt except ImportError as e: raise ImportError('You must install matplotlib to plot tree') from e diff --git a/python-package/xgboost/rabit.py b/python-package/xgboost/rabit.py index 258ec4b6d..00a192d02 100644 --- a/python-package/xgboost/rabit.py +++ b/python-package/xgboost/rabit.py @@ -2,7 +2,7 @@ import logging import warnings from enum import IntEnum, unique -from typing import Any, TypeVar, Callable, Optional, List +from typing import Any, Callable, List, Optional, TypeVar import numpy as np diff --git a/python-package/xgboost/testing/__init__.py b/python-package/xgboost/testing/__init__.py index 45e4b7ffc..5079ce08d 100644 --- a/python-package/xgboost/testing/__init__.py +++ b/python-package/xgboost/testing/__init__.py @@ -10,7 +10,6 @@ import os import platform import socket import sys -import urllib import zipfile from concurrent.futures import ThreadPoolExecutor from contextlib import contextmanager @@ -29,6 +28,7 @@ from typing import ( TypedDict, Union, ) +from urllib import request import numpy as np import pytest @@ -439,7 +439,7 @@ def get_mq2008( src = "https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zip" target = dpath + "/MQ2008.zip" if not os.path.exists(target): - urllib.request.urlretrieve(url=src, filename=target) + request.urlretrieve(url=src, filename=target) with zipfile.ZipFile(target, "r") as f: f.extractall(path=dpath) diff --git a/python-package/xgboost/tracker.py b/python-package/xgboost/tracker.py index 169f303cc..bc179a674 100644 --- a/python-package/xgboost/tracker.py +++ b/python-package/xgboost/tracker.py @@ -3,14 +3,13 @@ This script is a variant of dmlc-core/dmlc_tracker/tracker.py, which is a specialized version for xgboost tasks. 
""" +import argparse +import logging import socket import struct -import logging -from threading import Thread -import argparse import sys - -from typing import Dict, List, Tuple, Union, Optional, Set +from threading import Thread +from typing import Dict, List, Optional, Set, Tuple, Union _RingMap = Dict[int, Tuple[int, int]] _TreeMap = Dict[int, List[int]] diff --git a/python-package/xgboost/training.py b/python-package/xgboost/training.py index c279dac3d..82172f818 100644 --- a/python-package/xgboost/training.py +++ b/python-package/xgboost/training.py @@ -5,15 +5,26 @@ import copy import os import warnings -from typing import Optional, Dict, Any, Union, Tuple, Sequence, List, cast, Iterable +from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union, cast import numpy as np -from .callback import TrainingCallback, CallbackContainer, EvaluationMonitor, EarlyStopping -from .core import Booster, DMatrix, XGBoostError, _deprecate_positional_args -from .core import Metric, Objective -from .compat import SKLEARN_INSTALLED, XGBStratifiedKFold, DataFrame -from ._typing import Callable, FPreProcCallable, BoosterParam +from ._typing import BoosterParam, Callable, FPreProcCallable +from .callback import ( + CallbackContainer, + EarlyStopping, + EvaluationMonitor, + TrainingCallback, +) +from .compat import SKLEARN_INSTALLED, DataFrame, XGBStratifiedKFold +from .core import ( + Booster, + DMatrix, + Metric, + Objective, + XGBoostError, + _deprecate_positional_args, +) _CVFolds = Sequence["CVPack"] diff --git a/tests/ci_build/insert_vcomp140.py b/tests/ci_build/insert_vcomp140.py index e45c7d204..938817415 100644 --- a/tests/ci_build/insert_vcomp140.py +++ b/tests/ci_build/insert_vcomp140.py @@ -1,7 +1,7 @@ -import sys -import re -import zipfile import glob +import re +import sys +import zipfile if len(sys.argv) != 2: print('Usage: {} [wheel]'.format(sys.argv[0])) diff --git a/tests/ci_build/lint_python.py b/tests/ci_build/lint_python.py index 79a59ccfb..23a593e08 100644 --- a/tests/ci_build/lint_python.py +++ b/tests/ci_build/lint_python.py @@ -12,16 +12,31 @@ CURDIR = os.path.normpath(os.path.abspath(os.path.dirname(__file__))) PROJECT_ROOT = os.path.normpath(os.path.join(CURDIR, os.path.pardir, os.path.pardir)) -def run_formatter(rel_path: str) -> bool: - path = os.path.join(PROJECT_ROOT, rel_path) - isort_ret = subprocess.run(["isort", "--check", "--profile=black", path]).returncode - black_ret = subprocess.run(["black", "--check", rel_path]).returncode - if isort_ret != 0 or black_ret != 0: - msg = ( - "Please run the following command on your machine to address the format" - f" errors:\n isort --profile=black {rel_path}\n black {rel_path}\n" - ) - print(msg, file=sys.stdout) +def run_black(rel_path: str) -> bool: + cmd = ["black", "-q", "--check", rel_path] + ret = subprocess.run(cmd).returncode + if ret != 0: + subprocess.run(["black", "--version"]) + msg = """ +Please run the following command on your machine to address the formatting error: + + """ + msg += " ".join(cmd) + print(msg, file=sys.stderr) + return False + return True + + +def run_isort(rel_path: str) -> bool: + cmd = ["isort", "--check", "--profile=black", rel_path] + ret = subprocess.run(cmd).returncode + if ret != 0: + msg = """ +Please run the following command on your machine to address the formatting error: + + """ + msg += " ".join(cmd) + print(msg, file=sys.stderr) return False return True @@ -114,8 +129,8 @@ if __name__ == "__main__": parser.add_argument("--pylint", type=int, choices=[0, 1], 
default=1) args = parser.parse_args() if args.format == 1: - if not all( - run_formatter(path) + black_results = [ + run_black(path) for path in [ # core "python-package/xgboost/__init__.py", @@ -141,7 +156,28 @@ if __name__ == "__main__": "demo/guide-python/categorical.py", "demo/guide-python/spark_estimator_examples.py", ] - ): + ] + if not all(black_results): + sys.exit(-1) + + isort_results = [ + run_isort(path) + for path in [ + # core + "python-package/", + # tests + "tests/test_distributed/", + "tests/python/", + "tests/python-gpu/", + "tests/ci_build/", + # demo + "demo/", + # misc + "dev/", + "doc/", + ] + ] + if not all(isort_results): sys.exit(-1) if args.type_check == 1: diff --git a/tests/ci_build/rename_whl.py b/tests/ci_build/rename_whl.py index ec0b1d0e4..766c88a2f 100644 --- a/tests/ci_build/rename_whl.py +++ b/tests/ci_build/rename_whl.py @@ -1,5 +1,5 @@ -import sys import os +import sys from contextlib import contextmanager diff --git a/tests/ci_build/tidy.py b/tests/ci_build/tidy.py index 5364a817d..107e62662 100755 --- a/tests/ci_build/tidy.py +++ b/tests/ci_build/tidy.py @@ -1,15 +1,16 @@ #!/usr/bin/env python -import subprocess -import yaml -import json -from multiprocessing import Pool, cpu_count -import shutil -import os -import sys -import re import argparse +import json +import os +import re +import shutil +import subprocess +import sys +from multiprocessing import Pool, cpu_count from time import time +import yaml + def call(args): '''Subprocess run wrapper.''' diff --git a/tests/python-gpu/test_gpu_updaters.py b/tests/python-gpu/test_gpu_updaters.py index 78b947a60..8a2501eb8 100644 --- a/tests/python-gpu/test_gpu_updaters.py +++ b/tests/python-gpu/test_gpu_updaters.py @@ -4,13 +4,10 @@ from typing import Any, Dict import numpy as np import pytest from hypothesis import assume, given, note, settings, strategies +from xgboost.testing.params import cat_parameter_strategy, hist_parameter_strategy import xgboost as xgb from xgboost import testing as tm -from xgboost.testing.params import ( - hist_parameter_strategy, - cat_parameter_strategy, -) sys.path.append("tests/python") import test_updaters as test_up diff --git a/tests/python/test_demos.py b/tests/python/test_demos.py index eead230c4..8a987492d 100644 --- a/tests/python/test_demos.py +++ b/tests/python/test_demos.py @@ -1,7 +1,7 @@ import os import subprocess -import tempfile import sys +import tempfile import pytest diff --git a/tests/python/test_tracker.py b/tests/python/test_tracker.py index 79e096ca4..8709589dd 100644 --- a/tests/python/test_tracker.py +++ b/tests/python/test_tracker.py @@ -5,9 +5,8 @@ import numpy as np import pytest import xgboost as xgb -from xgboost import RabitTracker +from xgboost import RabitTracker, collective from xgboost import testing as tm -from xgboost import collective if sys.platform.startswith("win"): pytest.skip("Skipping dask tests on Windows", allow_module_level=True) diff --git a/tests/python/test_updaters.py b/tests/python/test_updaters.py index 7ef75109a..1ed43588e 100644 --- a/tests/python/test_updaters.py +++ b/tests/python/test_updaters.py @@ -5,14 +5,14 @@ from typing import Any, Dict import numpy as np import pytest from hypothesis import given, note, settings, strategies +from xgboost.testing.params import ( + cat_parameter_strategy, + exact_parameter_strategy, + hist_parameter_strategy, +) import xgboost as xgb from xgboost import testing as tm -from xgboost.testing.params import ( - exact_parameter_strategy, - hist_parameter_strategy, -
cat_parameter_strategy, -) def train_result(param, dmat, num_rounds): diff --git a/tests/test_distributed/test_with_dask/test_with_dask.py b/tests/test_distributed/test_with_dask/test_with_dask.py index 1dfa34d85..fbe5607a1 100644 --- a/tests/test_distributed/test_with_dask/test_with_dask.py +++ b/tests/test_distributed/test_with_dask/test_with_dask.py @@ -12,7 +12,7 @@ from itertools import starmap from math import ceil from operator import attrgetter, getitem from pathlib import Path -from typing import Any, Dict, Optional, Tuple, Type, Union, Generator +from typing import Any, Dict, Generator, Optional, Tuple, Type, Union import hypothesis import numpy as np
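
A note on the pattern behind this patch: every hunk above applies the same import convention, the one enforced by the `isort --check --profile=black` call in `tests/ci_build/lint_python.py` — standard-library imports first, then third-party packages, then `xgboost` itself as a separate trailing block, each group alphabetized and blank-line separated. A minimal sketch of the same transformation through isort's Python API (isort 5+ assumed; passing `known_first_party=["xgboost"]` is an assumption standing in for however the repository's isort configuration keeps `xgboost` in its own group):

```python
import isort

# Roughly the import order basic_walkthrough.py had before this patch.
messy = (
    "import numpy as np\n"
    "import pickle\n"
    "import xgboost as xgb\n"
    "import os\n"
    "from sklearn.datasets import load_svmlight_file\n"
)

# profile="black" mirrors the lint command; known_first_party pushes
# xgboost into its own trailing group, after stdlib and third-party.
print(isort.code(messy, profile="black", known_first_party=["xgboost"]))
```

Running this prints the stdlib/third-party/`xgboost` grouping that the hunks above converge on.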
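One behavioral detail of the `lint_python.py` rewrite is worth calling out: the old code passed a generator to `all()`, which short-circuits, so `run_formatter` stopped reporting at the first offending file; the new code materializes `black_results` (and `isort_results`) as lists before testing them, so every configured path is checked and every failure is printed in a single run. A small self-contained sketch of the difference (the `check` function below is hypothetical, standing in for `run_black`/`run_isort`):

```python
def check(path: str) -> bool:
    """Pretend lint check that always fails, with a visible side effect."""
    print(f"checking {path}")
    return False

paths = ["a.py", "b.py", "c.py"]

# Generator form: all() short-circuits, so only "checking a.py" is printed.
all(check(p) for p in paths)

# List form: every check runs before the combined verdict is taken,
# so all three paths are reported.
results = [check(p) for p in paths]
all(results)
```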