Require isort on all Python files. (#8420)

This commit is contained in:
Jiaming Yuan 2022-11-08 12:59:06 +08:00 committed by GitHub
parent bf8de227a9
commit 0d3da9869c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
69 changed files with 290 additions and 187 deletions

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import sys
import random import random
import sys
if len(sys.argv) < 2: if len(sys.argv) < 2:
print ('Usage:<filename> <k> [nfold = 5]') print ('Usage:<filename> <k> [nfold = 5]')

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import sys
import random import random
import sys
if len(sys.argv) < 2: if len(sys.argv) < 2:
print('Usage:<filename> <k> [nfold = 5]') print('Usage:<filename> <k> [nfold = 5]')

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import sys import sys
fo = open(sys.argv[2], 'w') fo = open(sys.argv[2], 'w')
for l in open(sys.argv[1]): for l in open(sys.argv[1]):

View File

@ -6,9 +6,11 @@ Demo for survival analysis (regression). using Accelerated Failure Time (AFT) mo
""" """
import os import os
from sklearn.model_selection import ShuffleSplit
import pandas as pd
import numpy as np import numpy as np
import pandas as pd
from sklearn.model_selection import ShuffleSplit
import xgboost as xgb import xgboost as xgb
# The Veterans' Administration Lung Cancer Trial # The Veterans' Administration Lung Cancer Trial

View File

@ -6,11 +6,12 @@ Demo for survival analysis (regression) using Accelerated Failure Time (AFT) mod
using Optuna to tune hyperparameters using Optuna to tune hyperparameters
""" """
from sklearn.model_selection import ShuffleSplit
import pandas as pd
import numpy as np import numpy as np
import xgboost as xgb
import optuna import optuna
import pandas as pd
from sklearn.model_selection import ShuffleSplit
import xgboost as xgb
# The Veterans' Administration Lung Cancer Trial # The Veterans' Administration Lung Cancer Trial
# The Statistical Analysis of Failure Time Data by Kalbfleisch J. and Prentice R (1980) # The Statistical Analysis of Failure Time Data by Kalbfleisch J. and Prentice R (1980)

View File

@ -6,9 +6,10 @@ This demo uses 1D toy data and visualizes how XGBoost fits a tree ensemble. The
model starts out as a flat line and evolves into a step function in order to account for model starts out as a flat line and evolves into a step function in order to account for
all ranged labels. all ranged labels.
""" """
import numpy as np
import xgboost as xgb
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import numpy as np
import xgboost as xgb
plt.rcParams.update({'font.size': 13}) plt.rcParams.update({'font.size': 13})

View File

@ -4,12 +4,14 @@ Example of training survival model with Dask on CPU
""" """
import xgboost as xgb
import os import os
from xgboost.dask import DaskDMatrix
import dask.dataframe as dd import dask.dataframe as dd
from dask.distributed import Client from dask.distributed import Client, LocalCluster
from dask.distributed import LocalCluster from xgboost.dask import DaskDMatrix
import xgboost as xgb
def main(client): def main(client):
# Load an example survival data from CSV into a Dask data frame. # Load an example survival data from CSV into a Dask data frame.

View File

@ -3,11 +3,11 @@ Example of training with Dask on CPU
==================================== ====================================
""" """
import xgboost as xgb
from xgboost.dask import DaskDMatrix
from dask.distributed import Client
from dask.distributed import LocalCluster
from dask import array as da from dask import array as da
from dask.distributed import Client, LocalCluster
from xgboost.dask import DaskDMatrix
import xgboost as xgb
def main(client): def main(client):

View File

@ -3,12 +3,12 @@ Example of using callbacks with Dask
==================================== ====================================
""" """
import numpy as np import numpy as np
import xgboost as xgb from dask.distributed import Client, LocalCluster
from xgboost.dask import DaskDMatrix
from dask.distributed import Client
from dask.distributed import LocalCluster
from dask_ml.datasets import make_regression from dask_ml.datasets import make_regression
from dask_ml.model_selection import train_test_split from dask_ml.model_selection import train_test_split
from xgboost.dask import DaskDMatrix
import xgboost as xgb
def probability_for_going_backward(epoch): def probability_for_going_backward(epoch):

View File

@ -2,14 +2,15 @@
Example of training with Dask on GPU Example of training with Dask on GPU
==================================== ====================================
""" """
from dask_cuda import LocalCUDACluster
import dask_cudf import dask_cudf
from dask.distributed import Client
from dask import array as da from dask import array as da
from dask import dataframe as dd from dask import dataframe as dd
from dask.distributed import Client
from dask_cuda import LocalCUDACluster
from xgboost.dask import DaskDMatrix
import xgboost as xgb import xgboost as xgb
from xgboost import dask as dxgb from xgboost import dask as dxgb
from xgboost.dask import DaskDMatrix
def using_dask_matrix(client: Client, X, y): def using_dask_matrix(client: Client, X, y):

View File

@ -2,9 +2,9 @@
Use scikit-learn regressor interface with CPU histogram tree method Use scikit-learn regressor interface with CPU histogram tree method
=================================================================== ===================================================================
""" """
from dask.distributed import Client
from dask.distributed import LocalCluster
from dask import array as da from dask import array as da
from dask.distributed import Client, LocalCluster
import xgboost import xgboost

View File

@ -3,10 +3,12 @@ Use scikit-learn regressor interface with GPU histogram tree method
=================================================================== ===================================================================
""" """
from dask import array as da
from dask.distributed import Client from dask.distributed import Client
# It's recommended to use dask_cuda for GPU assignment # It's recommended to use dask_cuda for GPU assignment
from dask_cuda import LocalCUDACluster from dask_cuda import LocalCUDACluster
from dask import array as da
import xgboost import xgboost

View File

@ -1,7 +1,9 @@
import xgboost as xgb import time
from sklearn.datasets import fetch_covtype from sklearn.datasets import fetch_covtype
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
import time
import xgboost as xgb
# Fetch dataset using sklearn # Fetch dataset using sklearn
cov = fetch_covtype() cov = fetch_covtype()

View File

@ -9,13 +9,14 @@ interfaces in the Python package like scikit-learn interface and Dask interface.
See :doc:`/python/python_intro` and :doc:`/tutorials/index` for other references. See :doc:`/python/python_intro` and :doc:`/tutorials/index` for other references.
""" """
import numpy as np
import pickle
import xgboost as xgb
import os import os
import pickle
import numpy as np
from sklearn.datasets import load_svmlight_file from sklearn.datasets import load_svmlight_file
import xgboost as xgb
# Make sure the demo knows where to load the data. # Make sure the demo knows where to load the data.
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
XGBOOST_ROOT_DIR = os.path.dirname(os.path.dirname(CURRENT_DIR)) XGBOOST_ROOT_DIR = os.path.dirname(os.path.dirname(CURRENT_DIR))

View File

@ -3,8 +3,8 @@ Demo for boosting from prediction
================================= =================================
""" """
import os import os
import xgboost as xgb
import xgboost as xgb
CURRENT_DIR = os.path.dirname(__file__) CURRENT_DIR = os.path.dirname(__file__)
dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train')) dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train'))

View File

@ -4,14 +4,16 @@ Demo for using and defining callback functions
.. versionadded:: 1.3.0 .. versionadded:: 1.3.0
''' '''
import xgboost as xgb import argparse
import tempfile
import os import os
import tempfile
import numpy as np import numpy as np
from matplotlib import pyplot as plt
from sklearn.datasets import load_breast_cancer from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
import argparse import xgboost as xgb
class Plotting(xgb.callback.TrainingCallback): class Plotting(xgb.callback.TrainingCallback):

View File

@ -3,11 +3,13 @@ Demo for training continuation
============================== ==============================
""" """
from sklearn.datasets import load_breast_cancer import os
import xgboost
import pickle import pickle
import tempfile import tempfile
import os
from sklearn.datasets import load_breast_cancer
import xgboost
def training_continuation(tmpdir: str, use_pickle: bool) -> None: def training_continuation(tmpdir: str, use_pickle: bool) -> None:

View File

@ -3,7 +3,9 @@ Demo for using cross validation
=============================== ===============================
""" """
import os import os
import numpy as np import numpy as np
import xgboost as xgb import xgboost as xgb
# load data in do training # load data in do training

View File

@ -14,14 +14,16 @@ The `SLE` objective reduces impact of outliers in training dataset, hence here w
compare its performance with standard squared error. compare its performance with standard squared error.
""" """
import numpy as np
import xgboost as xgb
from typing import Tuple, Dict, List
from time import time
import argparse import argparse
from time import time
from typing import Dict, List, Tuple
import matplotlib import matplotlib
import numpy as np
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
import xgboost as xgb
# shape of generated data. # shape of generated data.
kRows = 4096 kRows = 4096
kCols = 16 kCols = 16

View File

@ -10,11 +10,13 @@ See :doc:`/tutorials/custom_metric_obj` for detailed tutorial and notes.
''' '''
import numpy as np
import xgboost as xgb
from matplotlib import pyplot as plt
import argparse import argparse
import numpy as np
from matplotlib import pyplot as plt
import xgboost as xgb
np.random.seed(1994) np.random.seed(1994)
kRows = 100 kRows = 100

View File

@ -3,6 +3,7 @@ This script demonstrate how to access the eval metrics
====================================================== ======================================================
""" """
import os import os
import xgboost as xgb import xgboost as xgb
CURRENT_DIR = os.path.dirname(__file__) CURRENT_DIR = os.path.dirname(__file__)

View File

@ -12,11 +12,13 @@ See :doc:`the tutorial </tutorials/external_memory>` for more details.
""" """
import os import os
import xgboost
from typing import Callable, List, Tuple
from sklearn.datasets import make_regression
import tempfile import tempfile
from typing import Callable, List, Tuple
import numpy as np import numpy as np
from sklearn.datasets import make_regression
import xgboost
def make_batches( def make_batches(

View File

@ -5,11 +5,13 @@ Demo for using feature weight to change column sampling
.. versionadded:: 1.3.0 .. versionadded:: 1.3.0
''' '''
import numpy as np
import xgboost
from matplotlib import pyplot as plt
import argparse import argparse
import numpy as np
from matplotlib import pyplot as plt
import xgboost
def main(args): def main(args):
rng = np.random.RandomState(1994) rng = np.random.RandomState(1994)

View File

@ -2,9 +2,10 @@
Demo for gamma regression Demo for gamma regression
========================= =========================
""" """
import xgboost as xgb
import numpy as np import numpy as np
import xgboost as xgb
# this script demonstrates how to fit gamma regression model (with log link function) # this script demonstrates how to fit gamma regression model (with log link function)
# in xgboost, before running the demo you need to generate the autoclaims dataset # in xgboost, before running the demo you need to generate the autoclaims dataset
# by running gen_autoclaims.R located in xgboost/demo/data. # by running gen_autoclaims.R located in xgboost/demo/data.

View File

@ -3,7 +3,9 @@ Demo for GLM
============ ============
""" """
import os import os
import xgboost as xgb import xgboost as xgb
## ##
# this script demonstrate how to fit generalized linear model in xgboost # this script demonstrate how to fit generalized linear model in xgboost
# basically, we are using linear model, instead of tree for our boosters # basically, we are using linear model, instead of tree for our boosters

View File

@ -10,10 +10,11 @@ See :doc:`/tutorials/multioutput` for more information.
""" """
import argparse import argparse
from typing import Dict, Tuple, List from typing import Dict, List, Tuple
import numpy as np import numpy as np
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
import xgboost as xgb import xgboost as xgb

View File

@ -3,10 +3,12 @@ Demo for prediction using number of trees
========================================= =========================================
""" """
import os import os
import numpy as np import numpy as np
import xgboost as xgb
from sklearn.datasets import load_svmlight_file from sklearn.datasets import load_svmlight_file
import xgboost as xgb
CURRENT_DIR = os.path.dirname(__file__) CURRENT_DIR = os.path.dirname(__file__)
train = os.path.join(CURRENT_DIR, "../data/agaricus.txt.train") train = os.path.join(CURRENT_DIR, "../data/agaricus.txt.train")
test = os.path.join(CURRENT_DIR, "../data/agaricus.txt.test") test = os.path.join(CURRENT_DIR, "../data/agaricus.txt.test")

View File

@ -3,6 +3,7 @@ Demo for obtaining leaf index
============================= =============================
""" """
import os import os
import xgboost as xgb import xgboost as xgb
# load data in do training # load data in do training

View File

@ -17,10 +17,11 @@ using `itertools.tee` might incur significant memory usage according to:
''' '''
import xgboost
import cupy import cupy
import numpy import numpy
import xgboost
COLS = 64 COLS = 64
ROWS_PER_BATCH = 1000 # data is splited by rows ROWS_PER_BATCH = 1000 # data is splited by rows
BATCHES = 32 BATCHES = 32

View File

@ -3,10 +3,11 @@ Demo for accessing the xgboost eval metrics by using sklearn interface
====================================================================== ======================================================================
""" """
import xgboost as xgb
import numpy as np import numpy as np
from sklearn.datasets import make_hastie_10_2 from sklearn.datasets import make_hastie_10_2
import xgboost as xgb
X, y = make_hastie_10_2(n_samples=2000, random_state=42) X, y = make_hastie_10_2(n_samples=2000, random_state=42)
# Map labels from {-1, 1} to {0, 1} # Map labels from {-1, 1} to {0, 1}

View File

@ -7,12 +7,13 @@ Created on 1 Apr 2015
@author: Jamie Hall @author: Jamie Hall
''' '''
import pickle import pickle
import xgboost as xgb
import numpy as np import numpy as np
from sklearn.model_selection import KFold, train_test_split, GridSearchCV from sklearn.datasets import fetch_california_housing, load_digits, load_iris
from sklearn.metrics import confusion_matrix, mean_squared_error from sklearn.metrics import confusion_matrix, mean_squared_error
from sklearn.datasets import load_iris, load_digits, fetch_california_housing from sklearn.model_selection import GridSearchCV, KFold, train_test_split
import xgboost as xgb
rng = np.random.RandomState(31337) rng = np.random.RandomState(31337)

View File

@ -2,11 +2,13 @@
Demo for using xgboost with sklearn Demo for using xgboost with sklearn
=================================== ===================================
""" """
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import fetch_california_housing
import xgboost as xgb
import multiprocessing import multiprocessing
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import GridSearchCV
import xgboost as xgb
if __name__ == "__main__": if __name__ == "__main__":
print("Parallel Parameter optimization") print("Parallel Parameter optimization")
X, y = fetch_california_housing(return_X_y=True) X, y = fetch_california_housing(return_X_y=True)

View File

@ -7,9 +7,10 @@ experiment.
""" """
import xgboost as xgb
from sklearn.datasets import fetch_california_housing
import numpy as np import numpy as np
from sklearn.datasets import fetch_california_housing
import xgboost as xgb
def main(): def main():

View File

@ -1,5 +1,6 @@
#!/usr/bin/python #!/usr/bin/python
import numpy as np import numpy as np
import xgboost as xgb import xgboost as xgb
### load data in do training ### load data in do training

View File

@ -1,6 +1,7 @@
#!/usr/bin/python #!/usr/bin/python
# make prediction # make prediction
import numpy as np import numpy as np
import xgboost as xgb import xgboost as xgb
# path to where the data lies # path to where the data lies

View File

@ -1,9 +1,12 @@
#!/usr/bin/python #!/usr/bin/python
# this is the example script to use xgboost to train # this is the example script to use xgboost to train
import numpy as np
import xgboost as xgb
from sklearn.ensemble import GradientBoostingClassifier
import time import time
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
import xgboost as xgb
test_size = 550000 test_size = 550000
# path to where the data lies # path to where the data lies

View File

@ -3,6 +3,7 @@
from __future__ import division from __future__ import division
import numpy as np import numpy as np
import xgboost as xgb import xgboost as xgb
# label need to be 0 to num_class -1 # label need to be 0 to num_class -1

View File

@ -10,7 +10,6 @@ from nvflare.apis.fl_context import FLContext
from nvflare.apis.impl.controller import Controller, Task from nvflare.apis.impl.controller import Controller, Task
from nvflare.apis.shareable import Shareable from nvflare.apis.shareable import Shareable
from nvflare.apis.signal import Signal from nvflare.apis.signal import Signal
from trainer import SupportedTasks from trainer import SupportedTasks

View File

@ -1,7 +1,7 @@
import os import os
from nvflare.apis.executor import Executor from nvflare.apis.executor import Executor
from nvflare.apis.fl_constant import ReturnCode, FLContextKey from nvflare.apis.fl_constant import FLContextKey, ReturnCode
from nvflare.apis.fl_context import FLContext from nvflare.apis.fl_context import FLContext
from nvflare.apis.shareable import Shareable, make_reply from nvflare.apis.shareable import Shareable, make_reply
from nvflare.apis.signal import Signal from nvflare.apis.signal import Signal

View File

@ -1,8 +1,8 @@
#!/usr/bin/python #!/usr/bin/python
import xgboost as xgb
from xgboost import DMatrix
from sklearn.datasets import load_svmlight_file from sklearn.datasets import load_svmlight_file
import xgboost as xgb
from xgboost import DMatrix
# This script demonstrate how to do ranking with xgboost.train # This script demonstrate how to do ranking with xgboost.train
x_train, y_train = load_svmlight_file("mq2008.train") x_train, y_train = load_svmlight_file("mq2008.train")

View File

@ -1,7 +1,8 @@
#!/usr/bin/python #!/usr/bin/python
import xgboost as xgb
from sklearn.datasets import load_svmlight_file from sklearn.datasets import load_svmlight_file
import xgboost as xgb
# This script demonstrate how to do ranking with XGBRanker # This script demonstrate how to do ranking with XGBRanker
x_train, y_train = load_svmlight_file("mq2008.train") x_train, y_train = load_svmlight_file("mq2008.train")
x_valid, y_valid = load_svmlight_file("mq2008.vali") x_valid, y_valid = load_svmlight_file("mq2008.vali")

View File

@ -1,5 +1,6 @@
import sys import sys
def save_data(group_data,output_feature,output_group): def save_data(group_data,output_feature,output_group):
if len(group_data) == 0: if len(group_data) == 0:
return return

View File

@ -1,8 +1,9 @@
import xgboost as xgb
from sklearn.datasets import make_classification
import dask import dask
from dask.distributed import Client from dask.distributed import Client
from dask_cuda import LocalCUDACluster from dask_cuda import LocalCUDACluster
from sklearn.datasets import make_classification
import xgboost as xgb
def main(client): def main(client):

View File

@ -1,7 +1,8 @@
import xgboost as xgb
import rmm import rmm
from sklearn.datasets import make_classification from sklearn.datasets import make_classification
import xgboost as xgb
# Initialize RMM pool allocator # Initialize RMM pool allocator
rmm.reinitialize(pool_allocator=True) rmm.reinitialize(pool_allocator=True)
# Optionally force XGBoost to use RMM for all GPU memory allocation, see ./README.md # Optionally force XGBoost to use RMM for all GPU memory allocation, see ./README.md

View File

@ -1,16 +1,17 @@
import re
import os
import sys
import platform
import errno
import argparse import argparse
import subprocess import errno
import glob import glob
import os
import platform
import re
import shutil import shutil
import subprocess
import sys
import tempfile import tempfile
import zipfile import zipfile
from urllib.request import urlretrieve
from contextlib import contextmanager from contextlib import contextmanager
from urllib.request import urlretrieve
def normpath(path): def normpath(path):
"""Normalize UNIX path to a native path.""" """Normalize UNIX path to a native path."""

View File

@ -1,10 +1,11 @@
"""Query list of all contributors and reviewers in a release""" """Query list of all contributors and reviewers in a release"""
from sh.contrib import git
import sys
import re
import requests
import json import json
import re
import sys
import requests
from sh.contrib import git
if len(sys.argv) != 5: if len(sys.argv) != 5:
print(f'Usage: {sys.argv[0]} [starting commit/tag] [ending commit/tag] [GitHub username] ' + print(f'Usage: {sys.argv[0]} [starting commit/tag] [ending commit/tag] [GitHub username] ' +

View File

@ -2,14 +2,15 @@
tqdm, sh are required to run this script. tqdm, sh are required to run this script.
""" """
from urllib.request import urlretrieve
import argparse import argparse
from typing import List, Optional
from sh.contrib import git
from packaging import version
import subprocess
import tqdm
import os import os
import subprocess
from typing import List, Optional
from urllib.request import urlretrieve
import tqdm
from packaging import version
from sh.contrib import git
# The package building is managed by Jenkins CI. # The package building is managed by Jenkins CI.
PREFIX = "https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds/release_" PREFIX = "https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds/release_"

View File

@ -11,14 +11,15 @@
# #
# All configuration values have a default; values that are commented out # All configuration values have a default; values that are commented out
# serve to show the default. # serve to show the default.
from subprocess import call
from sh.contrib import git
import urllib.request
from urllib.error import HTTPError
import sys
import re
import os import os
import re
import subprocess import subprocess
import sys
import urllib.request
from subprocess import call
from urllib.error import HTTPError
from sh.contrib import git
git_branch = os.getenv('SPHINX_GIT_BRANCH', default=None) git_branch = os.getenv('SPHINX_GIT_BRANCH', default=None)
if not git_branch: if not git_branch:

View File

@ -1,8 +1,8 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
"""Helper utility function for customization.""" """Helper utility function for customization."""
import sys
import os import os
import subprocess import subprocess
import sys
READTHEDOCS_BUILD = (os.environ.get('READTHEDOCS', None) is not None) READTHEDOCS_BUILD = (os.environ.get('READTHEDOCS', None) is not None)

View File

@ -1,13 +1,14 @@
"""Setup xgboost package.""" """Setup xgboost package."""
import logging
import os import os
import shutil import shutil
import subprocess import subprocess
import logging
from typing import Optional, List
import sys import sys
from platform import system from platform import system
from setuptools import setup, find_packages, Extension from typing import List, Optional
from setuptools.command import build_ext, sdist, install_lib, install
from setuptools import Extension, find_packages, setup
from setuptools.command import build_ext, install, install_lib, sdist
# You can't use `pip install .` as pip copies setup.py to a temporary # You can't use `pip install .` as pip copies setup.py to a temporary
# directory, parent directory is no longer reachable (isolated build) . # directory, parent directory is no longer reachable (isolated build) .

View File

@ -6,17 +6,28 @@
""" """
from abc import ABC
import collections import collections
import os import os
import pickle import pickle
from typing import Callable, List, Optional, Union, Dict, Tuple, TypeVar, cast, Sequence, Any from abc import ABC
from typing import (
Any,
Callable,
Dict,
List,
Optional,
Sequence,
Tuple,
TypeVar,
Union,
cast,
)
import numpy import numpy
from . import collective from . import collective
from .core import Booster, DMatrix, XGBoostError, _get_booster_layer_trees from .core import Booster, DMatrix, XGBoostError, _get_booster_layer_trees
__all__ = [ __all__ = [
"TrainingCallback", "TrainingCallback",
"LearningRateScheduler", "LearningRateScheduler",

View File

@ -4,12 +4,12 @@ import json
import logging import logging
import pickle import pickle
from enum import IntEnum, unique from enum import IntEnum, unique
from typing import Any, List, Dict from typing import Any, Dict, List
import numpy as np import numpy as np
from ._typing import _T from ._typing import _T
from .core import _LIB, _check_call, c_str, py_str, from_pystr_to_cstr from .core import _LIB, _check_call, c_str, from_pystr_to_cstr, py_str
LOGGER = logging.getLogger("[xgboost.collective]") LOGGER = logging.getLogger("[xgboost.collective]")

View File

@ -282,7 +282,7 @@ def _has_categorical(booster: "Booster", data: DataType) -> bool:
"""Check whether the booster and input data for prediction contain categorical data. """Check whether the booster and input data for prediction contain categorical data.
""" """
from .data import _is_pandas_df, _is_cudf_df from .data import _is_cudf_df, _is_pandas_df
if _is_pandas_df(data) or _is_cudf_df(data): if _is_pandas_df(data) or _is_cudf_df(data):
ft = booster.feature_types ft = booster.feature_types
if ft is None: if ft is None:
@ -355,8 +355,7 @@ def ctypes2cupy(cptr: CNumericPtr, length: int, dtype: Type[np.number]) -> CupyT
"""Convert a ctypes pointer array to a cupy array.""" """Convert a ctypes pointer array to a cupy array."""
# pylint: disable=import-error # pylint: disable=import-error
import cupy import cupy
from cupy.cuda.memory import MemoryPointer from cupy.cuda.memory import MemoryPointer, UnownedMemory
from cupy.cuda.memory import UnownedMemory
CUPY_TO_CTYPES_MAPPING: Dict[Type[np.number], Type[CNumeric]] = { CUPY_TO_CTYPES_MAPPING: Dict[Type[np.number], Type[CNumeric]] = {
cupy.float32: ctypes.c_float, cupy.float32: ctypes.c_float,
@ -512,8 +511,7 @@ class DataIter(ABC): # pylint: disable=too-many-instance-attributes
feature_types: Optional[FeatureTypes] = None, feature_types: Optional[FeatureTypes] = None,
**kwargs: Any, **kwargs: Any,
) -> None: ) -> None:
from .data import dispatch_proxy_set_data from .data import _proxy_transform, dispatch_proxy_set_data
from .data import _proxy_transform
new, cat_codes, feature_names, feature_types = _proxy_transform( new, cat_codes, feature_names, feature_types = _proxy_transform(
data, data,
@ -732,7 +730,7 @@ class DMatrix: # pylint: disable=too-many-instance-attributes,too-many-public-m
self.handle: Optional[ctypes.c_void_p] = None self.handle: Optional[ctypes.c_void_p] = None
return return
from .data import dispatch_data_backend, _is_iter from .data import _is_iter, dispatch_data_backend
if _is_iter(data): if _is_iter(data):
self._init_from_iter(data, enable_categorical) self._init_from_iter(data, enable_categorical)
@ -1406,10 +1404,10 @@ class QuantileDMatrix(DMatrix):
**meta: Any, **meta: Any,
) -> None: ) -> None:
from .data import ( from .data import (
_is_dlpack,
_transform_dlpack,
_is_iter,
SingleBatchInternalIter, SingleBatchInternalIter,
_is_dlpack,
_is_iter,
_transform_dlpack,
) )
if _is_dlpack(data): if _is_dlpack(data):

View File

@ -278,10 +278,7 @@ def _pandas_feature_info(
enable_categorical: bool, enable_categorical: bool,
) -> Tuple[Optional[FeatureNames], Optional[FeatureTypes]]: ) -> Tuple[Optional[FeatureNames], Optional[FeatureTypes]]:
import pandas as pd import pandas as pd
from pandas.api.types import ( from pandas.api.types import is_categorical_dtype, is_sparse
is_sparse,
is_categorical_dtype,
)
# handle feature names # handle feature names
if feature_names is None and meta is None: if feature_names is None and meta is None:
@ -308,10 +305,10 @@ def _pandas_feature_info(
def is_nullable_dtype(dtype: PandasDType) -> bool: def is_nullable_dtype(dtype: PandasDType) -> bool:
"""Wether dtype is a pandas nullable type.""" """Wether dtype is a pandas nullable type."""
from pandas.api.types import ( from pandas.api.types import (
is_integer_dtype,
is_bool_dtype, is_bool_dtype,
is_float_dtype,
is_categorical_dtype, is_categorical_dtype,
is_float_dtype,
is_integer_dtype,
) )
# dtype: pd.core.arrays.numeric.NumericDtype # dtype: pd.core.arrays.numeric.NumericDtype
@ -325,6 +322,7 @@ def is_nullable_dtype(dtype: PandasDType) -> bool:
def _pandas_cat_null(data: DataFrame) -> DataFrame: def _pandas_cat_null(data: DataFrame) -> DataFrame:
from pandas.api.types import is_categorical_dtype from pandas.api.types import is_categorical_dtype
# handle category codes and nullable. # handle category codes and nullable.
cat_columns = [ cat_columns = [
col col
@ -363,10 +361,7 @@ def _transform_pandas_df(
meta: Optional[str] = None, meta: Optional[str] = None,
meta_type: Optional[NumpyDType] = None, meta_type: Optional[NumpyDType] = None,
) -> Tuple[np.ndarray, Optional[FeatureNames], Optional[FeatureTypes]]: ) -> Tuple[np.ndarray, Optional[FeatureNames], Optional[FeatureTypes]]:
from pandas.api.types import ( from pandas.api.types import is_categorical_dtype, is_sparse
is_sparse,
is_categorical_dtype,
)
if not all( if not all(
dtype.name in _pandas_dtype_mapper dtype.name in _pandas_dtype_mapper
@ -533,8 +528,9 @@ def _from_dt_df(
ptrs[icol] = ctypes.c_void_p(ptr) ptrs[icol] = ctypes.c_void_p(ptr)
else: else:
# datatable<=0.8.0 # datatable<=0.8.0
from datatable.internal import \ from datatable.internal import (
frame_column_data_r # pylint: disable=no-name-in-module frame_column_data_r, # pylint: disable=no-name-in-module
)
for icol in range(data.ncols): for icol in range(data.ncols):
ptrs[icol] = frame_column_data_r(data, icol) ptrs[icol] = frame_column_data_r(data, icol)

View File

@ -3,8 +3,8 @@
import os import os
import platform import platform
from typing import List
import sys import sys
from typing import List
class XGBoostLibraryNotFound(Exception): class XGBoostLibraryNotFound(Exception):

View File

@ -2,9 +2,9 @@
# pylint: disable=too-many-branches # pylint: disable=too-many-branches
# coding: utf-8 # coding: utf-8
"""Plotting Library.""" """Plotting Library."""
from io import BytesIO
import json import json
from typing import Optional, Any from io import BytesIO
from typing import Any, Optional
import numpy as np import numpy as np
@ -269,8 +269,8 @@ def plot_tree(
""" """
try: try:
from matplotlib import pyplot as plt
from matplotlib import image from matplotlib import image
from matplotlib import pyplot as plt
except ImportError as e: except ImportError as e:
raise ImportError('You must install matplotlib to plot tree') from e raise ImportError('You must install matplotlib to plot tree') from e

View File

@ -2,7 +2,7 @@
import logging import logging
import warnings import warnings
from enum import IntEnum, unique from enum import IntEnum, unique
from typing import Any, TypeVar, Callable, Optional, List from typing import Any, Callable, List, Optional, TypeVar
import numpy as np import numpy as np

View File

@ -10,7 +10,6 @@ import os
import platform import platform
import socket import socket
import sys import sys
import urllib
import zipfile import zipfile
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from contextlib import contextmanager from contextlib import contextmanager
@ -29,6 +28,7 @@ from typing import (
TypedDict, TypedDict,
Union, Union,
) )
from urllib import request
import numpy as np import numpy as np
import pytest import pytest
@ -439,7 +439,7 @@ def get_mq2008(
src = "https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zip" src = "https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zip"
target = dpath + "/MQ2008.zip" target = dpath + "/MQ2008.zip"
if not os.path.exists(target): if not os.path.exists(target):
urllib.request.urlretrieve(url=src, filename=target) request.urlretrieve(url=src, filename=target)
with zipfile.ZipFile(target, "r") as f: with zipfile.ZipFile(target, "r") as f:
f.extractall(path=dpath) f.extractall(path=dpath)

View File

@ -3,14 +3,13 @@
This script is a variant of dmlc-core/dmlc_tracker/tracker.py, This script is a variant of dmlc-core/dmlc_tracker/tracker.py,
which is a specialized version for xgboost tasks. which is a specialized version for xgboost tasks.
""" """
import argparse
import logging
import socket import socket
import struct import struct
import logging
from threading import Thread
import argparse
import sys import sys
from threading import Thread
from typing import Dict, List, Tuple, Union, Optional, Set from typing import Dict, List, Optional, Set, Tuple, Union
_RingMap = Dict[int, Tuple[int, int]] _RingMap = Dict[int, Tuple[int, int]]
_TreeMap = Dict[int, List[int]] _TreeMap = Dict[int, List[int]]

View File

@ -5,15 +5,26 @@
import copy import copy
import os import os
import warnings import warnings
from typing import Optional, Dict, Any, Union, Tuple, Sequence, List, cast, Iterable from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union, cast
import numpy as np import numpy as np
from .callback import TrainingCallback, CallbackContainer, EvaluationMonitor, EarlyStopping from ._typing import BoosterParam, Callable, FPreProcCallable
from .core import Booster, DMatrix, XGBoostError, _deprecate_positional_args from .callback import (
from .core import Metric, Objective CallbackContainer,
from .compat import SKLEARN_INSTALLED, XGBStratifiedKFold, DataFrame EarlyStopping,
from ._typing import Callable, FPreProcCallable, BoosterParam EvaluationMonitor,
TrainingCallback,
)
from .compat import SKLEARN_INSTALLED, DataFrame, XGBStratifiedKFold
from .core import (
Booster,
DMatrix,
Metric,
Objective,
XGBoostError,
_deprecate_positional_args,
)
_CVFolds = Sequence["CVPack"] _CVFolds = Sequence["CVPack"]

View File

@ -1,7 +1,7 @@
import sys
import re
import zipfile
import glob import glob
import re
import sys
import zipfile
if len(sys.argv) != 2: if len(sys.argv) != 2:
print('Usage: {} [wheel]'.format(sys.argv[0])) print('Usage: {} [wheel]'.format(sys.argv[0]))

View File

@ -12,16 +12,31 @@ CURDIR = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
PROJECT_ROOT = os.path.normpath(os.path.join(CURDIR, os.path.pardir, os.path.pardir)) PROJECT_ROOT = os.path.normpath(os.path.join(CURDIR, os.path.pardir, os.path.pardir))
def run_formatter(rel_path: str) -> bool: def run_black(rel_path: str) -> bool:
path = os.path.join(PROJECT_ROOT, rel_path) cmd = ["black", "-q", "--check", rel_path]
isort_ret = subprocess.run(["isort", "--check", "--profile=black", path]).returncode ret = subprocess.run(cmd).returncode
black_ret = subprocess.run(["black", "--check", rel_path]).returncode if ret != 0:
if isort_ret != 0 or black_ret != 0: subprocess.run(["black", "--version"])
msg = ( msg = """
"Please run the following command on your machine to address the format" Please run the following command on your machine to address the formatting error:
f" errors:\n isort --profile=black {rel_path}\n black {rel_path}\n"
) """
print(msg, file=sys.stdout) msg += " ".join(cmd)
print(msg, file=sys.stderr)
return False
return True
def run_isort(rel_path: str) -> bool:
cmd = ["isort", "--check", "--profile=black", rel_path]
ret = subprocess.run(cmd).returncode
if ret != 0:
msg = """
Please run the following command on your machine to address the formatting error:
"""
msg += " ".join(cmd)
print(msg, file=sys.stderr)
return False return False
return True return True
@ -114,8 +129,8 @@ if __name__ == "__main__":
parser.add_argument("--pylint", type=int, choices=[0, 1], default=1) parser.add_argument("--pylint", type=int, choices=[0, 1], default=1)
args = parser.parse_args() args = parser.parse_args()
if args.format == 1: if args.format == 1:
if not all( black_results = [
run_formatter(path) run_black(path)
for path in [ for path in [
# core # core
"python-package/xgboost/__init__.py", "python-package/xgboost/__init__.py",
@ -141,7 +156,28 @@ if __name__ == "__main__":
"demo/guide-python/categorical.py", "demo/guide-python/categorical.py",
"demo/guide-python/spark_estimator_examples.py", "demo/guide-python/spark_estimator_examples.py",
] ]
): ]
if not all(black_results):
sys.exit(-1)
isort_results = [
run_isort(path)
for path in [
# core
"python-package/",
# tests
"tests/test_distributed/",
"tests/python/",
"tests/python-gpu/",
"tests/ci_build/",
# demo
"demo/",
# misc
"dev/",
"doc/",
]
]
if not all(black_results):
sys.exit(-1) sys.exit(-1)
if args.type_check == 1: if args.type_check == 1:

View File

@ -1,5 +1,5 @@
import sys
import os import os
import sys
from contextlib import contextmanager from contextlib import contextmanager

View File

@ -1,15 +1,16 @@
#!/usr/bin/env python #!/usr/bin/env python
import subprocess
import yaml
import json
from multiprocessing import Pool, cpu_count
import shutil
import os
import sys
import re
import argparse import argparse
import json
import os
import re
import shutil
import subprocess
import sys
from multiprocessing import Pool, cpu_count
from time import time from time import time
import yaml
def call(args): def call(args):
'''Subprocess run wrapper.''' '''Subprocess run wrapper.'''

View File

@ -4,13 +4,10 @@ from typing import Any, Dict
import numpy as np import numpy as np
import pytest import pytest
from hypothesis import assume, given, note, settings, strategies from hypothesis import assume, given, note, settings, strategies
from xgboost.testing.params import cat_parameter_strategy, hist_parameter_strategy
import xgboost as xgb import xgboost as xgb
from xgboost import testing as tm from xgboost import testing as tm
from xgboost.testing.params import (
hist_parameter_strategy,
cat_parameter_strategy,
)
sys.path.append("tests/python") sys.path.append("tests/python")
import test_updaters as test_up import test_updaters as test_up

View File

@ -1,7 +1,7 @@
import os import os
import subprocess import subprocess
import tempfile
import sys import sys
import tempfile
import pytest import pytest

View File

@ -5,9 +5,8 @@ import numpy as np
import pytest import pytest
import xgboost as xgb import xgboost as xgb
from xgboost import RabitTracker from xgboost import RabitTracker, collective
from xgboost import testing as tm from xgboost import testing as tm
from xgboost import collective
if sys.platform.startswith("win"): if sys.platform.startswith("win"):
pytest.skip("Skipping dask tests on Windows", allow_module_level=True) pytest.skip("Skipping dask tests on Windows", allow_module_level=True)

View File

@ -5,14 +5,14 @@ from typing import Any, Dict
import numpy as np import numpy as np
import pytest import pytest
from hypothesis import given, note, settings, strategies from hypothesis import given, note, settings, strategies
from xgboost.testing.params import (
cat_parameter_strategy,
exact_parameter_strategy,
hist_parameter_strategy,
)
import xgboost as xgb import xgboost as xgb
from xgboost import testing as tm from xgboost import testing as tm
from xgboost.testing.params import (
exact_parameter_strategy,
hist_parameter_strategy,
cat_parameter_strategy,
)
def train_result(param, dmat, num_rounds): def train_result(param, dmat, num_rounds):

View File

@ -12,7 +12,7 @@ from itertools import starmap
from math import ceil from math import ceil
from operator import attrgetter, getitem from operator import attrgetter, getitem
from pathlib import Path from pathlib import Path
from typing import Any, Dict, Optional, Tuple, Type, Union, Generator from typing import Any, Dict, Generator, Optional, Tuple, Type, Union
import hypothesis import hypothesis
import numpy as np import numpy as np