Move python tool configurations to pyproject.toml, and add the python 3.11 classifier. (#9112)

This commit is contained in:
Uriya Harpeness 2023-05-05 21:59:06 +03:00 committed by GitHub
parent 55968ed3fa
commit a075aa24ba
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 209 additions and 207 deletions

View File

@ -1,26 +0,0 @@
[MASTER]
ignore=tests
extension-pkg-whitelist=numpy
disable=unexpected-special-method-signature,too-many-nested-blocks,useless-object-inheritance,import-outside-toplevel,unsubscriptable-object,attribute-defined-outside-init
dummy-variables-rgx=(unused|)_.*
reports=no
[BASIC]
# Enforce naming convention
const-naming-style=UPPER_CASE
class-naming-style=PascalCase
function-naming-style=snake_case
method-naming-style=snake_case
attr-naming-style=snake_case
argument-naming-style=snake_case
variable-naming-style=snake_case
class-attribute-naming-style=snake_case
# Allow single-letter variables
variable-rgx=[a-zA-Z_][a-z0-9_]{0,30}$

View File

@ -9,13 +9,13 @@ build-backend = "packager.pep517"
name = "xgboost" name = "xgboost"
version = "2.0.0-dev" version = "2.0.0-dev"
authors = [ authors = [
{name = "Hyunsu Cho", email = "chohyu01@cs.washington.edu"}, { name = "Hyunsu Cho", email = "chohyu01@cs.washington.edu" },
{name = "Jiaming Yuan", email = "jm.yuan@outlook.com"} { name = "Jiaming Yuan", email = "jm.yuan@outlook.com" }
] ]
description = "XGBoost Python Package" description = "XGBoost Python Package"
readme = {file = "README.rst", content-type = "text/x-rst"} readme = { file = "README.rst", content-type = "text/x-rst" }
requires-python = ">=3.8" requires-python = ">=3.8"
license = {text = "Apache-2.0"} license = { text = "Apache-2.0" }
classifiers = [ classifiers = [
"License :: OSI Approved :: Apache Software License", "License :: OSI Approved :: Apache Software License",
"Development Status :: 5 - Production/Stable", "Development Status :: 5 - Production/Stable",
@ -24,7 +24,8 @@ classifiers = [
"Programming Language :: Python :: 3", "Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10" "Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11"
] ]
dependencies = [ dependencies = [
"numpy", "numpy",
@ -44,3 +45,39 @@ plotting = ["graphviz", "matplotlib"]
pyspark = ["pyspark", "scikit-learn", "cloudpickle"] pyspark = ["pyspark", "scikit-learn", "cloudpickle"]
[tool.hatch.build.targets.wheel.hooks.custom] [tool.hatch.build.targets.wheel.hooks.custom]
[tool.isort]
profile = "black"
[tool.mypy]
ignore_missing_imports = true
disallow_untyped_defs = true
follow_imports = "silent"
[tool.pylint.main]
ignore = ["tests"]
extension-pkg-whitelist = ["numpy"]
disable = [
"attribute-defined-outside-init",
"import-outside-toplevel",
"too-many-nested-blocks",
"unexpected-special-method-signature",
"unsubscriptable-object",
"useless-object-inheritance"
]
dummy-variables-rgx = "(unused|)_.*"
reports = false
[tool.pylint.basic]
# Enforce naming convention
const-naming-style = "UPPER_CASE"
class-naming-style = "PascalCase"
function-naming-style = "snake_case"
method-naming-style = "snake_case"
attr-naming-style = "snake_case"
argument-naming-style = "snake_case"
variable-naming-style = "snake_case"
class-attribute-naming-style = "snake_case"
# Allow single-letter variables
variable-rgx = "[a-zA-Z_][a-z0-9_]{0,30}$"

View File

@ -1,7 +0,0 @@
[metadata]
description_file = README.rst
[mypy]
ignore_missing_imports = True
disallow_untyped_defs = True
follow_imports = silent

View File

@ -1,145 +1,19 @@
import argparse import argparse
import os import os
import pathlib
import subprocess import subprocess
import sys import sys
from collections import Counter
from multiprocessing import Pool, cpu_count from multiprocessing import Pool, cpu_count
from typing import Dict, Tuple from typing import Dict, List, Tuple
from pylint import epylint
from test_utils import PY_PACKAGE, ROOT, cd, print_time, record_time from test_utils import PY_PACKAGE, ROOT, cd, print_time, record_time
CURDIR = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
SRCPATH = os.path.normpath(
os.path.join(CURDIR, os.path.pardir, os.path.pardir, "python-package")
)
class LintersPaths:
"""The paths each linter run on."""
@record_time BLACK = (
def run_black(rel_path: str, fix: bool) -> bool:
if fix:
cmd = ["black", "-q", rel_path]
else:
cmd = ["black", "-q", "--check", rel_path]
ret = subprocess.run(cmd).returncode
if ret != 0:
subprocess.run(["black", "--version"])
msg = """
Please run the following command on your machine to address the formatting error:
"""
msg += " ".join(cmd)
print(msg, file=sys.stderr)
return False
return True
@record_time
def run_isort(rel_path: str, fix: bool) -> bool:
if fix:
cmd = ["isort", f"--src={SRCPATH}", "--profile=black", rel_path]
else:
cmd = ["isort", f"--src={SRCPATH}", "--check", "--profile=black", rel_path]
ret = subprocess.run(cmd).returncode
if ret != 0:
subprocess.run(["isort", "--version"])
msg = """
Please run the following command on your machine to address the formatting error:
"""
msg += " ".join(cmd)
print(msg, file=sys.stderr)
return False
return True
@record_time
@cd(PY_PACKAGE)
def run_mypy(rel_path: str) -> bool:
path = os.path.join(ROOT, rel_path)
ret = subprocess.run(["mypy", path])
if ret.returncode != 0:
return False
return True
class PyLint:
"""A helper for running pylint, mostly copied from dmlc-core/scripts."""
def __init__(self) -> None:
self.pypackage_root = os.path.join(ROOT, "python-package/")
self.pylint_cats = set(["error", "warning", "convention", "refactor"])
self.pylint_opts = [
"--extension-pkg-whitelist=numpy",
"--rcfile=" + os.path.join(self.pypackage_root, ".pylintrc"),
]
def run(self, path: str) -> Tuple[Dict, str, str]:
(pylint_stdout, pylint_stderr) = epylint.py_run(
" ".join([str(path)] + self.pylint_opts), return_std=True
)
emap = {}
err = pylint_stderr.read()
out = []
for line in pylint_stdout:
out.append(line)
key = line.split(":")[-1].split("(")[0].strip()
if key not in self.pylint_cats:
continue
if key not in emap:
emap[key] = 1
else:
emap[key] += 1
return {path: emap}, err, "\n".join(out)
def __call__(self) -> bool:
all_errors: Dict[str, Dict[str, int]] = {}
def print_summary_map(result_map: Dict[str, Dict[str, int]]) -> int:
"""Print summary of certain result map."""
if len(result_map) == 0:
return 0
ftype = "Python"
npass = sum(1 for x in result_map.values() if len(x) == 0)
print(f"====={npass}/{len(result_map)} {ftype} files passed check=====")
for fname, emap in result_map.items():
if len(emap) == 0:
continue
print(
f"{fname}: {sum(emap.values())} Errors of {len(emap)} Categories map={str(emap)}"
)
return len(result_map) - npass
all_scripts = []
for root, dirs, files in os.walk(self.pypackage_root):
for f in files:
if f.endswith(".py"):
all_scripts.append(os.path.join(root, f))
with Pool(cpu_count()) as pool:
error_maps = pool.map(self.run, all_scripts)
for emap, err, out in error_maps:
print(out)
if len(err) != 0:
print(err)
all_errors.update(emap)
nerr = print_summary_map(all_errors)
return nerr == 0
@record_time
def run_pylint() -> bool:
return PyLint()()
@record_time
def main(args: argparse.Namespace) -> None:
if args.format == 1:
black_results = [
run_black(path, args.fix)
for path in [
# core # core
"python-package/", "python-package/",
# tests # tests
@ -150,7 +24,6 @@ def main(args: argparse.Namespace) -> None:
"tests/python/test_quantile_dmatrix.py", "tests/python/test_quantile_dmatrix.py",
"tests/python/test_tree_regularization.py", "tests/python/test_tree_regularization.py",
"tests/python-gpu/test_gpu_data_iterator.py", "tests/python-gpu/test_gpu_data_iterator.py",
"tests/ci_build/lint_python.py",
"tests/test_distributed/test_with_spark/", "tests/test_distributed/test_with_spark/",
"tests/test_distributed/test_gpu_with_spark/", "tests/test_distributed/test_gpu_with_spark/",
# demo # demo
@ -168,14 +41,9 @@ def main(args: argparse.Namespace) -> None:
"tests/ci_build/test_r_package.py", "tests/ci_build/test_r_package.py",
"tests/ci_build/test_utils.py", "tests/ci_build/test_utils.py",
"tests/ci_build/change_version.py", "tests/ci_build/change_version.py",
] )
]
if not all(black_results):
sys.exit(-1)
isort_results = [ ISORT = (
run_isort(path, args.fix)
for path in [
# core # core
"python-package/", "python-package/",
# tests # tests
@ -188,17 +56,18 @@ def main(args: argparse.Namespace) -> None:
# misc # misc
"dev/", "dev/",
"doc/", "doc/",
] )
]
if not all(isort_results):
sys.exit(-1)
if args.type_check == 1: MYPY = (
if not all(
run_mypy(path)
for path in [
# core # core
"python-package/", "python-package/",
# tests
"tests/python/test_dt.py",
"tests/python/test_data_iterator.py",
"tests/python-gpu/test_gpu_data_iterator.py",
"tests/test_distributed/test_with_spark/test_data.py",
"tests/test_distributed/test_gpu_with_spark/test_data.py",
"tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py",
# demo # demo
"demo/json-model/json_parser.py", "demo/json-model/json_parser.py",
"demo/guide-python/external_memory.py", "demo/guide-python/external_memory.py",
@ -207,21 +76,150 @@ def main(args: argparse.Namespace) -> None:
"demo/guide-python/individual_trees.py", "demo/guide-python/individual_trees.py",
"demo/guide-python/quantile_regression.py", "demo/guide-python/quantile_regression.py",
"demo/guide-python/multioutput_regression.py", "demo/guide-python/multioutput_regression.py",
# tests
"tests/python/test_dt.py",
"tests/python/test_data_iterator.py",
"tests/python-gpu/test_gpu_data_iterator.py",
"tests/test_distributed/test_with_spark/test_data.py",
"tests/test_distributed/test_gpu_with_spark/test_data.py",
"tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py",
# CI # CI
"tests/ci_build/lint_python.py", "tests/ci_build/lint_python.py",
"tests/ci_build/test_r_package.py", "tests/ci_build/test_r_package.py",
"tests/ci_build/test_utils.py", "tests/ci_build/test_utils.py",
"tests/ci_build/change_version.py", "tests/ci_build/change_version.py",
] )
def check_cmd_print_failure_assistance(cmd: List[str]) -> bool:
    """Run ``cmd`` and report whether it exited successfully.

    When the command exits with a non-zero status, the tool's version
    (``cmd[0] --version``) is printed and a hint containing the exact command
    line is written to stderr so the failure can be reproduced locally.

    Parameters
    ----------
    cmd :
        The command line to execute; ``cmd[0]`` is the executable.

    Returns
    -------
    ``True`` if the command exits with status 0. ``False`` if it exits with
    any other status, or if the executable cannot be found.
    """
    try:
        if subprocess.run(cmd, check=False).returncode == 0:
            return True
        # Show which version of the tool produced the failure.
        subprocess.run([cmd[0], "--version"], check=False)
    except FileNotFoundError:
        # A missing tool is reported as a failed check instead of a traceback.
        print(f"Command not found: {cmd[0]}", file=sys.stderr)
        return False

    msg = """
Please run the following command on your machine to address the formatting error:
"""
    msg += " ".join(cmd)
    print(msg, file=sys.stderr)
    return False
@record_time
@cd(PY_PACKAGE)
def run_black(rel_path: str, fix: bool) -> bool:
    """Run black on ``rel_path``, resolved against ``ROOT``.

    When ``fix`` is false, ``--check`` is appended to the command line.
    Returns whether the command succeeded.
    """
    target = os.path.join(ROOT, rel_path)
    check_only = [] if fix else ["--check"]
    return check_cmd_print_failure_assistance(["black", "-q", target, *check_only])
@record_time
@cd(PY_PACKAGE)
def run_isort(rel_path: str, fix: bool) -> bool:
    """Run isort on ``rel_path``, resolved against ``ROOT``.

    When ``fix`` is false, ``--check`` is appended to the command line.
    Returns whether the command succeeded.
    """
    # isort does not reliably locate the config file by itself, so the
    # settings path is passed explicitly.
    target = os.path.join(ROOT, rel_path)
    check_only = [] if fix else ["--check"]
    return check_cmd_print_failure_assistance(
        ["isort", "--settings-path", PY_PACKAGE, target, *check_only]
    )
@record_time
@cd(PY_PACKAGE)
def run_mypy(rel_path: str) -> bool:
    """Run mypy on ``rel_path``, resolved against ``ROOT``.

    Returns whether the command succeeded.
    """
    target = os.path.join(ROOT, rel_path)
    return check_cmd_print_failure_assistance(["mypy", target])
class PyLint:
    """A helper for running pylint, mostly copied from dmlc-core/scripts."""

    MESSAGE_CATEGORIES = {
        "Fatal",
        "Error",
        "Warning",
        "Convention",
        "Refactor",
        "Information",
    }
    # Maps the first letter of each category ("E", "W", ...) to its full name.
    MESSAGE_PREFIX_TO_CATEGORY = {
        category[0]: category for category in MESSAGE_CATEGORIES
    }

    @classmethod
    @cd(PY_PACKAGE)
    def get_summary(cls, path: str) -> Tuple[str, Dict[str, int], str, str, bool]:
        """Get the summary of pylint's errors, warnings, etc.

        Runs ``pylint`` on ``path`` and counts the reported messages per
        category.

        Returns
        -------
        A tuple of ``(path, per-category message counts, pylint's stdout,
        pylint's stderr, whether pylint exited with status 0)``.
        """
        ret = subprocess.run(["pylint", path], capture_output=True, check=False)
        stdout = ret.stdout.decode("utf-8")

        emap: Dict[str, int] = Counter()
        for line in stdout.splitlines():
            if ":" not in line:
                continue
            # The second-to-last ":"-separated field is taken as the message
            # id and its first letter as the category.  Slicing (instead of
            # indexing) keeps an empty field from raising IndexError.
            prefix = line.split(":")[-2].strip()[:1]
            category = cls.MESSAGE_PREFIX_TO_CATEGORY.get(prefix)
            if category:
                emap[category] += 1

        return path, emap, stdout, ret.stderr.decode("utf-8"), ret.returncode == 0

    @staticmethod
    def print_summary_map(result_map: Dict[str, Dict[str, int]]) -> int:
        """Print summary of certain result map.

        ``result_map`` maps a file name to its per-category message counts.
        Returns the number of files that have at least one counted message.
        """
        if not result_map:
            return 0

        ftype = "Python"
        nfail = sum(map(bool, result_map.values()))
        print(
            f"====={len(result_map) - nfail}/{len(result_map)} {ftype} files passed check====="
        )
        for fname, emap in result_map.items():
            if emap:
                print(
                    f"{fname}: {sum(emap.values())} Errors of {len(emap)} Categories map={emap}"
                )
        return nfail

    @classmethod
    def run(cls) -> bool:
        """Run pylint with parallelization on a batch of paths.

        Every ``*.py`` file below ``PY_PACKAGE`` is checked.  Returns ``True``
        only if no file has counted messages and every pylint process exited
        with status 0.
        """
        all_errors: Dict[str, Dict[str, int]] = {}
        # Files where pylint failed without any recognized message (for
        # example because pylint itself crashed).  The summary map cannot
        # represent these, so they are tracked separately.
        unexplained_failures = 0

        with Pool(cpu_count()) as pool:
            error_maps = pool.map(
                cls.get_summary,
                (os.fspath(file) for file in pathlib.Path(PY_PACKAGE).glob("**/*.py")),
            )

        for path, emap, out, err, succeeded in error_maps:
            all_errors[path] = emap
            if succeeded and not emap:
                continue
            # Show pylint's output for every file that is reported as failing.
            print(out)
            if err:
                print(err)
            if not emap:
                unexplained_failures += 1
                print(
                    f"{path}: pylint exited with a failure status but no messages were recognized",
                    file=sys.stderr,
                )

        nerr = cls.print_summary_map(all_errors)
        return nerr == 0 and unexplained_failures == 0
@record_time
def run_pylint() -> bool:
    """Run the pylint check via ``PyLint.run`` and return its result."""
    passed = PyLint.run()
    return passed
@record_time
def main(args: argparse.Namespace) -> None:
if args.format == 1:
black_results = [run_black(path, args.fix) for path in LintersPaths.BLACK]
if not all(black_results):
sys.exit(-1)
isort_results = [run_isort(path, args.fix) for path in LintersPaths.ISORT]
if not all(isort_results):
sys.exit(-1)
if args.type_check == 1:
mypy_results = [run_mypy(path) for path in LintersPaths.MYPY]
if not all(mypy_results):
sys.exit(-1) sys.exit(-1)
if args.pylint == 1: if args.pylint == 1: