diff --git a/python-package/.pylintrc b/python-package/.pylintrc deleted file mode 100644 index 4c8f890a6..000000000 --- a/python-package/.pylintrc +++ /dev/null @@ -1,26 +0,0 @@ -[MASTER] - -ignore=tests - -extension-pkg-whitelist=numpy - -disable=unexpected-special-method-signature,too-many-nested-blocks,useless-object-inheritance,import-outside-toplevel,unsubscriptable-object,attribute-defined-outside-init - -dummy-variables-rgx=(unused|)_.* - -reports=no - -[BASIC] - -# Enforce naming convention -const-naming-style=UPPER_CASE -class-naming-style=PascalCase -function-naming-style=snake_case -method-naming-style=snake_case -attr-naming-style=snake_case -argument-naming-style=snake_case -variable-naming-style=snake_case -class-attribute-naming-style=snake_case - -# Allow single-letter variables -variable-rgx=[a-zA-Z_][a-z0-9_]{0,30}$ diff --git a/python-package/pyproject.toml b/python-package/pyproject.toml index ec6f62ceb..2bf463db6 100644 --- a/python-package/pyproject.toml +++ b/python-package/pyproject.toml @@ -9,13 +9,13 @@ build-backend = "packager.pep517" name = "xgboost" version = "2.0.0-dev" authors = [ - {name = "Hyunsu Cho", email = "chohyu01@cs.washington.edu"}, - {name = "Jiaming Yuan", email = "jm.yuan@outlook.com"} + { name = "Hyunsu Cho", email = "chohyu01@cs.washington.edu" }, + { name = "Jiaming Yuan", email = "jm.yuan@outlook.com" } ] description = "XGBoost Python Package" -readme = {file = "README.rst", content-type = "text/x-rst"} +readme = { file = "README.rst", content-type = "text/x-rst" } requires-python = ">=3.8" -license = {text = "Apache-2.0"} +license = { text = "Apache-2.0" } classifiers = [ "License :: OSI Approved :: Apache Software License", "Development Status :: 5 - Production/Stable", @@ -24,7 +24,8 @@ classifiers = [ "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10" + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11" ] dependencies = [ "numpy", @@ -44,3 +45,39 @@ plotting = ["graphviz", "matplotlib"] pyspark = ["pyspark", "scikit-learn", "cloudpickle"] [tool.hatch.build.targets.wheel.hooks.custom] + +[tool.isort] +profile = "black" + +[tool.mypy] +ignore_missing_imports = true +disallow_untyped_defs = true +follow_imports = "silent" + +[tool.pylint.main] +ignore = ["tests"] +extension-pkg-whitelist = ["numpy"] +disable = [ + "attribute-defined-outside-init", + "import-outside-toplevel", + "too-many-nested-blocks", + "unexpected-special-method-signature", + "unsubscriptable-object", + "useless-object-inheritance" +] +dummy-variables-rgx = "(unused|)_.*" +reports = false + +[tool.pylint.basic] +# Enforce naming convention +const-naming-style = "UPPER_CASE" +class-naming-style = "PascalCase" +function-naming-style = "snake_case" +method-naming-style = "snake_case" +attr-naming-style = "snake_case" +argument-naming-style = "snake_case" +variable-naming-style = "snake_case" +class-attribute-naming-style = "snake_case" + +# Allow single-letter variables +variable-rgx = "[a-zA-Z_][a-z0-9_]{0,30}$" diff --git a/python-package/setup.cfg b/python-package/setup.cfg deleted file mode 100644 index 415f32010..000000000 --- a/python-package/setup.cfg +++ /dev/null @@ -1,7 +0,0 @@ -[metadata] -description_file = README.rst - -[mypy] -ignore_missing_imports = True -disallow_untyped_defs = True -follow_imports = silent \ No newline at end of file diff --git a/tests/ci_build/lint_python.py b/tests/ci_build/lint_python.py index 3f553da9f..4601c4378 100644 --- a/tests/ci_build/lint_python.py +++ b/tests/ci_build/lint_python.py @@ -1,227 +1,225 @@ import argparse import os +import pathlib import subprocess import sys +from collections import Counter from multiprocessing import Pool, cpu_count -from typing import Dict, Tuple +from typing import Dict, List, Tuple -from pylint import epylint from test_utils import PY_PACKAGE, ROOT, cd, print_time, record_time -CURDIR = os.path.normpath(os.path.abspath(os.path.dirname(__file__))) -SRCPATH = os.path.normpath( - os.path.join(CURDIR, os.path.pardir, os.path.pardir, "python-package") -) + +class LintersPaths: + """The paths each linter run on.""" + + BLACK = ( + # core + "python-package/", + # tests + "tests/python/test_config.py", + "tests/python/test_data_iterator.py", + "tests/python/test_dt.py", + "tests/python/test_predict.py", + "tests/python/test_quantile_dmatrix.py", + "tests/python/test_tree_regularization.py", + "tests/python-gpu/test_gpu_data_iterator.py", + "tests/test_distributed/test_with_spark/", + "tests/test_distributed/test_gpu_with_spark/", + # demo + "demo/json-model/json_parser.py", + "demo/guide-python/cat_in_the_dat.py", + "demo/guide-python/categorical.py", + "demo/guide-python/feature_weights.py", + "demo/guide-python/sklearn_parallel.py", + "demo/guide-python/spark_estimator_examples.py", + "demo/guide-python/individual_trees.py", + "demo/guide-python/quantile_regression.py", + "demo/guide-python/multioutput_regression.py", + # CI + "tests/ci_build/lint_python.py", + "tests/ci_build/test_r_package.py", + "tests/ci_build/test_utils.py", + "tests/ci_build/change_version.py", + ) + + ISORT = ( + # core + "python-package/", + # tests + "tests/test_distributed/", + "tests/python/", + "tests/python-gpu/", + "tests/ci_build/", + # demo + "demo/", + # misc + "dev/", + "doc/", + ) + + MYPY = ( + # core + "python-package/", + # tests + "tests/python/test_dt.py", + "tests/python/test_data_iterator.py", + "tests/python-gpu/test_gpu_data_iterator.py", + "tests/test_distributed/test_with_spark/test_data.py", + "tests/test_distributed/test_gpu_with_spark/test_data.py", + "tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py", + # demo + "demo/json-model/json_parser.py", + "demo/guide-python/external_memory.py", + "demo/guide-python/cat_in_the_dat.py", + "demo/guide-python/feature_weights.py", + "demo/guide-python/individual_trees.py", + "demo/guide-python/quantile_regression.py", + "demo/guide-python/multioutput_regression.py", + # CI + "tests/ci_build/lint_python.py", + "tests/ci_build/test_r_package.py", + "tests/ci_build/test_utils.py", + "tests/ci_build/change_version.py", + ) + + +def check_cmd_print_failure_assistance(cmd: List[str]) -> bool: + if subprocess.run(cmd).returncode == 0: + return True + + subprocess.run([cmd[0], "--version"]) + msg = """ +Please run the following command on your machine to address the formatting error: + + """ + msg += " ".join(cmd) + print(msg, file=sys.stderr) + return False @record_time +@cd(PY_PACKAGE) def run_black(rel_path: str, fix: bool) -> bool: - if fix: - cmd = ["black", "-q", rel_path] - else: - cmd = ["black", "-q", "--check", rel_path] - ret = subprocess.run(cmd).returncode - if ret != 0: - subprocess.run(["black", "--version"]) - msg = """ -Please run the following command on your machine to address the formatting error: + cmd = ["black", "-q", os.path.join(ROOT, rel_path)] + if not fix: + cmd += ["--check"] - """ - msg += " ".join(cmd) - print(msg, file=sys.stderr) - return False - return True + return check_cmd_print_failure_assistance(cmd) @record_time +@cd(PY_PACKAGE) def run_isort(rel_path: str, fix: bool) -> bool: - if fix: - cmd = ["isort", f"--src={SRCPATH}", "--profile=black", rel_path] - else: - cmd = ["isort", f"--src={SRCPATH}", "--check", "--profile=black", rel_path] - ret = subprocess.run(cmd).returncode - if ret != 0: - subprocess.run(["isort", "--version"]) - msg = """ -Please run the following command on your machine to address the formatting error: + # Isort gets confused when trying to find the config file, so specified explicitly. + cmd = ["isort", "--settings-path", PY_PACKAGE, os.path.join(ROOT, rel_path)] + if not fix: + cmd += ["--check"] - """ - msg += " ".join(cmd) - print(msg, file=sys.stderr) - return False - return True + return check_cmd_print_failure_assistance(cmd) @record_time @cd(PY_PACKAGE) def run_mypy(rel_path: str) -> bool: - path = os.path.join(ROOT, rel_path) - ret = subprocess.run(["mypy", path]) - if ret.returncode != 0: - return False - return True + cmd = ["mypy", os.path.join(ROOT, rel_path)] + + return check_cmd_print_failure_assistance(cmd) class PyLint: """A helper for running pylint, mostly copied from dmlc-core/scripts.""" - def __init__(self) -> None: - self.pypackage_root = os.path.join(ROOT, "python-package/") - self.pylint_cats = set(["error", "warning", "convention", "refactor"]) - self.pylint_opts = [ - "--extension-pkg-whitelist=numpy", - "--rcfile=" + os.path.join(self.pypackage_root, ".pylintrc"), - ] + MESSAGE_CATEGORIES = { + "Fatal", + "Error", + "Warning", + "Convention", + "Refactor", + "Information", + } + MESSAGE_PREFIX_TO_CATEGORY = { + category[0]: category for category in MESSAGE_CATEGORIES + } - def run(self, path: str) -> Tuple[Dict, str, str]: - (pylint_stdout, pylint_stderr) = epylint.py_run( - " ".join([str(path)] + self.pylint_opts), return_std=True + @classmethod + @cd(PY_PACKAGE) + def get_summary(cls, path: str) -> Tuple[str, Dict[str, int], str, str, bool]: + """Get the summary of pylint's errors, warnings, etc.""" + ret = subprocess.run(["pylint", path], capture_output=True) + stdout = ret.stdout.decode("utf-8") + + emap: Dict[str, int] = Counter() + for line in stdout.splitlines(): + if ":" in line and ( + category := cls.MESSAGE_PREFIX_TO_CATEGORY.get( + line.split(":")[-2].strip()[0] + ) + ): + emap[category] += 1 + + return path, emap, stdout, ret.stderr.decode("utf-8"), ret.returncode == 0 + + @staticmethod + def print_summary_map(result_map: Dict[str, Dict[str, int]]) -> int: + """Print summary of certain result map.""" + if len(result_map) == 0: + return 0 + + ftype = "Python" + nfail = sum(map(bool, result_map.values())) + print( + f"====={len(result_map) - nfail}/{len(result_map)} {ftype} files passed check=====" ) - emap = {} - err = pylint_stderr.read() + for fname, emap in result_map.items(): + if emap: + print( + f"{fname}: {sum(emap.values())} Errors of {len(emap)} Categories map={emap}" + ) + return nfail - out = [] - for line in pylint_stdout: - out.append(line) - key = line.split(":")[-1].split("(")[0].strip() - if key not in self.pylint_cats: - continue - if key not in emap: - emap[key] = 1 - else: - emap[key] += 1 - - return {path: emap}, err, "\n".join(out) - - def __call__(self) -> bool: + @classmethod + def run(cls) -> bool: + """Run pylint with parallelization on a batch of paths.""" all_errors: Dict[str, Dict[str, int]] = {} - def print_summary_map(result_map: Dict[str, Dict[str, int]]) -> int: - """Print summary of certain result map.""" - if len(result_map) == 0: - return 0 - ftype = "Python" - npass = sum(1 for x in result_map.values() if len(x) == 0) - print(f"====={npass}/{len(result_map)} {ftype} files passed check=====") - for fname, emap in result_map.items(): - if len(emap) == 0: - continue - print( - f"{fname}: {sum(emap.values())} Errors of {len(emap)} Categories map={str(emap)}" - ) - return len(result_map) - npass - - all_scripts = [] - for root, dirs, files in os.walk(self.pypackage_root): - for f in files: - if f.endswith(".py"): - all_scripts.append(os.path.join(root, f)) - with Pool(cpu_count()) as pool: - error_maps = pool.map(self.run, all_scripts) - for emap, err, out in error_maps: + error_maps = pool.map( + cls.get_summary, + (os.fspath(file) for file in pathlib.Path(PY_PACKAGE).glob("**/*.py")), + ) + for path, emap, out, err, succeeded in error_maps: + all_errors[path] = emap + if succeeded: + continue + print(out) if len(err) != 0: print(err) - all_errors.update(emap) - nerr = print_summary_map(all_errors) + nerr = cls.print_summary_map(all_errors) return nerr == 0 @record_time def run_pylint() -> bool: - return PyLint()() + return PyLint.run() @record_time def main(args: argparse.Namespace) -> None: if args.format == 1: - black_results = [ - run_black(path, args.fix) - for path in [ - # core - "python-package/", - # tests - "tests/python/test_config.py", - "tests/python/test_data_iterator.py", - "tests/python/test_dt.py", - "tests/python/test_predict.py", - "tests/python/test_quantile_dmatrix.py", - "tests/python/test_tree_regularization.py", - "tests/python-gpu/test_gpu_data_iterator.py", - "tests/ci_build/lint_python.py", - "tests/test_distributed/test_with_spark/", - "tests/test_distributed/test_gpu_with_spark/", - # demo - "demo/json-model/json_parser.py", - "demo/guide-python/cat_in_the_dat.py", - "demo/guide-python/categorical.py", - "demo/guide-python/feature_weights.py", - "demo/guide-python/sklearn_parallel.py", - "demo/guide-python/spark_estimator_examples.py", - "demo/guide-python/individual_trees.py", - "demo/guide-python/quantile_regression.py", - "demo/guide-python/multioutput_regression.py", - # CI - "tests/ci_build/lint_python.py", - "tests/ci_build/test_r_package.py", - "tests/ci_build/test_utils.py", - "tests/ci_build/change_version.py", - ] - ] + black_results = [run_black(path, args.fix) for path in LintersPaths.BLACK] if not all(black_results): sys.exit(-1) - isort_results = [ - run_isort(path, args.fix) - for path in [ - # core - "python-package/", - # tests - "tests/test_distributed/", - "tests/python/", - "tests/python-gpu/", - "tests/ci_build/", - # demo - "demo/", - # misc - "dev/", - "doc/", - ] - ] + isort_results = [run_isort(path, args.fix) for path in LintersPaths.ISORT] if not all(isort_results): sys.exit(-1) if args.type_check == 1: - if not all( - run_mypy(path) - for path in [ - # core - "python-package/", - # demo - "demo/json-model/json_parser.py", - "demo/guide-python/external_memory.py", - "demo/guide-python/cat_in_the_dat.py", - "demo/guide-python/feature_weights.py", - "demo/guide-python/individual_trees.py", - "demo/guide-python/quantile_regression.py", - "demo/guide-python/multioutput_regression.py", - # tests - "tests/python/test_dt.py", - "tests/python/test_data_iterator.py", - "tests/python-gpu/test_gpu_data_iterator.py", - "tests/test_distributed/test_with_spark/test_data.py", - "tests/test_distributed/test_gpu_with_spark/test_data.py", - "tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py", - # CI - "tests/ci_build/lint_python.py", - "tests/ci_build/test_r_package.py", - "tests/ci_build/test_utils.py", - "tests/ci_build/change_version.py", - ] - ): - subprocess.check_call(["mypy", "--version"]) + mypy_results = [run_mypy(path) for path in LintersPaths.MYPY] + if not all(mypy_results): sys.exit(-1) if args.pylint == 1: