From 3970e4e6bb1688b662abd3c58cda615c5ba7892d Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Sat, 23 Jul 2022 08:12:37 +0800 Subject: [PATCH] Move pylint helper from dmlc-core. (#8101) * Move pylint helper from dmlc-core. - Move the helper into the XGBoost ci_build. - Run it with multiprocessing. * Fix original test. --- .github/workflows/main.yml | 7 +- .github/workflows/python_tests.yml | 8 ++- tests/ci_build/conda_env/python_lint.yml | 4 ++ tests/ci_build/lint_python.py | 87 ++++++++++++++++++++++-- 4 files changed, 95 insertions(+), 11 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index a3b2d77c7..d2ecb1df4 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -129,7 +129,7 @@ jobs: lint: runs-on: ubuntu-latest - name: Code linting for Python and C++ + name: Code linting for C++ steps: - uses: actions/checkout@v2 with: @@ -140,11 +140,10 @@ jobs: architecture: 'x64' - name: Install Python packages run: | - python -m pip install wheel setuptools - python -m pip install pylint cpplint numpy scipy scikit-learn pyspark pandas cloudpickle + python -m pip install wheel setuptools cpplint pylint - name: Run lint run: | - make lint + LINT_LANG=cpp make lint doxygen: runs-on: ubuntu-latest diff --git a/.github/workflows/python_tests.yml b/.github/workflows/python_tests.yml index d3b545a2e..afa70a826 100644 --- a/.github/workflows/python_tests.yml +++ b/.github/workflows/python_tests.yml @@ -28,11 +28,15 @@ jobs: - name: Run mypy shell: bash -l {0} run: | - python tests/ci_build/lint_python.py --format=0 --type-check=1 + python tests/ci_build/lint_python.py --format=0 --type-check=1 --pylint=0 - name: Run formatter shell: bash -l {0} run: | - python tests/ci_build/lint_python.py --format=1 --type-check=0 + python tests/ci_build/lint_python.py --format=1 --type-check=0 --pylint=0 + - name: Run pylint + shell: bash -l {0} + run: | + python tests/ci_build/lint_python.py --format=0 --type-check=0 --pylint=1 python-sdist-test: runs-on: ${{ matrix.os }} diff --git a/tests/ci_build/conda_env/python_lint.yml b/tests/ci_build/conda_env/python_lint.yml index a0238b4d7..f1f877d52 100644 --- a/tests/ci_build/conda_env/python_lint.yml +++ b/tests/ci_build/conda_env/python_lint.yml @@ -3,13 +3,17 @@ channels: - conda-forge dependencies: - python=3.8 +- pylint - wheel - setuptools - mypy - numpy - scipy - pandas +- scikit-learn - dask - distributed - black - isort +- pyspark +- cloudpickle diff --git a/tests/ci_build/lint_python.py b/tests/ci_build/lint_python.py index 31516e559..f7ca6d1c0 100644 --- a/tests/ci_build/lint_python.py +++ b/tests/ci_build/lint_python.py @@ -2,14 +2,17 @@ import argparse import os import subprocess import sys +from multiprocessing import Pool, cpu_count +from typing import Dict, Tuple +from pylint import epylint from test_utils import DirectoryExcursion CURDIR = os.path.normpath(os.path.abspath(os.path.dirname(__file__))) PROJECT_ROOT = os.path.normpath(os.path.join(CURDIR, os.path.pardir, os.path.pardir)) -def run_formatter(rel_path: str): +def run_formatter(rel_path: str) -> bool: path = os.path.join(PROJECT_ROOT, rel_path) isort_ret = subprocess.run(["isort", "--check", "--profile=black", path]).returncode black_ret = subprocess.run( @@ -25,7 +28,7 @@ def run_formatter(rel_path: str): return True -def run_mypy(rel_path: str): +def run_mypy(rel_path: str) -> bool: with DirectoryExcursion(os.path.join(PROJECT_ROOT, "python-package")): path = os.path.join(PROJECT_ROOT, rel_path) ret = subprocess.run(["mypy", path]) @@ -34,16 +37,85 @@ def run_mypy(rel_path: str): return True +class PyLint: + """A helper for running pylint, mostly copied from dmlc-core/scripts. """ + def __init__(self) -> None: + self.pypackage_root = os.path.join(PROJECT_ROOT, "python-package/") + self.pylint_cats = set(["error", "warning", "convention", "refactor"]) + self.pylint_opts = [ + "--extension-pkg-whitelist=numpy", + "--rcfile=" + os.path.join(self.pypackage_root, ".pylintrc"), + ] + + def run(self, path: str) -> Tuple[Dict, str, str]: + (pylint_stdout, pylint_stderr) = epylint.py_run( + " ".join([str(path)] + self.pylint_opts), return_std=True + ) + emap = {} + err = pylint_stderr.read() + + out = [] + for line in pylint_stdout: + out.append(line) + key = line.split(":")[-1].split("(")[0].strip() + if key not in self.pylint_cats: + continue + if key not in emap: + emap[key] = 1 + else: + emap[key] += 1 + + return {path: emap}, err, "\n".join(out) + + def __call__(self) -> bool: + all_errors: Dict[str, Dict[str, int]] = {} + + def print_summary_map(result_map: Dict[str, Dict[str, int]]) -> int: + """Print summary of certain result map.""" + if len(result_map) == 0: + return 0 + ftype = "Python" + npass = sum(1 for x in result_map.values() if len(x) == 0) + print(f"====={npass}/{len(result_map)} {ftype} files passed check=====") + for fname, emap in result_map.items(): + if len(emap) == 0: + continue + print( + f"{fname}: {sum(emap.values())} Errors of {len(emap)} Categories map={str(emap)}" + ) + return len(result_map) - npass + + all_scripts = [] + for root, dirs, files in os.walk(self.pypackage_root): + for f in files: + if f.endswith(".py"): + all_scripts.append(os.path.join(root, f)) + + with Pool(cpu_count()) as pool: + error_maps = pool.map(self.run, all_scripts) + for emap, err, out in error_maps: + print(out) + if len(err) != 0: + print(err) + all_errors.update(emap) + + nerr = print_summary_map(all_errors) + return nerr == 0 + + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--format", type=int, choices=[0, 1], default=1) parser.add_argument("--type-check", type=int, choices=[0, 1], default=1) + parser.add_argument("--pylint", type=int, choices=[0, 1], default=1) args = parser.parse_args() if args.format == 1: if not all( - [ - run_formatter("python-package/xgboost/dask.py"), - run_formatter("python-package/xgboost/spark"), + run_formatter(path) + for path in [ + "python-package/xgboost/dask.py", + "python-package/xgboost/spark", + "tests/ci_build/lint_python.py", ] ): sys.exit(-1) @@ -58,6 +130,11 @@ if __name__ == "__main__": "tests/python/test_data_iterator.py", "tests/python-gpu/test_gpu_with_dask.py", "tests/python-gpu/test_gpu_data_iterator.py", + "tests/ci_build/lint_python.py", ] ): sys.exit(-1) + + if args.pylint == 1: + if not PyLint()(): + sys.exit(-1)