Move pylint helper from dmlc-core. (#8101)

* Move pylint helper from dmlc-core.

- Move the helper into the XGBoost ci_build.
- Run it with multiprocessing.

* Fix original test.
This commit is contained in:
Jiaming Yuan 2022-07-23 08:12:37 +08:00 committed by GitHub
parent 7785d65c8a
commit 3970e4e6bb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 95 additions and 11 deletions

View File

@ -129,7 +129,7 @@ jobs:
lint: lint:
runs-on: ubuntu-latest runs-on: ubuntu-latest
name: Code linting for Python and C++ name: Code linting for C++
steps: steps:
- uses: actions/checkout@v2 - uses: actions/checkout@v2
with: with:
@ -140,11 +140,10 @@ jobs:
architecture: 'x64' architecture: 'x64'
- name: Install Python packages - name: Install Python packages
run: | run: |
python -m pip install wheel setuptools python -m pip install wheel setuptools cpplint pylint
python -m pip install pylint cpplint numpy scipy scikit-learn pyspark pandas cloudpickle
- name: Run lint - name: Run lint
run: | run: |
make lint LINT_LANG=cpp make lint
doxygen: doxygen:
runs-on: ubuntu-latest runs-on: ubuntu-latest

View File

@ -28,11 +28,15 @@ jobs:
- name: Run mypy - name: Run mypy
shell: bash -l {0} shell: bash -l {0}
run: | run: |
python tests/ci_build/lint_python.py --format=0 --type-check=1 python tests/ci_build/lint_python.py --format=0 --type-check=1 --pylint=0
- name: Run formatter - name: Run formatter
shell: bash -l {0} shell: bash -l {0}
run: | run: |
python tests/ci_build/lint_python.py --format=1 --type-check=0 python tests/ci_build/lint_python.py --format=1 --type-check=0 --pylint=0
- name: Run pylint
shell: bash -l {0}
run: |
python tests/ci_build/lint_python.py --format=0 --type-check=0 --pylint=1
python-sdist-test: python-sdist-test:
runs-on: ${{ matrix.os }} runs-on: ${{ matrix.os }}

View File

@ -3,13 +3,17 @@ channels:
- conda-forge - conda-forge
dependencies: dependencies:
- python=3.8 - python=3.8
- pylint
- wheel - wheel
- setuptools - setuptools
- mypy - mypy
- numpy - numpy
- scipy - scipy
- pandas - pandas
- scikit-learn
- dask - dask
- distributed - distributed
- black - black
- isort - isort
- pyspark
- cloudpickle

View File

@ -2,14 +2,17 @@ import argparse
import os import os
import subprocess import subprocess
import sys import sys
from multiprocessing import Pool, cpu_count
from typing import Dict, Tuple
from pylint import epylint
from test_utils import DirectoryExcursion from test_utils import DirectoryExcursion
CURDIR = os.path.normpath(os.path.abspath(os.path.dirname(__file__))) CURDIR = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
PROJECT_ROOT = os.path.normpath(os.path.join(CURDIR, os.path.pardir, os.path.pardir)) PROJECT_ROOT = os.path.normpath(os.path.join(CURDIR, os.path.pardir, os.path.pardir))
def run_formatter(rel_path: str): def run_formatter(rel_path: str) -> bool:
path = os.path.join(PROJECT_ROOT, rel_path) path = os.path.join(PROJECT_ROOT, rel_path)
isort_ret = subprocess.run(["isort", "--check", "--profile=black", path]).returncode isort_ret = subprocess.run(["isort", "--check", "--profile=black", path]).returncode
black_ret = subprocess.run( black_ret = subprocess.run(
@ -25,7 +28,7 @@ def run_formatter(rel_path: str):
return True return True
def run_mypy(rel_path: str): def run_mypy(rel_path: str) -> bool:
with DirectoryExcursion(os.path.join(PROJECT_ROOT, "python-package")): with DirectoryExcursion(os.path.join(PROJECT_ROOT, "python-package")):
path = os.path.join(PROJECT_ROOT, rel_path) path = os.path.join(PROJECT_ROOT, rel_path)
ret = subprocess.run(["mypy", path]) ret = subprocess.run(["mypy", path])
@ -34,16 +37,85 @@ def run_mypy(rel_path: str):
return True return True
class PyLint:
"""A helper for running pylint, mostly copied from dmlc-core/scripts. """
def __init__(self) -> None:
self.pypackage_root = os.path.join(PROJECT_ROOT, "python-package/")
self.pylint_cats = set(["error", "warning", "convention", "refactor"])
self.pylint_opts = [
"--extension-pkg-whitelist=numpy",
"--rcfile=" + os.path.join(self.pypackage_root, ".pylintrc"),
]
def run(self, path: str) -> Tuple[Dict, str, str]:
(pylint_stdout, pylint_stderr) = epylint.py_run(
" ".join([str(path)] + self.pylint_opts), return_std=True
)
emap = {}
err = pylint_stderr.read()
out = []
for line in pylint_stdout:
out.append(line)
key = line.split(":")[-1].split("(")[0].strip()
if key not in self.pylint_cats:
continue
if key not in emap:
emap[key] = 1
else:
emap[key] += 1
return {path: emap}, err, "\n".join(out)
def __call__(self) -> bool:
all_errors: Dict[str, Dict[str, int]] = {}
def print_summary_map(result_map: Dict[str, Dict[str, int]]) -> int:
"""Print summary of certain result map."""
if len(result_map) == 0:
return 0
ftype = "Python"
npass = sum(1 for x in result_map.values() if len(x) == 0)
print(f"====={npass}/{len(result_map)} {ftype} files passed check=====")
for fname, emap in result_map.items():
if len(emap) == 0:
continue
print(
f"{fname}: {sum(emap.values())} Errors of {len(emap)} Categories map={str(emap)}"
)
return len(result_map) - npass
all_scripts = []
for root, dirs, files in os.walk(self.pypackage_root):
for f in files:
if f.endswith(".py"):
all_scripts.append(os.path.join(root, f))
with Pool(cpu_count()) as pool:
error_maps = pool.map(self.run, all_scripts)
for emap, err, out in error_maps:
print(out)
if len(err) != 0:
print(err)
all_errors.update(emap)
nerr = print_summary_map(all_errors)
return nerr == 0
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("--format", type=int, choices=[0, 1], default=1) parser.add_argument("--format", type=int, choices=[0, 1], default=1)
parser.add_argument("--type-check", type=int, choices=[0, 1], default=1) parser.add_argument("--type-check", type=int, choices=[0, 1], default=1)
parser.add_argument("--pylint", type=int, choices=[0, 1], default=1)
args = parser.parse_args() args = parser.parse_args()
if args.format == 1: if args.format == 1:
if not all( if not all(
[ run_formatter(path)
run_formatter("python-package/xgboost/dask.py"), for path in [
run_formatter("python-package/xgboost/spark"), "python-package/xgboost/dask.py",
"python-package/xgboost/spark",
"tests/ci_build/lint_python.py",
] ]
): ):
sys.exit(-1) sys.exit(-1)
@ -58,6 +130,11 @@ if __name__ == "__main__":
"tests/python/test_data_iterator.py", "tests/python/test_data_iterator.py",
"tests/python-gpu/test_gpu_with_dask.py", "tests/python-gpu/test_gpu_with_dask.py",
"tests/python-gpu/test_gpu_data_iterator.py", "tests/python-gpu/test_gpu_data_iterator.py",
"tests/ci_build/lint_python.py",
] ]
): ):
sys.exit(-1) sys.exit(-1)
if args.pylint == 1:
if not PyLint()():
sys.exit(-1)