[CI] Revise R tests. (#8430)

- Use the standard package check (check on the tarball instead of the source tree).
- Run commands in parallel.
- Clean up dependency installation.
- Replace makefile.
- Documentation.
- Test using the image from rhub.
This commit is contained in:
Jiaming Yuan
2022-11-09 09:12:13 +08:00
committed by GitHub
parent 4449e30184
commit a83748eb45
12 changed files with 499 additions and 363 deletions

View File

@@ -6,12 +6,13 @@ from multiprocessing import Pool, cpu_count
from typing import Dict, Tuple
from pylint import epylint
from test_utils import DirectoryExcursion
from test_utils import DirectoryExcursion, print_time, record_time
CURDIR = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
PROJECT_ROOT = os.path.normpath(os.path.join(CURDIR, os.path.pardir, os.path.pardir))
@record_time
def run_black(rel_path: str) -> bool:
cmd = ["black", "-q", "--check", rel_path]
ret = subprocess.run(cmd).returncode
@@ -27,10 +28,12 @@ Please run the following command on your machine to address the formatting error
return True
@record_time
def run_isort(rel_path: str) -> bool:
cmd = ["isort", "--check", "--profile=black", rel_path]
ret = subprocess.run(cmd).returncode
if ret != 0:
subprocess.run(["isort", "--version"])
msg = """
Please run the following command on your machine to address the formatting error:
@@ -41,6 +44,7 @@ Please run the following command on your machine to address the formatting error
return True
@record_time
def run_mypy(rel_path: str) -> bool:
with DirectoryExcursion(os.path.join(PROJECT_ROOT, "python-package")):
path = os.path.join(PROJECT_ROOT, rel_path)
@@ -117,17 +121,13 @@ class PyLint:
return nerr == 0
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description=(
"Run static checkers for XGBoost, see `python_lint.yml' "
"conda env file for a list of dependencies."
)
)
parser.add_argument("--format", type=int, choices=[0, 1], default=1)
parser.add_argument("--type-check", type=int, choices=[0, 1], default=1)
parser.add_argument("--pylint", type=int, choices=[0, 1], default=1)
args = parser.parse_args()
@record_time
def run_pylint() -> bool:
    """Instantiate ``PyLint`` and invoke it, returning the result of that call."""
    checker = PyLint()
    return checker()
@record_time
def main(args: argparse.Namespace) -> None:
if args.format == 1:
black_results = [
run_black(path)
@@ -148,6 +148,8 @@ if __name__ == "__main__":
"tests/python/test_quantile_dmatrix.py",
"tests/python-gpu/test_gpu_data_iterator.py",
"tests/ci_build/lint_python.py",
"tests/ci_build/test_r_package.py",
"tests/ci_build/test_utils.py",
"tests/test_distributed/test_with_spark/",
"tests/test_distributed/test_gpu_with_spark/",
# demo
@@ -177,7 +179,7 @@ if __name__ == "__main__":
"doc/",
]
]
if not all(black_results):
if not all(isort_results):
sys.exit(-1)
if args.type_check == 1:
@@ -194,6 +196,8 @@ if __name__ == "__main__":
"tests/python/test_data_iterator.py",
"tests/python-gpu/test_gpu_data_iterator.py",
"tests/ci_build/lint_python.py",
"tests/ci_build/test_r_package.py",
"tests/ci_build/test_utils.py",
"tests/test_distributed/test_with_spark/test_data.py",
"tests/test_distributed/test_gpu_with_spark/test_data.py",
"tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py",
@@ -202,5 +206,22 @@ if __name__ == "__main__":
sys.exit(-1)
if args.pylint == 1:
if not PyLint()():
if not run_pylint():
sys.exit(-1)
if __name__ == "__main__":
    # Command line entry point: every checker group is toggled by a 0/1 flag
    # that defaults to enabled.
    parser = argparse.ArgumentParser(
        description=(
            "Run static checkers for XGBoost, see `python_lint.yml' "
            "conda env file for a list of dependencies."
        )
    )
    for flag in ("--format", "--type-check", "--pylint"):
        parser.add_argument(flag, type=int, choices=[0, 1], default=1)
    args = parser.parse_args()

    try:
        main(args)
    finally:
        # Report the recorded timings even when a checker aborts the run.
        print_time()

View File

@@ -1,23 +0,0 @@
#!/bin/bash
# To be called when R package tests have failed
#
# Prints the install and check logs found under xgboost.Rcheck/ (each one only
# if the file exists). When the first argument is "fail", exits with status 1
# after printing.
# Abort on the first failing command.
set -e
# Echo each command as it is executed.
set -x
# First positional argument; only the value "fail" is acted upon below.
flag="$1"
# Dump the package installation log, if present.
if [ -f "xgboost.Rcheck/00install.out" ]; then
echo "===== xgboost.Rcheck/00install.out ===="
cat xgboost.Rcheck/00install.out
fi
# Dump the check log, if present.
if [ -f "xgboost.Rcheck/00check.log" ]; then
printf "\n\n===== xgboost.Rcheck/00check.log ====\n"
cat xgboost.Rcheck/00check.log
fi
# Exit non-zero when the caller asked for it with "fail".
if [[ "$flag" == "fail" ]]
then
exit 1
fi

View File

@@ -1,95 +1,315 @@
"""Utilities for packaging R code and running tests."""
import argparse
import os
import shutil
import subprocess
from time import time
from pathlib import Path
from platform import system
from test_utils import DirectoryExcursion
from test_utils import DirectoryExcursion, cd, print_time, record_time
ROOT = os.path.normpath(
os.path.join(os.path.dirname(os.path.abspath(__file__)), os.path.pardir,
os.path.pardir))
r_package = os.path.join(ROOT, 'R-package')
os.path.join(
os.path.dirname(os.path.abspath(__file__)), os.path.pardir, os.path.pardir
)
)
r_package = os.path.join(ROOT, "R-package")
def get_mingw_bin():
return os.path.join('c:/rtools40/mingw64/', 'bin')
def get_mingw_bin() -> str:
    """Return the path of the ``bin`` directory under ``c:/rtools40/mingw64/``."""
    mingw_root = "c:/rtools40/mingw64/"
    return os.path.join(mingw_root, "bin")
def test_with_autotools(args):
with DirectoryExcursion(r_package):
@cd(ROOT)
@record_time
def pack_rpackage() -> Path:
    """Compose the directory used for creating R package tar ball.

    Copies ``R-package`` together with the C++ sources it needs (core, rabit,
    dmlc-core) into a fresh ``xgboost`` directory, rewrites the makefiles for
    that layout and removes files that are not kept in the package directory.

    Returns
    -------
    The relative path ``xgboost`` of the composed directory.

    Raises
    ------
    ValueError
        If the git status cannot be queried, or if ``git clean --dry-run``
        reports files outside of ``tests/ci_build``.
    """
    dest = Path("xgboost")

    def pkgroot(path: str) -> None:
        """Change makefiles according to the package layout."""
        with open(Path("R-package") / "src" / path, "r") as fd:
            makefile = fd.read()
        makefile = makefile.replace("PKGROOT=../../", "PKGROOT=.", 1)
        with open(dest / "src" / path, "w") as fd:
            fd.write(makefile)

    output = subprocess.run(["git", "clean", "-xdf", "--dry-run"], capture_output=True)
    if output.returncode != 0:
        raise ValueError("Failed to check git repository status.", output)
    # `splitlines` yields an empty list for empty output, whereas
    # `"".split("\n")` yields `[""]` and would flag a perfectly clean
    # repository as dirty.
    would_remove = [
        line for line in output.stdout.decode("utf-8").splitlines() if line.strip()
    ]

    # Only leftovers below tests/ci_build are tolerated.
    if any("tests/ci_build" not in f for f in would_remove):
        raise ValueError(
            "\n".join(would_remove) + "\nPlease cleanup the working git repository."
        )

    shutil.copytree("R-package", dest)
    os.remove(dest / "demo" / "runall.R")
    # core
    shutil.copytree("src", dest / "src" / "src")
    shutil.copytree("include", dest / "src" / "include")
    shutil.copytree("amalgamation", dest / "src" / "amalgamation")
    # rabit
    rabit = Path("rabit")
    os.mkdir(dest / "src" / rabit)
    shutil.copytree(rabit / "src", dest / "src" / "rabit" / "src")
    shutil.copytree(rabit / "include", dest / "src" / "rabit" / "include")
    # dmlc-core
    dmlc_core = Path("dmlc-core")
    os.mkdir(dest / "src" / dmlc_core)
    shutil.copytree(dmlc_core / "include", dest / "src" / "dmlc-core" / "include")
    shutil.copytree(dmlc_core / "src", dest / "src" / "dmlc-core" / "src")
    # makefile & license
    shutil.copyfile("LICENSE", dest / "LICENSE")
    osxmakef = dest / "src" / "Makevars.win-e"
    if os.path.exists(osxmakef):
        os.remove(osxmakef)
    pkgroot("Makevars.in")
    pkgroot("Makevars.win")
    # misc
    # NOTE(review): the script is executed from both the source tree and the
    # copied tree; presumably intentional -- confirm.
    rwsp = Path("R-package") / "remove_warning_suppression_pragma.sh"
    if system() != "Windows":
        subprocess.check_call(rwsp)
    rwsp = dest / "remove_warning_suppression_pragma.sh"
    if system() != "Windows":
        subprocess.check_call(rwsp)
    os.remove(rwsp)
    os.remove(dest / "CMakeLists.txt")
    shutil.rmtree(dest / "tests" / "helper_scripts")
    return dest
@cd(ROOT)
@record_time
def build_rpackage(path: str) -> str:
    """Run ``R CMD build`` on `path` and return the path of the resulting tarball.

    Raises
    ------
    ValueError
        If no ``xgboost*.tar.gz`` file, or more than one, is found below the
        current directory after the build.
    """

    def find_tarball() -> str:
        """Search the current directory tree for the single built tarball."""
        candidates = []
        for dirpath, _, names in os.walk("."):
            candidates.extend(
                os.path.join(dirpath, name)
                for name in names
                if name.startswith("xgboost") and name.endswith(".tar.gz")
            )
        if len(candidates) == 0:
            raise ValueError("Failed to find output tar ball.")
        if len(candidates) > 1:
            raise ValueError("Found more than one packages:", candidates)
        return candidates[0]

    n_cpus = os.cpu_count()
    print("Ncpus:", f"{n_cpus}")
    # Forward the CPU count to make through MAKEFLAGS.
    build_env = {**os.environ, "MAKEFLAGS": f"-j{n_cpus}"}
    subprocess.check_call([R, "CMD", "build", path], env=build_env)

    return find_tarball()
@cd(ROOT)
@record_time
def check_rpackage(path: str) -> None:
env = os.environ.copy()
print("Ncpus:", f"{os.cpu_count()}")
env.update(
{
"MAKEFLAGS": f"-j{os.cpu_count()}",
# cran specific environment variables
"_R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_": str(2.5),
}
)
# Actually we don't run this check on windows due to dependency issue.
if system() == "Windows":
# make sure compiler from rtools is used.
mingw_bin = get_mingw_bin()
CXX = os.path.join(mingw_bin, 'g++.exe')
CC = os.path.join(mingw_bin, 'gcc.exe')
cmd = ['R.exe', 'CMD', 'INSTALL', str(os.path.curdir)]
env = os.environ.copy()
env.update({'CC': CC, 'CXX': CXX, "MAKE": "make -j$(nproc)"})
subprocess.check_call(cmd, env=env)
subprocess.check_call([
'R.exe', '-q', '-e',
"library(testthat); setwd('tests'); source('testthat.R')"
])
subprocess.check_call([
'R.exe', '-q', '-e',
"demo(runall, package = 'xgboost')"
])
CXX = os.path.join(mingw_bin, "g++.exe")
CC = os.path.join(mingw_bin, "gcc.exe")
env.update({"CC": CC, "CXX": CXX})
status = subprocess.run([R, "CMD", "check", "--as-cran", path], env=env)
with open(Path("xgboost.Rcheck") / "00check.log", "r") as fd:
check_log = fd.read()
with open(Path("xgboost.Rcheck") / "00install.out", "r") as fd:
install_log = fd.read()
msg = f"""
----------------------- Install ----------------------
{install_log}
----------------------- Check -----------------------
{check_log}
"""
if status.returncode != 0:
print(msg)
raise ValueError("Failed r package check.")
if check_log.find("WARNING") != -1:
print(msg)
raise ValueError("Has unresolved warnings.")
if check_log.find("Examples with CPU time") != -1:
print(msg)
raise ValueError("Suspicious NOTE.")
def test_with_cmake(args):
os.mkdir('build')
with DirectoryExcursion('build'):
if args.compiler == 'mingw':
@cd(r_package)
@record_time
def check_rmarkdown() -> None:
    """Regenerate the R documentation and fail if that changes tracked files.

    Runs ``devtools::document()`` through the ``Rscript`` located next to the
    configured ``R`` executable, then inspects ``git diff --name-only``.

    Raises
    ------
    ValueError
        If the git diff cannot be obtained, or if it lists any file.
    """
    assert system() != "Windows", "Document test doesn't support Windows."
    env = os.environ.copy()
    env.update({"MAKEFLAGS": f"-j{os.cpu_count()}"})
    print("Checking R document with devtools.")
    bin_dir = os.path.dirname(R)
    rscript = os.path.join(bin_dir, "Rscript")
    subprocess.check_call([rscript, "-e", "devtools::document()"], env=env)
    output = subprocess.run(["git", "diff", "--name-only"], capture_output=True)
    # A failing git command produces empty stdout; without this check it would
    # be mistaken for an up-to-date document.
    if output.returncode != 0:
        raise ValueError("Failed to check git repository status.", output)
    if len(output.stdout.decode("utf-8").strip()) != 0:
        raise ValueError("Please run `devtools::document()`.")
@cd(r_package)
@record_time
def test_with_autotools() -> None:
    """Windows only test. No `--as-cran` check, only unittests. We don't want to manage
    the dependencies on Windows machine.

    Installs the package from the current directory with the rtools mingw
    compilers, then runs the testthat suite and the package demos. All three
    steps use the configured ``R`` executable.
    """
    assert system() == "Windows"
    mingw_bin = get_mingw_bin()
    CXX = os.path.join(mingw_bin, "g++.exe")
    CC = os.path.join(mingw_bin, "gcc.exe")
    cmd = [R, "CMD", "INSTALL", str(os.path.curdir)]

    env = os.environ.copy()
    env.update({"CC": CC, "CXX": CXX, "MAKEFLAGS": f"-j{os.cpu_count()}"})
    subprocess.check_call(cmd, env=env)
    # Run tests and demos with the same interpreter that installed the package
    # instead of a hard-coded "R.exe", so that `--r` is honoured throughout.
    subprocess.check_call(
        [R, "-q", "-e", "library(testthat); setwd('tests'); source('testthat.R')"]
    )
    subprocess.check_call([R, "-q", "-e", "demo(runall, package = 'xgboost')"])
@record_time
def test_with_cmake(args: argparse.Namespace) -> None:
os.mkdir("build")
with DirectoryExcursion("build"):
if args.compiler == "mingw":
mingw_bin = get_mingw_bin()
CXX = os.path.join(mingw_bin, 'g++.exe')
CC = os.path.join(mingw_bin, 'gcc.exe')
CXX = os.path.join(mingw_bin, "g++.exe")
CC = os.path.join(mingw_bin, "gcc.exe")
env = os.environ.copy()
env.update({'CC': CC, 'CXX': CXX})
subprocess.check_call([
'cmake', os.path.pardir, '-DUSE_OPENMP=ON', '-DR_LIB=ON',
'-DCMAKE_CONFIGURATION_TYPES=Release', '-G', 'Unix Makefiles',
],
env=env)
subprocess.check_call(['make', '-j', 'install'])
elif args.compiler == 'msvc':
subprocess.check_call([
'cmake', os.path.pardir, '-DUSE_OPENMP=ON', '-DR_LIB=ON',
'-DCMAKE_CONFIGURATION_TYPES=Release', '-A', 'x64'
])
subprocess.check_call([
'cmake', '--build', os.path.curdir, '--target', 'install',
'--config', 'Release'
])
env.update({"CC": CC, "CXX": CXX})
subprocess.check_call(
[
"cmake",
os.path.pardir,
"-DUSE_OPENMP=ON",
"-DR_LIB=ON",
"-DCMAKE_CONFIGURATION_TYPES=Release",
"-G",
"Unix Makefiles",
],
env=env,
)
subprocess.check_call(["make", "-j", "install"])
elif args.compiler == "msvc":
subprocess.check_call(
[
"cmake",
os.path.pardir,
"-DUSE_OPENMP=ON",
"-DR_LIB=ON",
"-DCMAKE_CONFIGURATION_TYPES=Release",
"-A",
"x64",
]
)
subprocess.check_call(
[
"cmake",
"--build",
os.path.curdir,
"--target",
"install",
"--config",
"Release",
]
)
else:
raise ValueError('Wrong compiler')
raise ValueError("Wrong compiler")
with DirectoryExcursion(r_package):
subprocess.check_call([
'R.exe', '-q', '-e',
"library(testthat); setwd('tests'); source('testthat.R')"
])
subprocess.check_call([
'R.exe', '-q', '-e',
"demo(runall, package = 'xgboost')"
])
subprocess.check_call(
[
R,
"-q",
"-e",
"library(testthat); setwd('tests'); source('testthat.R')",
]
)
subprocess.check_call([R, "-q", "-e", "demo(runall, package = 'xgboost')"])
@record_time
def main(args: argparse.Namespace) -> None:
start = time()
if args.build_tool == 'autotools':
test_with_autotools(args)
if args.task == "build":
src_dir = pack_rpackage()
build_rpackage(src_dir)
elif args.task == "doc":
check_rmarkdown()
elif args.task == "check":
if args.build_tool == "autotools" and system() != "Windows":
src_dir = pack_rpackage()
tarball = build_rpackage(src_dir)
check_rpackage(tarball)
elif args.build_tool == "autotools":
test_with_autotools()
else:
test_with_cmake(args)
else:
test_with_cmake(args)
print("Duration:", time() - start)
raise ValueError("Unexpected task.")
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--compiler',
type=str,
choices=['mingw', 'msvc'],
help='Compiler used for compiling CXX code.')
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description=(
"Helper script for making R package and running R tests on CI. There are"
" also other helper scripts in the R tests directory for installing"
" dependencies and running linter."
)
)
parser.add_argument(
'--build-tool',
"--task",
type=str,
choices=['cmake', 'autotools'],
help='Build tool for compiling CXX code and install R package.')
choices=["build", "check", "doc"],
default="check",
required=False,
)
parser.add_argument(
"--compiler",
type=str,
choices=["mingw", "msvc"],
help="Compiler used for compiling CXX code. Only relevant for windows build",
default="mingw",
required=False,
)
parser.add_argument(
"--build-tool",
type=str,
choices=["cmake", "autotools"],
help="Build tool for compiling CXX code and install R package.",
default="autotools",
required=False,
)
parser.add_argument(
"--r",
type=str,
default="R" if system() != "Windows" else "R.exe",
help="Path to the R executable.",
)
args = parser.parse_args()
main(args)
R = args.r
try:
main(args)
finally:
print_time()

View File

@@ -1,14 +1,72 @@
"""Utilities for the CI."""
import os
from typing import Union
from datetime import datetime, timedelta
from functools import wraps
from typing import Any, Callable, Dict, TypedDict, TypeVar, Union
class DirectoryExcursion:
def __init__(self, path: Union[os.PathLike, str]):
def __init__(self, path: Union[os.PathLike, str]) -> None:
self.path = path
self.curdir = os.path.normpath(os.path.abspath(os.path.curdir))
def __enter__(self):
def __enter__(self) -> None:
os.chdir(self.path)
def __exit__(self, *args):
def __exit__(self, *args: Any) -> None:
os.chdir(self.curdir)
R = TypeVar("R")
def cd(path: Union[os.PathLike, str]) -> Callable:
    """Build a decorator that runs the decorated function inside `path`.

    Each call of the decorated function is wrapped in a ``DirectoryExcursion``
    for `path`.
    """

    def decorate(func: Callable[..., R]) -> Callable[..., R]:
        @wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> R:
            excursion = DirectoryExcursion(path)
            with excursion:
                result = func(*args, **kwargs)
            return result

        return wrapper

    return decorate
Record = TypedDict("Record", {"count": int, "total": timedelta})
timer: Dict[str, Record] = {}
def record_time(func: Callable[..., R]) -> Callable[..., R]:
    """Decorator accumulating call count and elapsed time of `func` in ``timer``.

    Entries are keyed by ``func.__name__``. The measurement is stored whether
    the call returns or raises.
    """

    @wraps(func)
    def timed(*args: Any, **kwargs: Any) -> R:
        name = func.__name__
        if name not in timer:
            timer[name] = {"count": 0, "total": timedelta(0)}

        begin = datetime.now()
        try:
            return func(*args, **kwargs)
        finally:
            end = datetime.now()
            record = timer[name]
            record["count"] += 1
            record["total"] += end - begin

    return timed
def print_time() -> None:
    """Print all recorded items by :py:func:`record_time`.

    One line is printed per entry of ``timer``: its name, the number of calls
    and the accumulated time in whole seconds.
    """
    for name, record in timer.items():
        # `timedelta.seconds` is only the seconds component and drops whole
        # days; `total_seconds()` covers the full duration.
        elapsed = int(record["total"].total_seconds())
        print(
            "Name:",
            name,
            "Called:",
            record["count"],
            "Elapsed:",
            f"{elapsed} secs",
        )