enable ROCm on latest XGBoost

This commit is contained in:
Hui Liu
2023-10-23 11:07:08 -07:00
328 changed files with 8028 additions and 3642 deletions

View File

@@ -1,5 +1,5 @@
ARG CUDA_VERSION_ARG
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-centos7
FROM nvcr.io/nvidia/cuda:$CUDA_VERSION_ARG-devel-centos7
ARG CUDA_VERSION_ARG
ARG NCCL_VERSION_ARG
ARG RAPIDS_VERSION_ARG

View File

@@ -1,5 +1,5 @@
ARG CUDA_VERSION_ARG
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-centos7
FROM nvcr.io/nvidia/cuda:$CUDA_VERSION_ARG-devel-centos7
ARG CUDA_VERSION_ARG
# Install all basic requirements

View File

@@ -1,5 +1,5 @@
ARG CUDA_VERSION_ARG
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-centos7
FROM nvcr.io/nvidia/cuda:$CUDA_VERSION_ARG-devel-centos7
ARG CUDA_VERSION_ARG
ARG NCCL_VERSION_ARG

View File

@@ -148,10 +148,11 @@ then
$(aws ecr get-login --no-include-email --region ${DOCKER_CACHE_ECR_REGION} --registry-ids ${DOCKER_CACHE_ECR_ID})
# Pull pre-build container from Docker build cache,
# if one exists for the particular branch or pull request
echo "docker pull --quiet ${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${BRANCH_NAME}"
if time docker pull --quiet "${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${BRANCH_NAME}"
DOCKER_TAG="${BRANCH_NAME//\//-}" # Slashes are not allowed in Docker tag
echo "docker pull --quiet ${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${DOCKER_TAG}"
if time docker pull --quiet "${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${DOCKER_TAG}"
then
CACHE_FROM_CMD="--cache-from ${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${BRANCH_NAME}"
CACHE_FROM_CMD="--cache-from ${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${DOCKER_TAG}"
else
# If the build cache is empty for the particular branch or pull request,
# use the build cache associated with the master branch
@@ -185,8 +186,8 @@ if [[ -n "${DOCKER_CACHE_REPO}" ]]
then
# Push the container we just built to the Docker build cache
# that is associated with the particular branch or pull request
echo "docker tag ${DOCKER_IMG_NAME} ${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${BRANCH_NAME}"
docker tag "${DOCKER_IMG_NAME}" "${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${BRANCH_NAME}"
echo "docker tag ${DOCKER_IMG_NAME} ${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${DOCKER_TAG}"
docker tag "${DOCKER_IMG_NAME}" "${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${DOCKER_TAG}"
# Attempt to create Docker repository; it will fail if the repository already exists
echo "aws ecr create-repository --repository-name ${DOCKER_IMG_NAME} --region ${DOCKER_CACHE_ECR_REGION}"
@@ -214,10 +215,10 @@ then
EOF
fi
echo "docker push --quiet ${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${BRANCH_NAME}"
docker push --quiet "${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${BRANCH_NAME}"
echo "docker push --quiet ${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${DOCKER_TAG}"
docker push --quiet "${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${DOCKER_TAG}"
if [[ $? != "0" ]]; then
echo "ERROR: could not update Docker cache ${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${BRANCH_NAME}"
echo "ERROR: could not update Docker cache ${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${DOCKER_TAG}"
exit 1
fi
fi

View File

@@ -0,0 +1,14 @@
#!/bin/bash
# Run cmakelint over all CMake build files in the repository.
# Exits non-zero if any lint error is found (set -e plus explicit exit 1).
set -e
# Collect every CMakeLists.txt in the tree plus *.cmake modules under
# ./cmake, excluding the vendored dmlc-core and gputreeshap submodules.
cmake_files=$(
find . -name CMakeLists.txt -o -path "./cmake/*.cmake" \
| grep -v dmlc-core \
| grep -v gputreeshap
)
# NOTE(review): ${cmake_files} is intentionally unquoted so the shell
# word-splits it into one argument per discovered file path.
cmakelint \
--linelength=120 \
--filter=-convention/filename,-package/stdargs,-readability/wonkycase \
${cmake_files} \
|| exit 1

166
tests/ci_build/lint_cpp.py Normal file
View File

@@ -0,0 +1,166 @@
import argparse
import os
import re
import sys
import cpplint
from cpplint import _cpplint_state
CXX_SUFFIX = set(["cc", "c", "cpp", "h", "cu", "hpp"])
def filepath_enumerate(paths):
    """Enumerate the file paths of all subfiles of the list of paths.

    A regular file is kept as-is; a directory is walked recursively and
    every contained file is added with a normalized path.
    """
    collected = []
    for entry in paths:
        if os.path.isfile(entry):
            collected.append(entry)
            continue
        # Non-file entries (directories; nonexistent paths yield nothing).
        for root, _dirs, names in os.walk(entry):
            collected.extend(
                os.path.normpath(os.path.join(root, fname)) for fname in names
            )
    return collected
def get_header_guard_dmlc(filename):
    """Get Header Guard Convention for DMLC Projects.
    For headers in include, directly use the path
    For headers in src, use project name plus path
    Examples: with project-name = dmlc
    include/dmlc/timer.h -> DMLC_TIMER_H_
    src/io/libsvm_parser.h -> DMLC_IO_LIBSVM_PARSER_H_
    """
    fileinfo = cpplint.FileInfo(filename)
    file_path_from_root = fileinfo.RepositoryName()
    # Directories whose prefix is stripped before building the guard name.
    inc_list = ["include", "api", "wrapper", "contrib"]
    # On Windows, mshadow headers are also treated as public includes.
    if os.name == "nt":
        inc_list.append("mshadow")
    if file_path_from_root.find("src/") != -1 and _HELPER.project_name is not None:
        # Header under src/: keep the path after "src" and prefix the
        # project name (idx + 3 starts at the "/" following "src").
        idx = file_path_from_root.find("src/")
        file_path_from_root = _HELPER.project_name + file_path_from_root[idx + 3 :]
    else:
        # Public header: drop everything up to and including "include/".
        idx = file_path_from_root.find("include/")
        if idx != -1:
            file_path_from_root = file_path_from_root[idx + 8 :]
        # Strip any remaining known public-include prefix (first match wins).
        for spath in inc_list:
            prefix = spath + "/"
            if file_path_from_root.startswith(prefix):
                file_path_from_root = re.sub("^" + prefix, "", file_path_from_root)
                break
    # Separators and whitespace become underscores, e.g. "io/a.h" -> IO_A_H_.
    return re.sub(r"[-./\s]", "_", file_path_from_root).upper() + "_"
class Lint:
    """Driver that runs cpplint over C++ files and aggregates error counts."""

    def __init__(self):
        self.project_name = "xgboost"
        # file path -> {error category: count}, split by header vs. source.
        self.cpp_header_map = {}
        self.cpp_src_map = {}
        # NOTE(review): python_map and pylint_cats appear unused in this file
        # (no pylint is run here) -- likely leftovers from the dmlc lint
        # script; confirm before removing.
        self.python_map = {}
        self.pylint_cats = set(["error", "warning", "convention", "refactor"])
        # setup cpp lint
        # Configures cpplint's module-level (global) state; must run before
        # any ProcessFile call.
        cpplint_args = ["--quiet", "--extensions=" + (",".join(CXX_SUFFIX)), "."]
        _ = cpplint.ParseArguments(cpplint_args)
        # NOTE(review): the trailing comma inside "-build/include," looks
        # accidental -- verify the intended filter string.
        cpplint._SetFilters(
            ",".join(
                [
                    "-build/c++11",
                    "-build/include,",
                    "+build/namespaces",
                    "+build/include_what_you_use",
                    "+build/include_order",
                ]
            )
        )
        cpplint._SetCountingStyle("toplevel")
        cpplint._line_length = 100

    def process_cpp(self, path, suffix):
        """Process a cpp file."""
        # Reset the shared cpplint state so counts are per-file.
        _cpplint_state.ResetErrorCounts()
        cpplint.ProcessFile(str(path), _cpplint_state.verbose_level)
        _cpplint_state.PrintErrorCounts()
        errors = _cpplint_state.errors_by_category.copy()
        # Headers and sources are summarized separately in print_summary.
        if suffix == "h":
            self.cpp_header_map[str(path)] = errors
        else:
            self.cpp_src_map[str(path)] = errors

    @staticmethod
    def _print_summary_map(strm, result_map, ftype):
        """Print summary of certain result map.

        Returns the number of files in *result_map* that had errors.
        """
        if len(result_map) == 0:
            return 0
        npass = sum(1 for x in result_map.values() if len(x) == 0)
        strm.write(f"====={npass}/{len(result_map)} {ftype} files passed check=====\n")
        for fname, emap in result_map.items():
            if len(emap) == 0:
                continue
            strm.write(
                f"{fname}: {sum(emap.values())} Errors of {len(emap)} Categories map={str(emap)}\n"
            )
        return len(result_map) - npass

    def print_summary(self, strm):
        """Print summary of lint.

        Writes a per-kind summary to *strm* and returns the total number of
        failing files (used by main() as the exit status).
        """
        nerr = 0
        nerr += Lint._print_summary_map(strm, self.cpp_header_map, "cpp-header")
        nerr += Lint._print_summary_map(strm, self.cpp_src_map, "cpp-source")
        if nerr == 0:
            strm.write("All passed!\n")
        else:
            strm.write(f"{nerr} files failed lint\n")
        return nerr
# Module-level singleton; constructing it configures cpplint's global state.
_HELPER = Lint()
# Replace cpplint's default header-guard naming with the DMLC convention.
cpplint.GetHeaderGuardCPPVariable = get_header_guard_dmlc
def process(fname, allow_type):
    """Process a file.

    Skips paths containing '#' (editor backup/lock files) and suffixes not
    in *allow_type*; hands C/C++/CUDA files to the lint helper.
    """
    fname = str(fname)
    suffix = fname.rsplit(".", 1)[-1]
    if "#" in fname or suffix not in allow_type:
        return
    if suffix in CXX_SUFFIX:
        _HELPER.process_cpp(fname, suffix)
def main():
    """Entry point: lint all C++ files under the given paths.

    Exits with status 1 if any linted file has errors, 0 otherwise.
    """
    parser = argparse.ArgumentParser(description="run cpp lint")
    parser.add_argument("path", nargs="+", help="path to traverse")
    parser.add_argument(
        "--exclude_path",
        nargs="+",
        default=[],
        help="exclude this path, and all subfolders if path is a folder",
    )
    args = parser.parse_args()
    # Expand excluded directories into concrete file paths for comparison.
    excluded_paths = filepath_enumerate(args.exclude_path)
    allow_type = list(CXX_SUFFIX)
    for path in args.path:
        if os.path.isfile(path):
            if os.path.normpath(path) not in excluded_paths:
                process(path, allow_type)
            continue
        for root, _dirs, files in os.walk(path):
            for name in files:
                file_path = os.path.normpath(os.path.join(root, name))
                if file_path not in excluded_paths:
                    process(file_path, allow_type)
    nerr = _HELPER.print_summary(sys.stderr)
    sys.exit(nerr > 0)


if __name__ == "__main__":
    main()

View File

@@ -28,6 +28,7 @@ my_linters <- list(
equals_na = lintr::equals_na_linter(),
fixed_regex = lintr::fixed_regex_linter(),
for_loop_index = lintr::for_loop_index_linter(),
function_left_parentheses = lintr::function_left_parentheses_linter(),
function_return = lintr::function_return_linter(),
infix_spaces_linter = lintr::infix_spaces_linter(),
is_numeric = lintr::is_numeric_linter(),

View File

@@ -3,9 +3,15 @@ import argparse
import os
import shutil
import subprocess
from io import StringIO
from pathlib import Path
from platform import system
try:
import pandas as pd
except ImportError:
pd = None
from test_utils import R_PACKAGE, ROOT, DirectoryExcursion, cd, print_time, record_time
@@ -97,16 +103,47 @@ def build_rpackage(path: str) -> str:
return tarball
def check_example_timing(rcheck_dir: Path, threshold: float) -> None:
    """Fail if any R CMD check example used too many CPU threads.

    Parses ``xgboost-Ex.timings`` from *rcheck_dir* and raises ValueError
    when any example's user/elapsed CPU ratio exceeds *threshold*.
    """
    with open(rcheck_dir / "xgboost-Ex.timings", "r") as fd:
        stripped = [line.strip() for line in fd.readlines()]
    # The timings file is tab-separated; re-join stripped lines for pandas.
    df = pd.read_csv(StringIO("\n".join(stripped)), delimiter="\t")
    ratio_n = "user/elapsed"
    df[ratio_n] = df["user"] / df["elapsed"]
    offending = df[df[ratio_n] > threshold]
    try:
        # requires the tabulate package
        df.to_markdown("timings.md")
        offending.to_markdown("offending.md")
    except ImportError:
        print("failed to export markdown files.")
    if offending.shape[0] == 0:
        return
    print(offending)
    raise ValueError("There are examples using too many threads")
@cd(ROOT)
@record_time
def check_rpackage(path: str) -> None:
env = os.environ.copy()
print("Ncpus:", f"{os.cpu_count()}")
threshold = 2.5
env.update(
{
"MAKEFLAGS": f"-j{os.cpu_count()}",
# cran specific environment variables
"_R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_": str(2.5),
"_R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_": str(threshold),
"_R_CHECK_TEST_TIMING_CPU_TO_ELAPSED_THRESHOLD_": str(threshold),
"_R_CHECK_VIGNETTE_TIMING_CPU_TO_ELAPSED_THRESHOLD_": str(threshold),
}
)
@@ -118,11 +155,14 @@ def check_rpackage(path: str) -> None:
CC = os.path.join(mingw_bin, "gcc.exe")
env.update({"CC": CC, "CXX": CXX})
status = subprocess.run([R, "CMD", "check", "--as-cran", path], env=env)
with open(Path("xgboost.Rcheck") / "00check.log", "r") as fd:
status = subprocess.run(
[R, "CMD", "check", "--as-cran", "--timings", path], env=env
)
rcheck_dir = Path("xgboost.Rcheck")
with open(rcheck_dir / "00check.log", "r") as fd:
check_log = fd.read()
with open(Path("xgboost.Rcheck") / "00install.out", "r") as fd:
with open(rcheck_dir / "00install.out", "r") as fd:
install_log = fd.read()
msg = f"""
@@ -144,6 +184,8 @@ def check_rpackage(path: str) -> None:
if check_log.find("Examples with CPU time") != -1:
print(msg)
raise ValueError("Suspicious NOTE.")
if pd is not None:
check_example_timing(rcheck_dir, threshold)
@cd(R_PACKAGE)
@@ -264,6 +306,8 @@ def main(args: argparse.Namespace) -> None:
test_with_autotools()
else:
test_with_cmake(args)
elif args.task == "timings":
check_example_timing(Path("xgboost.Rcheck"), 2.5)
else:
raise ValueError("Unexpected task.")
@@ -279,7 +323,7 @@ if __name__ == "__main__":
parser.add_argument(
"--task",
type=str,
choices=["pack", "build", "check", "doc"],
choices=["pack", "build", "check", "doc", "timings"],
default="check",
required=False,
)