Use dlopen to load NCCL. (#9796)
This PR adds optional support for loading NCCL with `dlopen` as an alternative to compile-time linking. This addresses the size bloat issue with the PyPI binary release. - Add a CMake option to load `nccl` at runtime. - Add an NCCL stub. After this, `nccl` will be fetched from PyPI when XGBoost is installed with pip, either by a user or through `pyproject.toml`. Those who want to link NCCL at compile time can continue to do so without any change. At the moment, this is Linux only, since we only support MNMG on Linux.
This commit is contained in:
@@ -2,6 +2,7 @@ ARG CUDA_VERSION_ARG
|
||||
FROM nvidia/cuda:$CUDA_VERSION_ARG-runtime-ubuntu22.04
|
||||
ARG CUDA_VERSION_ARG
|
||||
ARG RAPIDS_VERSION_ARG
|
||||
ARG NCCL_VERSION_ARG
|
||||
|
||||
# Environment
|
||||
ENV DEBIAN_FRONTEND noninteractive
|
||||
@@ -23,7 +24,9 @@ RUN \
|
||||
conda install -c conda-forge mamba && \
|
||||
mamba create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \
|
||||
python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \
|
||||
dask dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \
|
||||
nccl>=$(cut -d "-" -f 1 << $NCCL_VERSION_ARG) \
|
||||
dask \
|
||||
dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \
|
||||
numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \
|
||||
pyspark>=3.4.0 cloudpickle cuda-python && \
|
||||
mamba clean --all && \
|
||||
|
||||
@@ -27,7 +27,7 @@ RUN \
|
||||
wget -nv -nc https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
|
||||
rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
|
||||
yum -y update && \
|
||||
yum install -y libnccl-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-devel-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-static-${NCCL_VERSION}+cuda${CUDA_SHORT} && \
|
||||
yum install -y libnccl-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-devel-${NCCL_VERSION}+cuda${CUDA_SHORT} && \
|
||||
rm -f nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm;
|
||||
|
||||
ENV PATH=/opt/mambaforge/bin:/usr/local/ninja:$PATH
|
||||
|
||||
@@ -1,35 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
set -e
|
||||
|
||||
rm -rf tmp_nccl
|
||||
|
||||
mkdir tmp_nccl
|
||||
pushd tmp_nccl
|
||||
|
||||
set -x
|
||||
|
||||
cat << EOF > test.cu
|
||||
int main(void) { return 0; }
|
||||
EOF
|
||||
|
||||
cat << EOF > CMakeLists.txt
|
||||
cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
|
||||
project(gencode_extractor CXX C)
|
||||
cmake_policy(SET CMP0104 NEW)
|
||||
set(CMAKE_CUDA_HOST_COMPILER \${CMAKE_CXX_COMPILER})
|
||||
enable_language(CUDA)
|
||||
include(../cmake/Utils.cmake)
|
||||
compute_cmake_cuda_archs("")
|
||||
add_library(test OBJECT test.cu)
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
EOF
|
||||
|
||||
cmake . -GNinja -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
|
||||
gen_code=$(grep -o -- '--generate-code=\S*' compile_commands.json | paste -sd ' ')
|
||||
|
||||
nvprune ${gen_code} /usr/lib64/libnccl_static.a -o ../libnccl_static.a
|
||||
|
||||
popd
|
||||
rm -rf tmp_nccl
|
||||
|
||||
set +x
|
||||
@@ -1,22 +1,10 @@
|
||||
import os
|
||||
import sys
|
||||
from contextlib import contextmanager
|
||||
|
||||
|
||||
@contextmanager
|
||||
def cd(path):
|
||||
path = os.path.normpath(path)
|
||||
cwd = os.getcwd()
|
||||
os.chdir(path)
|
||||
print("cd " + path)
|
||||
try:
|
||||
yield path
|
||||
finally:
|
||||
os.chdir(cwd)
|
||||
|
||||
from test_utils import DirectoryExcursion
|
||||
|
||||
if len(sys.argv) != 4:
|
||||
print('Usage: {} [wheel to rename] [commit id] [platform tag]'.format(sys.argv[0]))
|
||||
print("Usage: {} [wheel to rename] [commit id] [platform tag]".format(sys.argv[0]))
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
@@ -26,20 +14,26 @@ platform_tag = sys.argv[3]
|
||||
|
||||
dirname, basename = os.path.dirname(whl_path), os.path.basename(whl_path)
|
||||
|
||||
with cd(dirname):
|
||||
tokens = basename.split('-')
|
||||
with DirectoryExcursion(dirname):
|
||||
tokens = basename.split("-")
|
||||
assert len(tokens) == 5
|
||||
version = tokens[1].split('+')[0]
|
||||
keywords = {'pkg_name': tokens[0],
|
||||
'version': version,
|
||||
'commit_id': commit_id,
|
||||
'platform_tag': platform_tag}
|
||||
new_name = '{pkg_name}-{version}+{commit_id}-py3-none-{platform_tag}.whl'.format(**keywords)
|
||||
print('Renaming {} to {}...'.format(basename, new_name))
|
||||
version = tokens[1].split("+")[0]
|
||||
keywords = {
|
||||
"pkg_name": tokens[0],
|
||||
"version": version,
|
||||
"commit_id": commit_id,
|
||||
"platform_tag": platform_tag,
|
||||
}
|
||||
new_name = "{pkg_name}-{version}+{commit_id}-py3-none-{platform_tag}.whl".format(
|
||||
**keywords
|
||||
)
|
||||
print("Renaming {} to {}...".format(basename, new_name))
|
||||
if os.path.isfile(new_name):
|
||||
os.remove(new_name)
|
||||
os.rename(basename, new_name)
|
||||
|
||||
filesize = os.path.getsize(new_name) / 1024 / 1024 # MB
|
||||
print(f"Wheel size: {filesize}")
|
||||
|
||||
msg = f"Limit of wheel size set by PyPI is exceeded. {new_name}: {filesize}"
|
||||
assert filesize <= 300, msg
|
||||
|
||||
Reference in New Issue
Block a user