This PR adds optional support for loading NCCL with `dlopen` as an alternative to compile-time linking. This addresses the size-bloat issue with the PyPI binary release. - Add a CMake option to load `nccl` at runtime. - Add an NCCL stub. After this, `nccl` will be fetched from PyPI when pip is used to install XGBoost, whether invoked by a user or via `pyproject.toml`. Those who want to link NCCL at compile time can continue to do so without any change. At the moment, this is Linux-only, since we only support MNMG on Linux.
73 lines
3.0 KiB
Docker
73 lines
3.0 KiB
Docker
# CUDA version selector for the base image; an ARG must be declared before FROM
# to be usable inside the FROM line.
ARG CUDA_VERSION_ARG
FROM nvcr.io/nvidia/cuda:$CUDA_VERSION_ARG-devel-centos7

# ARGs declared before FROM are visible only to FROM; redeclare them here so the
# build stage can use them in RUN instructions below.
ARG CUDA_VERSION_ARG
ARG NCCL_VERSION_ARG
ARG RAPIDS_VERSION_ARG
# Install all basic requirements: toolchain (devtoolset-9), Python (Mambaforge),
# awscli, and CMake. Caches and installer scripts are removed in the same layer
# so they do not bloat the image.
RUN \
    # Refresh the NVIDIA repo GPG key; the Version header must be stripped or
    # rpm rejects the key file.
    curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/D42D0685.pub | sed '/^Version/d' \
        > /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA && \
    yum install -y epel-release centos-release-scl && \
    yum-config-manager --enable centos-sclo-rh-testing && \
    yum -y update && \
    yum install -y tar unzip wget xz git which ninja-build devtoolset-9-gcc devtoolset-9-binutils devtoolset-9-gcc-c++ && \
    # Clean package caches in the same layer that created them.
    yum clean all && \
    rm -rf /var/cache/yum && \
    # Python: Mambaforge (pinned release) installed to /opt/mambaforge.
    wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \
    bash conda.sh -b -p /opt/mambaforge && \
    rm -f conda.sh && \
    /opt/mambaforge/bin/python -m pip install --no-cache-dir awscli && \
    # CMake 3.26.4 installed system-wide.
    # NOTE(review): --no-check-certificate disables TLS verification; it is only
    # needed if the CentOS 7 CA bundle is stale -- confirm and drop if possible.
    wget -nv -nc https://cmake.org/files/v3.26/cmake-3.26.4-linux-x86_64.sh --no-check-certificate && \
    bash cmake-3.26.4-linux-x86_64.sh --skip-license --prefix=/usr && \
    rm -f cmake-3.26.4-linux-x86_64.sh
# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
RUN \
    # CUDA_SHORT is the major.minor CUDA version (e.g. "11.8" -> "11.8", "11.8.0" -> "11.8"),
    # used to pick the matching +cudaX.Y NCCL package.
    export CUDA_SHORT=`echo $CUDA_VERSION_ARG | grep -o -E '[0-9]+\.[0-9]'` && \
    export NCCL_VERSION=$NCCL_VERSION_ARG && \
    # Register NVIDIA's machine-learning yum repository, then install the pinned
    # NCCL runtime and headers for this CUDA version.
    wget -nv -nc https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
    rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
    yum -y update && \
    yum install -y libnccl-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-devel-${NCCL_VERSION}+cuda${CUDA_SHORT} && \
    # Clean caches and the repo-setup rpm in the same layer to keep the image small.
    yum clean all && \
    rm -rf /var/cache/yum && \
    rm -f nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm
# Put Mambaforge's Python and ninja first on PATH.
ENV PATH=/opt/mambaforge/bin:/usr/local/ninja:$PATH

# Build everything with the devtoolset-9 toolchain (the CentOS 7 system GCC is
# too old for modern C++); CUDAHOSTCXX makes nvcc use the same host compiler.
ENV CC=/opt/rh/devtoolset-9/root/usr/bin/gcc \
    CXX=/opt/rh/devtoolset-9/root/usr/bin/c++ \
    CPP=/opt/rh/devtoolset-9/root/usr/bin/cpp \
    CUDAHOSTCXX=/opt/rh/devtoolset-9/root/usr/bin/c++

# Version of gosu fetched below; key=value form (the legacy space-separated
# "ENV key value" syntax is deprecated).
ENV GOSU_VERSION=1.10
# Install RMM (RAPIDS Memory Manager) into /opt/rmm, pinned to the RAPIDS
# release matching RAPIDS_VERSION_ARG. Uses cmake -S/-B like the gRPC step
# below instead of pushd/popd + mkdir, for consistency; sources are removed
# in the same layer after installation.
RUN git clone -b v${RAPIDS_VERSION_ARG}.00 https://github.com/rapidsai/rmm.git --recurse-submodules --depth 1 && \
    cmake -S rmm -B rmm/build -GNinja \
        -DCMAKE_INSTALL_PREFIX=/opt/rmm \
        -DCUDA_STATIC_RUNTIME=ON && \
    cmake --build rmm/build --target install && \
    rm -rf rmm
# Install gRPC v1.49.1 into /opt/grpc; symbols are hidden by default so the
# library does not leak into consumers' exported symbol tables. Sources are
# removed in the same layer after installation.
RUN git clone -b v1.49.1 https://github.com/grpc/grpc.git \
        --recurse-submodules --depth 1 && \
    cmake -S grpc -B grpc/build -GNinja \
        -DCMAKE_INSTALL_PREFIX=/opt/grpc \
        -DCMAKE_CXX_VISIBILITY_PRESET=hidden && \
    cmake --build grpc/build --target install && \
    rm -rf grpc
# Install lightweight sudo (not bound to TTY): gosu lets the entrypoint drop
# privileges to the invoking user's UID/GID.
RUN set -ex; \
    # NOTE(review): the binary is downloaded without checksum or signature
    # verification -- consider validating against the upstream gosu .asc files.
    wget -nv -nc -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \
    chmod +x /usr/local/bin/gosu && \
    # Smoke test: run a no-op as an unprivileged user to verify gosu works.
    gosu nobody true
# Default entry-point to use if running locally
# It will preserve attributes of created files
COPY entrypoint.sh /scripts/

WORKDIR /workspace
# Exec (JSON-array) form so the script runs as PID 1 and receives signals.
ENTRYPOINT ["/scripts/entrypoint.sh"]