xgboost/tests/ci_build/Dockerfile.gpu
Jiaming Yuan 0715ab3c10
Use dlopen to load NCCL. (#9796)
This PR adds optional support for loading nccl with `dlopen` as an alternative of compile time linking. This is to address the size bloat issue with the PyPI binary release.
- Add CMake option to load `nccl` at runtime.
- Add an NCCL stub.

After this, `nccl` will be fetched from PyPI when using pip to install XGBoost, either by a user or by `pyproject.toml`. Others who want to link the nccl at compile time can continue to do so without any change.

At the moment, this is Linux only since we only support MNMG on Linux.
2023-11-22 19:27:31 +08:00

50 lines
1.9 KiB
Docker

ARG CUDA_VERSION_ARG
FROM nvidia/cuda:$CUDA_VERSION_ARG-runtime-ubuntu22.04
ARG CUDA_VERSION_ARG
ARG RAPIDS_VERSION_ARG
ARG NCCL_VERSION_ARG
# Environment
ENV DEBIAN_FRONTEND noninteractive
SHELL ["/bin/bash", "-c"] # Use Bash as shell
# Install all basic requirements
RUN \
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub && \
apt-get update && \
apt-get install -y wget unzip bzip2 libgomp1 build-essential openjdk-8-jdk-headless && \
# Python
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \
bash conda.sh -b -p /opt/mambaforge
ENV PATH=/opt/mambaforge/bin:$PATH
# Create new Conda environment with cuDF, Dask, and cuPy
RUN \
conda install -c conda-forge mamba && \
mamba create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \
python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \
nccl>=$(cut -d "-" -f 1 << $NCCL_VERSION_ARG) \
dask \
dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \
numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \
pyspark>=3.4.0 cloudpickle cuda-python && \
mamba clean --all && \
conda run --no-capture-output -n gpu_test pip install buildkite-test-collector
ENV GOSU_VERSION 1.10
ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/
# Install lightweight sudo (not bound to TTY)
RUN set -ex; \
wget -nv -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \
chmod +x /usr/local/bin/gosu && \
gosu nobody true
# Default entry-point to use if running locally
# It will preserve attributes of created files
COPY entrypoint.sh /scripts/
WORKDIR /workspace
ENTRYPOINT ["/scripts/entrypoint.sh"]