[CI] Build pip wheel with RMM support (#9383)

This commit is contained in:
Philip Hyunsu Cho 2023-07-18 01:52:26 -07:00 committed by GitHub
parent 6e18d3a290
commit e082718c66
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 64 additions and 70 deletions

View File

@ -20,16 +20,18 @@ case "${container}" in
cpu) cpu)
;; ;;
gpu|rmm) gpu)
BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION" BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION"
BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION" BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
if [[ $container == "rmm" ]]
then
BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION"
fi
;; ;;
gpu_build_centos7|jvm_gpu_build) gpu_build_centos7)
BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION"
BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION"
BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
;;
jvm_gpu_build)
BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION" BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION"
BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION" BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION"
;; ;;

View File

@ -2,9 +2,11 @@
set -euo pipefail set -euo pipefail
WHEEL_TAG=manylinux2014_x86_64
source tests/buildkite/conftest.sh source tests/buildkite/conftest.sh
echo "--- Build with CUDA ${CUDA_VERSION}, RMM enabled" echo "--- Build with CUDA ${CUDA_VERSION} with RMM"
if [[ ($is_pull_request == 1) || ($is_release_branch == 0) ]] if [[ ($is_pull_request == 1) || ($is_release_branch == 0) ]]
then then
@ -13,14 +15,40 @@ else
arch_flag="" arch_flag=""
fi fi
command_wrapper="tests/ci_build/ci_build.sh rmm docker --build-arg "` command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 docker --build-arg "`
`"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "` `"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "`
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION --build-arg "` `"NCCL_VERSION_ARG=$NCCL_VERSION --build-arg "`
`"NCCL_VERSION_ARG=$NCCL_VERSION" `"RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
echo "--- Build libxgboost from the source" echo "--- Build libxgboost from the source"
$command_wrapper tests/ci_build/build_via_cmake.sh --conda-env=gpu_test -DUSE_CUDA=ON \ $command_wrapper tests/ci_build/prune_libnccl.sh
-DUSE_NCCL=ON -DPLUGIN_RMM=ON ${arch_flag} $command_wrapper tests/ci_build/build_via_cmake.sh -DCMAKE_PREFIX_PATH="/opt/grpc;/opt/rmm" \
-DUSE_CUDA=ON -DUSE_NCCL=ON -DUSE_OPENMP=ON -DHIDE_CXX_SYMBOLS=ON -DPLUGIN_FEDERATED=ON \
-DPLUGIN_RMM=ON -DUSE_NCCL_LIB_PATH=ON -DNCCL_INCLUDE_DIR=/usr/include \
-DNCCL_LIBRARY=/workspace/libnccl_static.a ${arch_flag}
echo "--- Build binary wheel"
$command_wrapper bash -c \
"cd python-package && rm -rf dist/* && pip wheel --no-deps -v . --wheel-dir dist/"
$command_wrapper python tests/ci_build/rename_whl.py python-package/dist/*.whl \
${BUILDKITE_COMMIT} ${WHEEL_TAG}
echo "--- Audit binary wheel to ensure it's compliant with manylinux2014 standard"
tests/ci_build/ci_build.sh auditwheel_x86_64 docker auditwheel repair \
--plat ${WHEEL_TAG} python-package/dist/*.whl
$command_wrapper python tests/ci_build/rename_whl.py wheelhouse/*.whl \
${BUILDKITE_COMMIT} ${WHEEL_TAG}
mv -v wheelhouse/*.whl python-package/dist/
# Make sure that libgomp.so is vendored in the wheel
tests/ci_build/ci_build.sh auditwheel_x86_64 docker bash -c \
"unzip -l python-package/dist/*.whl | grep libgomp || exit -1"
echo "--- Upload Python wheel"
buildkite-agent artifact upload python-package/dist/*.whl
if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
then
aws s3 cp python-package/dist/*.whl s3://xgboost-nightly-builds/experimental_build_with_rmm/ \
--acl public-read --no-progress
fi
echo "-- Stash C++ test executable (testxgboost)" echo "-- Stash C++ test executable (testxgboost)"
buildkite-agent artifact upload build/testxgboost buildkite-agent artifact upload build/testxgboost

View File

@ -17,11 +17,12 @@ fi
command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 docker --build-arg "` command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 docker --build-arg "`
`"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "` `"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "`
`"NCCL_VERSION_ARG=$NCCL_VERSION" `"NCCL_VERSION_ARG=$NCCL_VERSION --build-arg "`
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
echo "--- Build libxgboost from the source" echo "--- Build libxgboost from the source"
$command_wrapper tests/ci_build/prune_libnccl.sh $command_wrapper tests/ci_build/prune_libnccl.sh
$command_wrapper tests/ci_build/build_via_cmake.sh -DCMAKE_PREFIX_PATH=/opt/grpc \ $command_wrapper tests/ci_build/build_via_cmake.sh -DCMAKE_PREFIX_PATH="/opt/grpc" \
-DUSE_CUDA=ON -DUSE_NCCL=ON -DUSE_OPENMP=ON -DHIDE_CXX_SYMBOLS=ON -DPLUGIN_FEDERATED=ON \ -DUSE_CUDA=ON -DUSE_NCCL=ON -DUSE_OPENMP=ON -DHIDE_CXX_SYMBOLS=ON -DPLUGIN_FEDERATED=ON \
-DUSE_NCCL_LIB_PATH=ON -DNCCL_INCLUDE_DIR=/usr/include \ -DUSE_NCCL_LIB_PATH=ON -DNCCL_INCLUDE_DIR=/usr/include \
-DNCCL_LIBRARY=/workspace/libnccl_static.a ${arch_flag} -DNCCL_LIBRARY=/workspace/libnccl_static.a ${arch_flag}

View File

@ -16,7 +16,6 @@ steps:
- "tests/buildkite/build-containers.sh cpu" - "tests/buildkite/build-containers.sh cpu"
- "tests/buildkite/build-containers.sh gpu" - "tests/buildkite/build-containers.sh gpu"
- "tests/buildkite/build-containers.sh gpu_build_centos7" - "tests/buildkite/build-containers.sh gpu_build_centos7"
- "tests/buildkite/build-containers.sh rmm"
key: build-containers key: build-containers
agents: agents:
queue: linux-amd64-cpu queue: linux-amd64-cpu

View File

@ -16,8 +16,8 @@ echo "--- Run Google Tests with CUDA, using a GPU, RMM enabled"
rm -rfv build/ rm -rfv build/
buildkite-agent artifact download "build/testxgboost" . --step build-cuda-with-rmm buildkite-agent artifact download "build/testxgboost" . --step build-cuda-with-rmm
chmod +x build/testxgboost chmod +x build/testxgboost
tests/ci_build/ci_build.sh rmm nvidia-docker \ tests/ci_build/ci_build.sh gpu nvidia-docker \
--build-arg CUDA_VERSION_ARG=$CUDA_VERSION \ --build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \ --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
--build-arg NCCL_VERSION_ARG=$NCCL_VERSION bash -c \ --build-arg NCCL_VERSION_ARG=$NCCL_VERSION \
"source activate gpu_test && build/testxgboost --use-rmm-pool" build/testxgboost --use-rmm-pool

View File

@ -2,6 +2,7 @@ ARG CUDA_VERSION_ARG
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-centos7 FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-centos7
ARG CUDA_VERSION_ARG ARG CUDA_VERSION_ARG
ARG NCCL_VERSION_ARG ARG NCCL_VERSION_ARG
ARG RAPIDS_VERSION_ARG
# Install all basic requirements # Install all basic requirements
RUN \ RUN \
@ -16,8 +17,8 @@ RUN \
bash conda.sh -b -p /opt/mambaforge && \ bash conda.sh -b -p /opt/mambaforge && \
/opt/mambaforge/bin/python -m pip install awscli && \ /opt/mambaforge/bin/python -m pip install awscli && \
# CMake # CMake
wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \ wget -nv -nc https://cmake.org/files/v3.24/cmake-3.24.0-linux-x86_64.sh --no-check-certificate && \
bash cmake-3.18.0-Linux-x86_64.sh --skip-license --prefix=/usr bash cmake-3.24.0-linux-x86_64.sh --skip-license --prefix=/usr
# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html) # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
RUN \ RUN \
@ -33,9 +34,21 @@ ENV PATH=/opt/mambaforge/bin:/usr/local/ninja:$PATH
ENV CC=/opt/rh/devtoolset-9/root/usr/bin/gcc ENV CC=/opt/rh/devtoolset-9/root/usr/bin/gcc
ENV CXX=/opt/rh/devtoolset-9/root/usr/bin/c++ ENV CXX=/opt/rh/devtoolset-9/root/usr/bin/c++
ENV CPP=/opt/rh/devtoolset-9/root/usr/bin/cpp ENV CPP=/opt/rh/devtoolset-9/root/usr/bin/cpp
ENV CUDAHOSTCXX=/opt/rh/devtoolset-9/root/usr/bin/c++
ENV GOSU_VERSION 1.10 ENV GOSU_VERSION 1.10
# Install RMM
RUN git clone -b v${RAPIDS_VERSION_ARG}.00 https://github.com/rapidsai/rmm.git --recurse-submodules --depth 1 && \
pushd rmm && \
mkdir build && \
pushd build && \
cmake .. -GNinja -DCMAKE_INSTALL_PREFIX=/opt/rmm -DCUDA_STATIC_RUNTIME=ON && \
cmake --build . --target install && \
popd && \
popd && \
rm -rf rmm
# Install gRPC # Install gRPC
RUN git clone -b v1.49.1 https://github.com/grpc/grpc.git \ RUN git clone -b v1.49.1 https://github.com/grpc/grpc.git \
--recurse-submodules --depth 1 && \ --recurse-submodules --depth 1 && \

View File

@ -1,49 +0,0 @@
ARG CUDA_VERSION_ARG
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu20.04
ARG CUDA_VERSION_ARG
ARG RAPIDS_VERSION_ARG
ARG NCCL_VERSION_ARG
# Environment
ENV DEBIAN_FRONTEND noninteractive
SHELL ["/bin/bash", "-c"] # Use Bash as shell
# Install all basic requirements
RUN \
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub && \
apt-get update && \
apt-get install -y wget unzip bzip2 libgomp1 build-essential ninja-build git && \
# Python
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \
bash conda.sh -b -p /opt/mambaforge
# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
RUN \
export CUDA_SHORT=`echo $CUDA_VERSION_ARG | grep -o -E '[0-9]+\.[0-9]'` && \
export NCCL_VERSION=$NCCL_VERSION_ARG && \
apt-get update && \
apt-get install -y --allow-downgrades --allow-change-held-packages libnccl2=${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-dev=${NCCL_VERSION}+cuda${CUDA_SHORT}
ENV PATH=/opt/mambaforge/bin:$PATH
# Create new Conda environment with RMM
RUN \
conda install -c conda-forge mamba && \
mamba create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \
python=3.10 rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG cmake && \
mamba clean --all
ENV GOSU_VERSION 1.10
# Install lightweight sudo (not bound to TTY)
RUN set -ex; \
wget -nv -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \
chmod +x /usr/local/bin/gosu && \
gosu nobody true
# Default entry-point to use if running locally
# It will preserve attributes of created files
COPY entrypoint.sh /scripts/
WORKDIR /workspace
ENTRYPOINT ["/scripts/entrypoint.sh"]

View File

@ -26,7 +26,7 @@ set_property(TARGET test PROPERTY CUDA_ARCHITECTURES \${CMAKE_CUDA_ARCHITECTURES
set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
EOF EOF
cmake . -GNinja cmake . -GNinja -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
gen_code=$(grep -o -- '--generate-code=\S*' compile_commands.json | paste -sd ' ') gen_code=$(grep -o -- '--generate-code=\S*' compile_commands.json | paste -sd ' ')
nvprune ${gen_code} /usr/lib64/libnccl_static.a -o ../libnccl_static.a nvprune ${gen_code} /usr/lib64/libnccl_static.a -o ../libnccl_static.a