[CI] Build pip wheel with RMM support (#9383)
This commit is contained in:
parent
6e18d3a290
commit
e082718c66
@ -20,16 +20,18 @@ case "${container}" in
|
|||||||
cpu)
|
cpu)
|
||||||
;;
|
;;
|
||||||
|
|
||||||
gpu|rmm)
|
gpu)
|
||||||
BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION"
|
BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION"
|
||||||
BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
|
BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
|
||||||
if [[ $container == "rmm" ]]
|
|
||||||
then
|
|
||||||
BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION"
|
|
||||||
fi
|
|
||||||
;;
|
;;
|
||||||
|
|
||||||
gpu_build_centos7|jvm_gpu_build)
|
gpu_build_centos7)
|
||||||
|
BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION"
|
||||||
|
BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION"
|
||||||
|
BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
|
||||||
|
;;
|
||||||
|
|
||||||
|
jvm_gpu_build)
|
||||||
BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION"
|
BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION"
|
||||||
BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION"
|
BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION"
|
||||||
;;
|
;;
|
||||||
|
|||||||
@ -2,9 +2,11 @@
|
|||||||
|
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
|
WHEEL_TAG=manylinux2014_x86_64
|
||||||
|
|
||||||
source tests/buildkite/conftest.sh
|
source tests/buildkite/conftest.sh
|
||||||
|
|
||||||
echo "--- Build with CUDA ${CUDA_VERSION}, RMM enabled"
|
echo "--- Build with CUDA ${CUDA_VERSION} with RMM"
|
||||||
|
|
||||||
if [[ ($is_pull_request == 1) || ($is_release_branch == 0) ]]
|
if [[ ($is_pull_request == 1) || ($is_release_branch == 0) ]]
|
||||||
then
|
then
|
||||||
@ -13,14 +15,40 @@ else
|
|||||||
arch_flag=""
|
arch_flag=""
|
||||||
fi
|
fi
|
||||||
|
|
||||||
command_wrapper="tests/ci_build/ci_build.sh rmm docker --build-arg "`
|
command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 docker --build-arg "`
|
||||||
`"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "`
|
`"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "`
|
||||||
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION --build-arg "`
|
`"NCCL_VERSION_ARG=$NCCL_VERSION --build-arg "`
|
||||||
`"NCCL_VERSION_ARG=$NCCL_VERSION"
|
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
|
||||||
|
|
||||||
echo "--- Build libxgboost from the source"
|
echo "--- Build libxgboost from the source"
|
||||||
$command_wrapper tests/ci_build/build_via_cmake.sh --conda-env=gpu_test -DUSE_CUDA=ON \
|
$command_wrapper tests/ci_build/prune_libnccl.sh
|
||||||
-DUSE_NCCL=ON -DPLUGIN_RMM=ON ${arch_flag}
|
$command_wrapper tests/ci_build/build_via_cmake.sh -DCMAKE_PREFIX_PATH="/opt/grpc;/opt/rmm" \
|
||||||
|
-DUSE_CUDA=ON -DUSE_NCCL=ON -DUSE_OPENMP=ON -DHIDE_CXX_SYMBOLS=ON -DPLUGIN_FEDERATED=ON \
|
||||||
|
-DPLUGIN_RMM=ON -DUSE_NCCL_LIB_PATH=ON -DNCCL_INCLUDE_DIR=/usr/include \
|
||||||
|
-DNCCL_LIBRARY=/workspace/libnccl_static.a ${arch_flag}
|
||||||
|
echo "--- Build binary wheel"
|
||||||
|
$command_wrapper bash -c \
|
||||||
|
"cd python-package && rm -rf dist/* && pip wheel --no-deps -v . --wheel-dir dist/"
|
||||||
|
$command_wrapper python tests/ci_build/rename_whl.py python-package/dist/*.whl \
|
||||||
|
${BUILDKITE_COMMIT} ${WHEEL_TAG}
|
||||||
|
|
||||||
|
echo "--- Audit binary wheel to ensure it's compliant with manylinux2014 standard"
|
||||||
|
tests/ci_build/ci_build.sh auditwheel_x86_64 docker auditwheel repair \
|
||||||
|
--plat ${WHEEL_TAG} python-package/dist/*.whl
|
||||||
|
$command_wrapper python tests/ci_build/rename_whl.py wheelhouse/*.whl \
|
||||||
|
${BUILDKITE_COMMIT} ${WHEEL_TAG}
|
||||||
|
mv -v wheelhouse/*.whl python-package/dist/
|
||||||
|
# Make sure that libgomp.so is vendored in the wheel
|
||||||
|
tests/ci_build/ci_build.sh auditwheel_x86_64 docker bash -c \
|
||||||
|
"unzip -l python-package/dist/*.whl | grep libgomp || exit -1"
|
||||||
|
|
||||||
|
echo "--- Upload Python wheel"
|
||||||
|
buildkite-agent artifact upload python-package/dist/*.whl
|
||||||
|
if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
|
||||||
|
then
|
||||||
|
aws s3 cp python-package/dist/*.whl s3://xgboost-nightly-builds/experimental_build_with_rmm/ \
|
||||||
|
--acl public-read --no-progress
|
||||||
|
fi
|
||||||
|
|
||||||
echo "-- Stash C++ test executable (testxgboost)"
|
echo "-- Stash C++ test executable (testxgboost)"
|
||||||
buildkite-agent artifact upload build/testxgboost
|
buildkite-agent artifact upload build/testxgboost
|
||||||
|
|||||||
@ -17,11 +17,12 @@ fi
|
|||||||
|
|
||||||
command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 docker --build-arg "`
|
command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 docker --build-arg "`
|
||||||
`"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "`
|
`"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "`
|
||||||
`"NCCL_VERSION_ARG=$NCCL_VERSION"
|
`"NCCL_VERSION_ARG=$NCCL_VERSION --build-arg "`
|
||||||
|
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
|
||||||
|
|
||||||
echo "--- Build libxgboost from the source"
|
echo "--- Build libxgboost from the source"
|
||||||
$command_wrapper tests/ci_build/prune_libnccl.sh
|
$command_wrapper tests/ci_build/prune_libnccl.sh
|
||||||
$command_wrapper tests/ci_build/build_via_cmake.sh -DCMAKE_PREFIX_PATH=/opt/grpc \
|
$command_wrapper tests/ci_build/build_via_cmake.sh -DCMAKE_PREFIX_PATH="/opt/grpc" \
|
||||||
-DUSE_CUDA=ON -DUSE_NCCL=ON -DUSE_OPENMP=ON -DHIDE_CXX_SYMBOLS=ON -DPLUGIN_FEDERATED=ON \
|
-DUSE_CUDA=ON -DUSE_NCCL=ON -DUSE_OPENMP=ON -DHIDE_CXX_SYMBOLS=ON -DPLUGIN_FEDERATED=ON \
|
||||||
-DUSE_NCCL_LIB_PATH=ON -DNCCL_INCLUDE_DIR=/usr/include \
|
-DUSE_NCCL_LIB_PATH=ON -DNCCL_INCLUDE_DIR=/usr/include \
|
||||||
-DNCCL_LIBRARY=/workspace/libnccl_static.a ${arch_flag}
|
-DNCCL_LIBRARY=/workspace/libnccl_static.a ${arch_flag}
|
||||||
|
|||||||
@ -16,7 +16,6 @@ steps:
|
|||||||
- "tests/buildkite/build-containers.sh cpu"
|
- "tests/buildkite/build-containers.sh cpu"
|
||||||
- "tests/buildkite/build-containers.sh gpu"
|
- "tests/buildkite/build-containers.sh gpu"
|
||||||
- "tests/buildkite/build-containers.sh gpu_build_centos7"
|
- "tests/buildkite/build-containers.sh gpu_build_centos7"
|
||||||
- "tests/buildkite/build-containers.sh rmm"
|
|
||||||
key: build-containers
|
key: build-containers
|
||||||
agents:
|
agents:
|
||||||
queue: linux-amd64-cpu
|
queue: linux-amd64-cpu
|
||||||
|
|||||||
@ -16,8 +16,8 @@ echo "--- Run Google Tests with CUDA, using a GPU, RMM enabled"
|
|||||||
rm -rfv build/
|
rm -rfv build/
|
||||||
buildkite-agent artifact download "build/testxgboost" . --step build-cuda-with-rmm
|
buildkite-agent artifact download "build/testxgboost" . --step build-cuda-with-rmm
|
||||||
chmod +x build/testxgboost
|
chmod +x build/testxgboost
|
||||||
tests/ci_build/ci_build.sh rmm nvidia-docker \
|
tests/ci_build/ci_build.sh gpu nvidia-docker \
|
||||||
--build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
|
--build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
|
||||||
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
|
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
|
||||||
--build-arg NCCL_VERSION_ARG=$NCCL_VERSION bash -c \
|
--build-arg NCCL_VERSION_ARG=$NCCL_VERSION \
|
||||||
"source activate gpu_test && build/testxgboost --use-rmm-pool"
|
build/testxgboost --use-rmm-pool
|
||||||
|
|||||||
@ -2,6 +2,7 @@ ARG CUDA_VERSION_ARG
|
|||||||
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-centos7
|
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-centos7
|
||||||
ARG CUDA_VERSION_ARG
|
ARG CUDA_VERSION_ARG
|
||||||
ARG NCCL_VERSION_ARG
|
ARG NCCL_VERSION_ARG
|
||||||
|
ARG RAPIDS_VERSION_ARG
|
||||||
|
|
||||||
# Install all basic requirements
|
# Install all basic requirements
|
||||||
RUN \
|
RUN \
|
||||||
@ -16,8 +17,8 @@ RUN \
|
|||||||
bash conda.sh -b -p /opt/mambaforge && \
|
bash conda.sh -b -p /opt/mambaforge && \
|
||||||
/opt/mambaforge/bin/python -m pip install awscli && \
|
/opt/mambaforge/bin/python -m pip install awscli && \
|
||||||
# CMake
|
# CMake
|
||||||
wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \
|
wget -nv -nc https://cmake.org/files/v3.24/cmake-3.24.0-linux-x86_64.sh --no-check-certificate && \
|
||||||
bash cmake-3.18.0-Linux-x86_64.sh --skip-license --prefix=/usr
|
bash cmake-3.24.0-linux-x86_64.sh --skip-license --prefix=/usr
|
||||||
|
|
||||||
# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
|
# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
|
||||||
RUN \
|
RUN \
|
||||||
@ -33,9 +34,21 @@ ENV PATH=/opt/mambaforge/bin:/usr/local/ninja:$PATH
|
|||||||
ENV CC=/opt/rh/devtoolset-9/root/usr/bin/gcc
|
ENV CC=/opt/rh/devtoolset-9/root/usr/bin/gcc
|
||||||
ENV CXX=/opt/rh/devtoolset-9/root/usr/bin/c++
|
ENV CXX=/opt/rh/devtoolset-9/root/usr/bin/c++
|
||||||
ENV CPP=/opt/rh/devtoolset-9/root/usr/bin/cpp
|
ENV CPP=/opt/rh/devtoolset-9/root/usr/bin/cpp
|
||||||
|
ENV CUDAHOSTCXX=/opt/rh/devtoolset-9/root/usr/bin/c++
|
||||||
|
|
||||||
ENV GOSU_VERSION 1.10
|
ENV GOSU_VERSION 1.10
|
||||||
|
|
||||||
|
# Install RMM
|
||||||
|
RUN git clone -b v${RAPIDS_VERSION_ARG}.00 https://github.com/rapidsai/rmm.git --recurse-submodules --depth 1 && \
|
||||||
|
pushd rmm && \
|
||||||
|
mkdir build && \
|
||||||
|
pushd build && \
|
||||||
|
cmake .. -GNinja -DCMAKE_INSTALL_PREFIX=/opt/rmm -DCUDA_STATIC_RUNTIME=ON && \
|
||||||
|
cmake --build . --target install && \
|
||||||
|
popd && \
|
||||||
|
popd && \
|
||||||
|
rm -rf rmm
|
||||||
|
|
||||||
# Install gRPC
|
# Install gRPC
|
||||||
RUN git clone -b v1.49.1 https://github.com/grpc/grpc.git \
|
RUN git clone -b v1.49.1 https://github.com/grpc/grpc.git \
|
||||||
--recurse-submodules --depth 1 && \
|
--recurse-submodules --depth 1 && \
|
||||||
|
|||||||
@ -1,49 +0,0 @@
|
|||||||
ARG CUDA_VERSION_ARG
|
|
||||||
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu20.04
|
|
||||||
ARG CUDA_VERSION_ARG
|
|
||||||
ARG RAPIDS_VERSION_ARG
|
|
||||||
ARG NCCL_VERSION_ARG
|
|
||||||
|
|
||||||
# Environment
|
|
||||||
ENV DEBIAN_FRONTEND noninteractive
|
|
||||||
SHELL ["/bin/bash", "-c"] # Use Bash as shell
|
|
||||||
|
|
||||||
# Install all basic requirements
|
|
||||||
RUN \
|
|
||||||
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub && \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y wget unzip bzip2 libgomp1 build-essential ninja-build git && \
|
|
||||||
# Python
|
|
||||||
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \
|
|
||||||
bash conda.sh -b -p /opt/mambaforge
|
|
||||||
|
|
||||||
# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
|
|
||||||
RUN \
|
|
||||||
export CUDA_SHORT=`echo $CUDA_VERSION_ARG | grep -o -E '[0-9]+\.[0-9]'` && \
|
|
||||||
export NCCL_VERSION=$NCCL_VERSION_ARG && \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --allow-downgrades --allow-change-held-packages libnccl2=${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-dev=${NCCL_VERSION}+cuda${CUDA_SHORT}
|
|
||||||
|
|
||||||
ENV PATH=/opt/mambaforge/bin:$PATH
|
|
||||||
|
|
||||||
# Create new Conda environment with RMM
|
|
||||||
RUN \
|
|
||||||
conda install -c conda-forge mamba && \
|
|
||||||
mamba create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \
|
|
||||||
python=3.10 rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG cmake && \
|
|
||||||
mamba clean --all
|
|
||||||
|
|
||||||
ENV GOSU_VERSION 1.10
|
|
||||||
|
|
||||||
# Install lightweight sudo (not bound to TTY)
|
|
||||||
RUN set -ex; \
|
|
||||||
wget -nv -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \
|
|
||||||
chmod +x /usr/local/bin/gosu && \
|
|
||||||
gosu nobody true
|
|
||||||
|
|
||||||
# Default entry-point to use if running locally
|
|
||||||
# It will preserve attributes of created files
|
|
||||||
COPY entrypoint.sh /scripts/
|
|
||||||
|
|
||||||
WORKDIR /workspace
|
|
||||||
ENTRYPOINT ["/scripts/entrypoint.sh"]
|
|
||||||
@ -26,7 +26,7 @@ set_property(TARGET test PROPERTY CUDA_ARCHITECTURES \${CMAKE_CUDA_ARCHITECTURES
|
|||||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
cmake . -GNinja
|
cmake . -GNinja -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
|
||||||
gen_code=$(grep -o -- '--generate-code=\S*' compile_commands.json | paste -sd ' ')
|
gen_code=$(grep -o -- '--generate-code=\S*' compile_commands.json | paste -sd ' ')
|
||||||
|
|
||||||
nvprune ${gen_code} /usr/lib64/libnccl_static.a -o ../libnccl_static.a
|
nvprune ${gen_code} /usr/lib64/libnccl_static.a -o ../libnccl_static.a
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user