Use dlopen to load NCCL. (#9796)
This PR adds optional support for loading NCCL with `dlopen` as an alternative to compile-time linking. This addresses the size bloat of the PyPI binary release. - Add a CMake option to load `nccl` at runtime. - Add an NCCL stub. After this, `nccl` will be fetched from PyPI when XGBoost is installed with pip, either directly by a user or through a `pyproject.toml` dependency. Anyone who wants to link NCCL at compile time can continue to do so without any change. At the moment, this is Linux-only, since we only support multi-node multi-GPU (MNMG) training on Linux.
This commit is contained in:
@@ -21,11 +21,18 @@ command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 docker --build-arg
|
||||
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
|
||||
|
||||
echo "--- Build libxgboost from the source"
|
||||
$command_wrapper tests/ci_build/prune_libnccl.sh
|
||||
$command_wrapper tests/ci_build/build_via_cmake.sh -DCMAKE_PREFIX_PATH="/opt/grpc;/opt/rmm" \
|
||||
-DUSE_CUDA=ON -DUSE_NCCL=ON -DUSE_OPENMP=ON -DHIDE_CXX_SYMBOLS=ON -DPLUGIN_FEDERATED=ON \
|
||||
-DPLUGIN_RMM=ON -DUSE_NCCL_LIB_PATH=ON -DNCCL_INCLUDE_DIR=/usr/include \
|
||||
-DNCCL_LIBRARY=/workspace/libnccl_static.a ${arch_flag}
|
||||
$command_wrapper tests/ci_build/build_via_cmake.sh \
|
||||
-DCMAKE_PREFIX_PATH="/opt/grpc;/opt/rmm" \
|
||||
-DUSE_CUDA=ON \
|
||||
-DUSE_OPENMP=ON \
|
||||
-DHIDE_CXX_SYMBOLS=ON \
|
||||
-DPLUGIN_FEDERATED=ON \
|
||||
-DPLUGIN_RMM=ON \
|
||||
-DUSE_NCCL=ON \
|
||||
-DUSE_NCCL_LIB_PATH=ON \
|
||||
-DNCCL_INCLUDE_DIR=/usr/include \
|
||||
-DUSE_DLOPEN_NCCL=ON \
|
||||
${arch_flag}
|
||||
echo "--- Build binary wheel"
|
||||
$command_wrapper bash -c \
|
||||
"cd python-package && rm -rf dist/* && pip wheel --no-deps -v . --wheel-dir dist/"
|
||||
|
||||
@@ -21,11 +21,17 @@ command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 docker --build-arg
|
||||
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
|
||||
|
||||
echo "--- Build libxgboost from the source"
|
||||
$command_wrapper tests/ci_build/prune_libnccl.sh
|
||||
$command_wrapper tests/ci_build/build_via_cmake.sh -DCMAKE_PREFIX_PATH="/opt/grpc" \
|
||||
-DUSE_CUDA=ON -DUSE_NCCL=ON -DUSE_OPENMP=ON -DHIDE_CXX_SYMBOLS=ON -DPLUGIN_FEDERATED=ON \
|
||||
-DUSE_NCCL_LIB_PATH=ON -DNCCL_INCLUDE_DIR=/usr/include \
|
||||
-DNCCL_LIBRARY=/workspace/libnccl_static.a ${arch_flag}
|
||||
$command_wrapper tests/ci_build/build_via_cmake.sh \
|
||||
-DCMAKE_PREFIX_PATH="/opt/grpc" \
|
||||
-DUSE_CUDA=ON \
|
||||
-DUSE_OPENMP=ON \
|
||||
-DHIDE_CXX_SYMBOLS=ON \
|
||||
-DPLUGIN_FEDERATED=ON \
|
||||
-DUSE_NCCL=ON \
|
||||
-DUSE_NCCL_LIB_PATH=ON \
|
||||
-DNCCL_INCLUDE_DIR=/usr/include \
|
||||
-DUSE_DLOPEN_NCCL=ON \
|
||||
${arch_flag}
|
||||
echo "--- Build binary wheel"
|
||||
$command_wrapper bash -c \
|
||||
"cd python-package && rm -rf dist/* && pip wheel --no-deps -v . --wheel-dir dist/"
|
||||
|
||||
@@ -10,6 +10,7 @@ chmod +x build/testxgboost
|
||||
tests/ci_build/ci_build.sh gpu nvidia-docker \
|
||||
--build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
|
||||
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
|
||||
--build-arg NCCL_VERSION_ARG=$NCCL_VERSION \
|
||||
build/testxgboost
|
||||
|
||||
echo "--- Run Google Tests with CUDA, using a GPU, RMM enabled"
|
||||
|
||||
@@ -13,4 +13,5 @@ chmod +x build/testxgboost
|
||||
tests/ci_build/ci_build.sh gpu nvidia-docker \
|
||||
--build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
|
||||
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
|
||||
--build-arg NCCL_VERSION_ARG=$NCCL_VERSION \
|
||||
build/testxgboost --gtest_filter=*MGPU*
|
||||
|
||||
@@ -24,7 +24,8 @@ export CI_DOCKER_EXTRA_PARAMS_INIT='--shm-size=4g'
|
||||
|
||||
command_wrapper="tests/ci_build/ci_build.sh gpu nvidia-docker --build-arg "`
|
||||
`"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "`
|
||||
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
|
||||
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION --build-arg "`
|
||||
`"NCCL_VERSION_ARG=$NCCL_VERSION"
|
||||
|
||||
# Run specified test suite
|
||||
case "$suite" in
|
||||
|
||||
Reference in New Issue
Block a user