Use dlopen to load NCCL. (#9796)

This PR adds optional support for loading NCCL with `dlopen` as an alternative to compile-time linking. This addresses the size bloat issue with the PyPI binary release.
- Add CMake option to load `nccl` at runtime.
- Add an NCCL stub.

After this, `nccl` will be fetched from PyPI when pip is used to install XGBoost, either by a user or through `pyproject.toml`. Others who want to link NCCL at compile time can continue to do so without any change.

At the moment, this is Linux only since we only support MNMG on Linux.
This commit is contained in:
Jiaming Yuan
2023-11-22 19:27:31 +08:00
committed by GitHub
parent fedd9674c8
commit 0715ab3c10
45 changed files with 658 additions and 268 deletions

View File

@@ -15,6 +15,8 @@ class BuildConfiguration: # pylint: disable=R0902
use_cuda: bool = False
# Whether to enable NCCL
use_nccl: bool = False
# Whether to load nccl dynamically
use_dlopen_nccl: bool = False
# Whether to enable HDFS
use_hdfs: bool = False
# Whether to enable Azure Storage

View File

@@ -29,7 +29,8 @@ classifiers = [
]
dependencies = [
"numpy",
"scipy"
"scipy",
"nvidia-nccl-cu12 ; platform_system == 'Linux' and platform_machine != 'aarch64'"
]
[project.urls]

View File

@@ -2,14 +2,15 @@
import ctypes
import json
import logging
import os
import pickle
from enum import IntEnum, unique
from typing import Any, Dict, List
from typing import Any, Dict, List, Optional
import numpy as np
from ._typing import _T
from .core import _LIB, _check_call, c_str, from_pystr_to_cstr, py_str
from .core import _LIB, _check_call, build_info, c_str, from_pystr_to_cstr, py_str
LOGGER = logging.getLogger("[xgboost.collective]")
@@ -250,6 +251,31 @@ class CommunicatorContext:
def __init__(self, **args: Any) -> None:
    """Store the communicator arguments and, if needed, locate the NCCL library.

    Parameters
    ----------
    args :
        Keyword arguments kept on ``self.args`` (``__enter__`` forwards them to
        ``init``). When ``dmlc_nccl_path`` is supplied and is not None it is
        used unchanged and no lookup is performed.

    Notes
    -----
    The lookup only happens when the build info of the loaded library reports
    ``USE_DLOPEN_NCCL``. In that case the directory of the ``nvidia.nccl.lib``
    package (the PyPI NCCL wheel) is used to fill in ``dmlc_nccl_path``. If the
    package is not installed or has no file location, ``self.args`` is left
    untouched.
    """
    self.args = args
    key = "dmlc_nccl_path"

    # An explicitly provided path always takes precedence over auto-discovery.
    if args.get(key) is not None:
        return

    # Treat a build info without the flag as "runtime loading disabled" instead
    # of failing with KeyError.
    if not build_info().get("USE_DLOPEN_NCCL", False):
        return

    # Keep the ``try`` limited to the only statement that can raise ImportError.
    try:
        # PyPI package of NCCL.
        from nvidia.nccl import lib
    except ImportError:
        # The NCCL wheel is not installed; nothing to discover.
        return

    # There are two versions of nvidia-nccl, one is from PyPI, another one from
    # nvidia-pyindex. We support only the first one as the second one is too old
    # (2.9.8 as of writing).
    lib_file = getattr(lib, "__file__", None)
    if not lib_file:
        # No file location is available for the package, so there is no
        # directory to search.
        return

    dirname = os.path.dirname(lib_file)
    if dirname:
        self.args[key] = os.path.join(dirname, "libnccl.so.2")
def __enter__(self) -> Dict[str, Any]:
init(**self.args)

View File

@@ -184,6 +184,13 @@ def _py_version() -> str:
return f.read().strip()
def _register_log_callback(lib: ctypes.CDLL) -> None:
    """Register the Python log callback with a loaded XGBoost shared library.

    Parameters
    ----------
    lib :
        Handle of the loaded shared library.

    Raises
    ------
    XGBoostError
        If ``XGBRegisterLogCallback`` returns a non-zero status. The message is
        the text reported by ``XGBGetLastError``.
    """
    # Declare the return type so that ctypes returns the message as ``bytes``
    # rather than the default C int.
    lib.XGBGetLastError.restype = ctypes.c_char_p
    # The callback object is stored as an attribute of the library handle.
    # NOTE(review): presumably this keeps the callback alive for as long as the
    # library is loaded -- confirm.
    lib.callback = _get_log_callback_func()  # type: ignore
    if lib.XGBRegisterLogCallback(lib.callback) != 0:
        last_error = lib.XGBGetLastError()
        # ``c_char_p`` results arrive as bytes; decode them so the exception
        # carries readable text instead of a ``b'...'`` representation.
        if isinstance(last_error, bytes):
            last_error = last_error.decode("utf-8", errors="replace")
        raise XGBoostError(last_error)
def _load_lib() -> ctypes.CDLL:
"""Load xgboost Library."""
lib_paths = find_lib_path()
@@ -228,10 +235,7 @@ Likely causes:
Error message(s): {os_error_list}
"""
)
lib.XGBGetLastError.restype = ctypes.c_char_p
lib.callback = _get_log_callback_func() # type: ignore
if lib.XGBRegisterLogCallback(lib.callback) != 0:
raise XGBoostError(lib.XGBGetLastError())
_register_log_callback(lib)
def parse(ver: str) -> Tuple[int, int, int]:
"""Avoid dependency on packaging (PEP 440)."""