This PR adds optional support for loading NCCL with `dlopen` as an alternative to compile-time linking. This addresses the size-bloat issue with the PyPI binary release. - Add a CMake option to load `nccl` at runtime. - Add an NCCL stub. After this change, `nccl` will be fetched from PyPI when using pip to install XGBoost, either directly by a user or via `pyproject.toml`. Those who want to link NCCL at compile time can continue to do so without any change. At the moment, this is Linux-only, since we only support MNMG (multi-node multi-GPU) on Linux.
27 lines
874 B
Plaintext
27 lines
874 B
Plaintext
/**
|
|
* Copyright 2023, XGBoost Contributors
|
|
*/
|
|
#pragma once
|
|
|
|
#include <memory> // for shared_ptr
|
|
|
|
#include "../../src/collective/coll.h" // for Coll
|
|
#include "../../src/common/device_helpers.cuh" // for CUDAStreamView
|
|
#include "federated_comm.h" // for FederatedComm
|
|
#include "xgboost/context.h" // for Context
|
|
#include "xgboost/logging.h"
|
|
|
|
namespace xgboost::collective {
|
|
/**
 * @brief CUDA variant of the federated communicator.
 *
 * Wraps a host-side `FederatedComm` and carries the CUDA stream used for
 * device-side communication.
 */
class CUDAFederatedComm : public FederatedComm {
 public:
  /**
   * @param ctx  Context carrying the device ordinal.
   * @param impl The host-side federated communicator this CUDA variant wraps.
   */
  explicit CUDAFederatedComm(Context const* ctx, std::shared_ptr<FederatedComm const> impl);
  // Stream on which collective operations for this communicator run.
  [[nodiscard]] auto Stream() const { return stream_; }
  // This object is already the CUDA variant; asking it for another CUDA
  // variant indicates a logic error in the caller, hence the fatal log.
  Comm* MakeCUDAVar(Context const*, std::shared_ptr<Coll>) const override {
    LOG(FATAL) << "[Internal Error]: Invalid request for CUDA variant.";
    return nullptr;
  }

 private:
  dh::CUDAStreamView stream_;  // stream used by Stream(), set by the constructor
};
|
|
} // namespace xgboost::collective
|