Deprecate single node multi-gpu mode (#4579)
* deprecate multi-gpu training
* add single node
* add warning
parent 6757654337, commit e94f85f0e4
@@ -28,7 +28,7 @@ option(USE_NVTX "Build with cuda profiling annotations. Developers only." OFF)
 set(NVTX_HEADER_DIR "" CACHE PATH "Path to the stand-alone nvtx header")
 ## CUDA
 option(USE_CUDA "Build with GPU acceleration" OFF)
-option(USE_NCCL "Build with NCCL to enable multi-GPU support." OFF)
+option(USE_NCCL "Build with NCCL to enable distributed GPU support." OFF)
 option(BUILD_WITH_SHARED_NCCL "Build with shared NCCL library." OFF)
 set(GPU_COMPUTE_VER "" CACHE STRING
   "Semicolon separated list of compute versions to be built against, e.g. '35;61'")
@@ -196,9 +196,9 @@ From the command line on Linux starting from the XGBoost directory:
   cmake .. -DUSE_CUDA=ON
   make -j4

-.. note:: Enabling multi-GPU training
+.. note:: Enabling distributed GPU training

-  By default, multi-GPU training is disabled and only a single GPU will be used. To enable multi-GPU training, set the option ``USE_NCCL=ON``. Multi-GPU training depends on NCCL2, available at `this link <https://developer.nvidia.com/nccl>`_. Since NCCL2 is only available for Linux machines, **multi-GPU training is available only for Linux**.
+  By default, distributed GPU training is disabled and only a single GPU will be used. To enable distributed GPU training, set the option ``USE_NCCL=ON``. Distributed GPU training depends on NCCL2, available at `this link <https://developer.nvidia.com/nccl>`_. Since NCCL2 is only available for Linux machines, **distributed GPU training is available only for Linux**.

 .. code-block:: bash

@@ -80,6 +80,8 @@ The GPU algorithms currently work with CLI, Python and R packages. See :doc:`/bu

 Single Node Multi-GPU
 =====================
+.. note:: Single node multi-GPU training is deprecated. Please use distributed GPU training with one process per GPU.
+
 Multiple GPUs can be used with the ``gpu_hist`` tree method using the ``n_gpus`` parameter. which defaults to 1. If this is set to -1 all available GPUs will be used. If ``gpu_id`` is specified as non-zero, the selected gpu devices will be from ``gpu_id`` to ``gpu_id+n_gpus``, please note that ``gpu_id+n_gpus`` must be less than or equal to the number of available GPUs on your system. As with GPU vs. CPU, multi-GPU will not always be faster than a single GPU due to PCI bus bandwidth that can limit performance.

 .. note:: Enabling multi-GPU training
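The deprecated single-process path that the new note refers to looks roughly like the sketch below. This is a minimal illustration, not part of the commit: it assumes a Linux machine with at least one GPU and an XGBoost build configured with USE_CUDA=ON and USE_NCCL=ON, and it uses synthetic data.

import numpy as np
import xgboost as xgb

# Synthetic data purely for illustration.
X = np.random.rand(1000, 20)
y = np.random.randint(2, size=1000)
dtrain = xgb.DMatrix(X, label=y)

params = {
    'objective': 'binary:logistic',
    'tree_method': 'gpu_hist',  # GPU histogram tree method
    'gpu_id': 0,                # first device to use
    'n_gpus': -1,               # -1 = use all available GPUs (deprecated path)
}
bst = xgb.train(params, dtrain, num_boost_round=50)

With this commit the run above still works, but it is exactly the behaviour being deprecated; the recommended replacement is distributed GPU training with one process per GPU.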
@@ -66,7 +66,9 @@ struct LearnerTrainParam : public dmlc::Parameter<LearnerTrainParam> {
     DMLC_DECLARE_FIELD(n_gpus)
         .set_default(0)
         .set_lower_bound(-1)
-        .describe("Number of GPUs to use for multi-gpu algorithms.");
+        .describe("Deprecated, please use distributed training with one "
+                  "process per GPU. "
+                  "Number of GPUs to use for multi-gpu algorithms.");
     DMLC_DECLARE_FIELD(booster)
         .set_default("gbtree")
         .describe("Gradient booster used for training.");
@@ -200,8 +200,14 @@ class LearnerImpl : public Learner {
         << " Internal Error: Always call InitModel or Load before any evaluation.";
     this->ValidateDMatrix(dmat);
     CHECK(this->gbm_) << " Internal: GBM is not set";
-    if (this->gbm_->UseGPU() && cfg_.find("n_gpus") == cfg_.cend()) {
-      tparam_.n_gpus = 1;
+    if (this->gbm_->UseGPU()) {
+      if (cfg_.find("n_gpus") == cfg_.cend()) {
+        tparam_.n_gpus = 1;
+      }
+      if (tparam_.n_gpus != 1) {
+        LOG(WARNING) << "Multi-GPU training is deprecated. "
+            "Please use distributed GPU training with one process per GPU.";
+      }
     }
   }

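From the user's side, the new learner logic behaves as follows: when a GPU booster is used and n_gpus was never set, the learner quietly falls back to a single GPU; an explicit request for more than one GPU still trains but now logs the deprecation warning. A rough sketch of both cases, again assuming a GPU-enabled (NCCL) build and synthetic data:

import numpy as np
import xgboost as xgb

X = np.random.rand(1000, 20)
y = np.random.randint(2, size=1000)
dtrain = xgb.DMatrix(X, label=y)

# n_gpus left unset: the learner defaults it to 1 and trains on a single
# GPU without any warning.
xgb.train({'tree_method': 'gpu_hist'}, dtrain, num_boost_round=10)

# n_gpus explicitly set to something other than 1: training proceeds, but
# the learner now logs
#   "Multi-GPU training is deprecated. Please use distributed GPU training
#    with one process per GPU."
xgb.train({'tree_method': 'gpu_hist', 'n_gpus': 2}, dtrain, num_boost_round=10)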