[GPU-Plugin] Make node_idx type 32 bit for hist algo. Set default n_gpus to 1. (#2445)
This commit is contained in:
parent
169c983b5f
commit
0e48f87529
@ -21,7 +21,7 @@ n_gpus | ✖ | ✔ |
|
|||||||
|
|
||||||
The device ordinal can be selected using the 'gpu_id' parameter, which defaults to 0.
|
The device ordinal can be selected using the 'gpu_id' parameter, which defaults to 0.
|
||||||
|
|
||||||
Multiple GPUs can be used with the grow_gpu_hist parameter using the n_gpus parameter, which defaults to -1 (indicating use all visible GPUs). If gpu_id is specified as non-zero, the gpu device order is (gpu_id + i) % n_visible_devices for i=0 to n_gpus-1. As with GPU vs. CPU, multi-GPU will not always be faster than a single GPU due to PCI bus bandwidth that can limit performance. For example, when n_features * n_bins * 2^depth divided by time of each round/iteration becomes comparable to the real PCI 16x bus bandwidth of order 4GB/s to 10GB/s, then AllReduce will dominate code speed and multiple GPUs become ineffective at increasing performance. Also, CPU overhead between GPU calls can limit usefulness of multiple GPUs.
|
Multiple GPUs can be used with the grow_gpu_hist parameter using the n_gpus parameter, which defaults to 1. If this is set to -1, all available GPUs will be used. If gpu_id is specified as non-zero, the gpu device order is (gpu_id + i) % n_visible_devices for i=0 to n_gpus-1. As with GPU vs. CPU, multi-GPU will not always be faster than a single GPU due to PCI bus bandwidth that can limit performance. For example, when n_features * n_bins * 2^depth divided by time of each round/iteration becomes comparable to the real PCI 16x bus bandwidth of order 4GB/s to 10GB/s, then AllReduce will dominate code speed and multiple GPUs become ineffective at increasing performance. Also, CPU overhead between GPU calls can limit usefulness of multiple GPUs.
|
||||||
|
|
||||||
This plugin currently works with the CLI version and python version.
|
This plugin currently works with the CLI version and python version.
|
||||||
|
|
||||||
|
|||||||
@ -121,8 +121,8 @@ class GPUHistBuilder {
|
|||||||
std::vector<dh::dvec<float>> fidx_min_map;
|
std::vector<dh::dvec<float>> fidx_min_map;
|
||||||
std::vector<dh::dvec<int>> feature_segments;
|
std::vector<dh::dvec<int>> feature_segments;
|
||||||
std::vector<dh::dvec<bst_float>> prediction_cache;
|
std::vector<dh::dvec<bst_float>> prediction_cache;
|
||||||
std::vector<dh::dvec<NodeIdT>> position;
|
std::vector<dh::dvec<int>> position;
|
||||||
std::vector<dh::dvec<NodeIdT>> position_tmp;
|
std::vector<dh::dvec<int>> position_tmp;
|
||||||
std::vector<DeviceGMat> device_matrix;
|
std::vector<DeviceGMat> device_matrix;
|
||||||
std::vector<dh::dvec<gpu_gpair>> device_gpair;
|
std::vector<dh::dvec<gpu_gpair>> device_gpair;
|
||||||
std::vector<dh::dvec<int>> gidx_feature_map;
|
std::vector<dh::dvec<int>> gidx_feature_map;
|
||||||
|
|||||||
@ -196,7 +196,7 @@ struct TrainParam : public dmlc::Parameter<TrainParam> {
|
|||||||
.describe("gpu to use for single gpu algorithms");
|
.describe("gpu to use for single gpu algorithms");
|
||||||
DMLC_DECLARE_FIELD(n_gpus)
|
DMLC_DECLARE_FIELD(n_gpus)
|
||||||
.set_lower_bound(-1)
|
.set_lower_bound(-1)
|
||||||
.set_default(-1)
|
.set_default(1)
|
||||||
.describe("Number of GPUs to use for multi-gpu algorithms: -1=use all GPUs");
|
.describe("Number of GPUs to use for multi-gpu algorithms: -1=use all GPUs");
|
||||||
// add alias of parameters
|
// add alias of parameters
|
||||||
DMLC_DECLARE_ALIAS(reg_lambda, lambda);
|
DMLC_DECLARE_ALIAS(reg_lambda, lambda);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user