diff --git a/plugin/updater_gpu/README.md b/plugin/updater_gpu/README.md
index 1f061e034..142b61459 100644
--- a/plugin/updater_gpu/README.md
+++ b/plugin/updater_gpu/README.md
@@ -21,7 +21,7 @@
 n_gpus | ✖ | ✔ |
 
 The device ordinal can be selected using the 'gpu_id' parameter, which defaults to 0.
 
-Multiple GPUs can be used with the grow_gpu_hist parameter using the n_gpus parameter, which defaults to -1 (indicating use all visible GPUs). If gpu_id is specified as non-zero, the gpu device order is mod(gpu_id + i) % n_visible_devices for i=0 to n_gpus-1. As with GPU vs. CPU, multi-GPU will not always be faster than a single GPU due to PCI bus bandwidth that can limit performance. For example, when n_features * n_bins * 2^depth divided by time of each round/iteration becomes comparable to the real PCI 16x bus bandwidth of order 4GB/s to 10GB/s, then AllReduce will dominant code speed and multiple GPUs become ineffective at increasing performance. Also, CPU overhead between GPU calls can limit usefulness of multiple GPUs.
+Multiple GPUs can be used with the grow_gpu_hist parameter using the n_gpus parameter, which defaults to 1. If this is set to -1, all available GPUs will be used. If gpu_id is specified as non-zero, the gpu device order is mod(gpu_id + i) % n_visible_devices for i=0 to n_gpus-1. As with GPU vs. CPU, multi-GPU will not always be faster than a single GPU due to PCI bus bandwidth that can limit performance. For example, when n_features * n_bins * 2^depth divided by time of each round/iteration becomes comparable to the real PCI 16x bus bandwidth of order 4GB/s to 10GB/s, then AllReduce will dominant code speed and multiple GPUs become ineffective at increasing performance. Also, CPU overhead between GPU calls can limit usefulness of multiple GPUs.
 
 This plugin currently works with the CLI version and python version.
diff --git a/plugin/updater_gpu/src/gpu_hist_builder.cuh b/plugin/updater_gpu/src/gpu_hist_builder.cuh
index d553b9ce0..115d94e54 100644
--- a/plugin/updater_gpu/src/gpu_hist_builder.cuh
+++ b/plugin/updater_gpu/src/gpu_hist_builder.cuh
@@ -121,8 +121,8 @@ class GPUHistBuilder {
   std::vector> fidx_min_map;
   std::vector> feature_segments;
   std::vector> prediction_cache;
-  std::vector> position;
-  std::vector> position_tmp;
+  std::vector> position;
+  std::vector> position_tmp;
   std::vector device_matrix;
   std::vector> device_gpair;
   std::vector> gidx_feature_map;
diff --git a/src/tree/param.h b/src/tree/param.h
index a09e2b59f..283440307 100644
--- a/src/tree/param.h
+++ b/src/tree/param.h
@@ -196,7 +196,7 @@ struct TrainParam : public dmlc::Parameter {
         .describe("gpu to use for single gpu algorithms");
     DMLC_DECLARE_FIELD(n_gpus)
         .set_lower_bound(-1)
-        .set_default(-1)
+        .set_default(1)
         .describe("Number of GPUs to use for multi-gpu algorithms: -1=use all GPUs");
     // add alias of parameters
     DMLC_DECLARE_ALIAS(reg_lambda, lambda);
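
For reference, a minimal Python sketch of how the parameters touched by this patch are used with the plugin updater. The dataset path and the booster parameters other than `updater`, `gpu_id`, and `n_gpus` are placeholders, not part of the patch; note that after this change `n_gpus` defaults to 1, so -1 must be set explicitly to use every visible GPU.

```python
# Minimal sketch (not part of the patch): training with the GPU histogram
# updater from plugin/updater_gpu via the Python API. 'train.libsvm' and the
# parameters other than updater/gpu_id/n_gpus are placeholders.
import xgboost as xgb

dtrain = xgb.DMatrix('train.libsvm')  # placeholder training data

param = {
    'objective': 'binary:logistic',
    'max_depth': 6,
    'updater': 'grow_gpu_hist',  # GPU histogram tree construction
    'gpu_id': 0,                 # first device ordinal, defaults to 0
    'n_gpus': -1,                # new default is 1; -1 means use all visible GPUs
}

# With gpu_id=1, n_gpus=2 and 3 visible devices, the devices used are
# (1 + 0) % 3 = 1 and (1 + 1) % 3 = 2, per (gpu_id + i) % n_visible_devices.
bst = xgb.train(param, dtrain, num_boost_round=10)
```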