diff --git a/src/common/common.cc b/src/common/common.cc
index fdada302d..c9899bc99 100644
--- a/src/common/common.cc
+++ b/src/common/common.cc
@@ -20,4 +20,11 @@ GlobalRandomEngine& GlobalRandom() {
   return RandomThreadLocalStore::Get()->engine;
 }
 }  // namespace common
+
+#if !defined(XGBOOST_USE_CUDA)
+int AllVisibleImpl::AllVisible() {
+  return 0;
+}
+#endif
+
 }  // namespace xgboost
diff --git a/src/common/common.cu b/src/common/common.cu
new file mode 100644
index 000000000..bffc69486
--- /dev/null
+++ b/src/common/common.cu
@@ -0,0 +1,18 @@
+/*!
+ * Copyright 2018 XGBoost contributors
+ */
+#include "common.h"
+
+namespace xgboost {
+
+int AllVisibleImpl::AllVisible() {
+  int n_visgpus = 0;
+  try {
+    dh::safe_cuda(cudaGetDeviceCount(&n_visgpus));
+  } catch(const std::exception& e) {
+    return 0;
+  }
+  return n_visgpus;
+}
+
+}  // namespace xgboost
diff --git a/src/common/common.h b/src/common/common.h
index 0ead15c3f..ead260000 100644
--- a/src/common/common.h
+++ b/src/common/common.h
@@ -7,11 +7,39 @@
 #define XGBOOST_COMMON_COMMON_H_
 
 #include <xgboost/base.h>
+#include <xgboost/logging.h>
 
+#include <exception>
+#include <limits>
+#include <type_traits>
 #include <vector>
 #include <string>
 #include <sstream>
 
+#if defined(__CUDACC__)
+#include <thrust/system/cuda/error.h>
+#include <thrust/system_error.h>
+#endif
+
+namespace dh {
+#if defined(__CUDACC__)
+/*
+ * Error handling  functions
+ */
+#define safe_cuda(ans) ThrowOnCudaError((ans), __FILE__, __LINE__)
+
+inline cudaError_t ThrowOnCudaError(cudaError_t code, const char *file,
+                                       int line) {
+  if (code != cudaSuccess) {
+    throw thrust::system_error(code, thrust::cuda_category(),
+                               std::string{file} + "(" +  // NOLINT
+                               std::to_string(line) + ")");
+  }
+  return code;
+}
+#endif
+}  // namespace dh
+
 namespace xgboost {
 namespace common {
 /*!
@@ -103,5 +131,87 @@ class Range {
 };
 
 }  // namespace common
+struct AllVisibleImpl {
+  static int AllVisible();
+};
+/* \brief set of devices across which HostDeviceVector can be distributed.
+ *
+ * Currently implemented as a range, but can be changed later to something else,
+ *   e.g. a bitset
+ */
+class GPUSet {
+ public:
+  explicit GPUSet(int start = 0, int ndevices = 0)
+      : devices_(start, start + ndevices) {}
+
+  static GPUSet Empty() { return GPUSet(); }
+
+  static GPUSet Range(int start, int ndevices) {
+    return ndevices <= 0 ? Empty() : GPUSet{start, ndevices};
+  }
+  /*! \brief ndevices and num_rows both are upper bounds. */
+  static GPUSet All(int ndevices, int num_rows = std::numeric_limits<int>::max()) {
+    int n_devices_visible = AllVisible().Size();
+    if (ndevices < 0 || ndevices >  n_devices_visible) {
+      ndevices = n_devices_visible;
+    }
+    // fix-up device number to be limited by number of rows
+    ndevices = ndevices > num_rows ? num_rows : ndevices;
+    return Range(0, ndevices);
+  }
+  static GPUSet AllVisible() {
+    int n =  AllVisibleImpl::AllVisible();
+    return Range(0, n);
+  }
+  /*! \brief Ensure gpu_id is correct, so not dependent upon user knowing details */
+  static int GetDeviceIdx(int gpu_id) {
+    auto devices = AllVisible();
+    CHECK(!devices.IsEmpty()) << "Empty device.";
+    return (std::abs(gpu_id) + 0) % devices.Size();
+  }
+  /*! \brief Counting from gpu_id */
+  GPUSet Normalised(int gpu_id) const {
+    return Range(gpu_id, Size());
+  }
+  /*! \brief Counting from 0 */
+  GPUSet Unnormalised() const {
+    return Range(0, Size());
+  }
+
+  int Size() const {
+    int res = *devices_.end() - *devices_.begin();
+    return res < 0 ? 0 : res;
+  }
+  /*! \brief Get normalised device id. */
+  int operator[](int index) const {
+    CHECK(index >= 0 && index < Size());
+    return *devices_.begin() + index;
+  }
+
+  bool IsEmpty() const { return Size() == 0; }
+  /*! \brief Get un-normalised index. */
+  int Index(int device) const {
+    CHECK(Contains(device));
+    return device - *devices_.begin();
+  }
+
+  bool Contains(int device) const {
+    return *devices_.begin() <= device && device < *devices_.end();
+  }
+
+  common::Range::Iterator begin() const { return devices_.begin(); }  // NOLINT
+  common::Range::Iterator end() const { return devices_.end(); }      // NOLINT
+
+  friend bool operator==(const GPUSet& lhs, const GPUSet& rhs) {
+    return lhs.devices_ == rhs.devices_;
+  }
+  friend bool operator!=(const GPUSet& lhs, const GPUSet& rhs) {
+    return !(lhs == rhs);
+  }
+
+ private:
+  common::Range devices_;
+};
+
 }  // namespace xgboost
 #endif  // XGBOOST_COMMON_COMMON_H_
diff --git a/src/common/device_helpers.cuh b/src/common/device_helpers.cuh
index 844edad59..c954fcac3 100644
--- a/src/common/device_helpers.cuh
+++ b/src/common/device_helpers.cuh
@@ -9,7 +9,6 @@
 #include <xgboost/logging.h>
 
 #include "common.h"
-#include "gpu_set.h"
 
 #include <algorithm>
 #include <chrono>
diff --git a/src/common/gpu_set.h b/src/common/gpu_set.h
deleted file mode 100644
index ed9b595a1..000000000
--- a/src/common/gpu_set.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/*!
- * Copyright 2018 XGBoost contributors
- */
-#ifndef XGBOOST_COMMON_GPU_SET_H_
-#define XGBOOST_COMMON_GPU_SET_H_
-
-#include <xgboost/base.h>
-#include <xgboost/logging.h>
-
-#include <limits>
-#include <string>
-
-#include "common.h"
-#include "span.h"
-
-#if defined(__CUDACC__)
-#include <thrust/system/cuda/error.h>
-#include <thrust/system_error.h>
-#endif
-
-namespace dh {
-#if defined(__CUDACC__)
-/*
- * Error handling  functions
- */
-#define safe_cuda(ans) ThrowOnCudaError((ans), __FILE__, __LINE__)
-
-inline cudaError_t ThrowOnCudaError(cudaError_t code, const char *file,
-                                       int line) {
-  if (code != cudaSuccess) {
-    throw thrust::system_error(code, thrust::cuda_category(),
-                               std::string{file} + "(" +  // NOLINT
-                               std::to_string(line) + ")");
-  }
-  return code;
-}
-#endif
-}  // namespace dh
-
-namespace xgboost {
-
-/* \brief set of devices across which HostDeviceVector can be distributed.
- *
- * Currently implemented as a range, but can be changed later to something else,
- *   e.g. a bitset
- */
-class GPUSet {
- public:
-  explicit GPUSet(int start = 0, int ndevices = 0)
-      : devices_(start, start + ndevices) {}
-
-  static GPUSet Empty() { return GPUSet(); }
-
-  static GPUSet Range(int start, int ndevices) {
-    return ndevices <= 0 ? Empty() : GPUSet{start, ndevices};
-  }
-  /* \brief ndevices and num_rows both are upper bounds. */
-  static GPUSet All(int ndevices, int num_rows = std::numeric_limits<int>::max()) {
-    int n_devices_visible = AllVisible().Size();
-    ndevices = ndevices < 0 ? n_devices_visible : ndevices;
-    // fix-up device number to be limited by number of rows
-    ndevices = ndevices > num_rows ? num_rows : ndevices;
-    return Range(0, ndevices);
-  }
-
-  static GPUSet AllVisible() {
-    int n_visgpus = 0;
-#if defined(__CUDACC__)
-    dh::safe_cuda(cudaGetDeviceCount(&n_visgpus));
-#endif
-    return Range(0, n_visgpus);
-  }
-  /* \brief Ensure gpu_id is correct, so not dependent upon user knowing details */
-  static int GetDeviceIdx(int gpu_id) {
-    return (std::abs(gpu_id) + 0) % AllVisible().Size();
-  }
-  /* \brief Counting from gpu_id */
-  GPUSet Normalised(int gpu_id) const {
-    return Range(gpu_id, *devices_.end() + gpu_id);
-  }
-  /* \brief Counting from 0 */
-  GPUSet Unnormalised() const {
-    return Range(0, *devices_.end() - *devices_.begin());
-  }
-
-  int Size() const {
-    int res = *devices_.end() - *devices_.begin();
-    return res < 0 ? 0 : res;
-  }
-
-  int operator[](int index) const {
-    CHECK(index >= 0 && index < *(devices_.end()));
-    return *devices_.begin() + index;
-  }
-
-  bool IsEmpty() const { return Size() == 0; }  // NOLINT
-
-  int Index(int device) const {
-    CHECK(Contains(device));
-    return device - *devices_.begin();
-  }
-
-  bool Contains(int device) const {
-    return *devices_.begin() <= device && device < *devices_.end();
-  }
-
-  common::Range::Iterator begin() const { return devices_.begin(); }  // NOLINT
-  common::Range::Iterator end() const { return devices_.end(); }      // NOLINT
-
-  friend bool operator==(const GPUSet& lhs, const GPUSet& rhs) {
-    return lhs.devices_ == rhs.devices_;
-  }
-  friend bool operator!=(const GPUSet& lhs, const GPUSet& rhs) {
-    return !(lhs == rhs);
-  }
-
- private:
-  common::Range devices_;
-};
-}  // namespace xgboost
-
-#endif  // XGBOOST_COMMON_GPU_SET_H_
diff --git a/src/common/host_device_vector.cu b/src/common/host_device_vector.cu
index 358e9ecdd..179b1ecbd 100644
--- a/src/common/host_device_vector.cu
+++ b/src/common/host_device_vector.cu
@@ -230,7 +230,7 @@ struct HostDeviceVectorImpl {
     CHECK(devices.Contains(device));
     LazySyncDevice(device, GPUAccess::kWrite);
     return {shards_[devices.Index(device)].data_.data().get(),
-            static_cast<typename common::Span<T>::index_type>(Size())};
+            static_cast<typename common::Span<T>::index_type>(DeviceSize(device))};
   }
 
   common::Span<const T> ConstDeviceSpan(int device) {
@@ -238,7 +238,7 @@ struct HostDeviceVectorImpl {
     CHECK(devices.Contains(device));
     LazySyncDevice(device, GPUAccess::kRead);
     return {shards_[devices.Index(device)].data_.data().get(),
-      static_cast<typename common::Span<const T>::index_type>(Size())};
+      static_cast<typename common::Span<const T>::index_type>(DeviceSize(device))};
   }
 
   size_t DeviceSize(int device) {
diff --git a/src/common/host_device_vector.h b/src/common/host_device_vector.h
index a3ef3082b..3b62bda1b 100644
--- a/src/common/host_device_vector.h
+++ b/src/common/host_device_vector.h
@@ -14,7 +14,7 @@
  * Initialization/Allocation:<br/>
  * One can choose to initialize the vector on CPU or GPU during constructor.
  * (use the 'devices' argument) Or, can choose to use the 'Resize' method to
- * allocate/resize memory explicitly, and use the 'Reshard' method 
+ * allocate/resize memory explicitly, and use the 'Reshard' method
  * to specify the devices.
  *
  * Accessing underlying data:<br/>
@@ -31,7 +31,7 @@
  * DevicePointer but data on CPU  --> this causes a cudaMemcpy to be issued internally.
  *                        subsequent calls to DevicePointer, will NOT incur this penalty.
  *                        (assuming 'HostVector' is not called in between)
- * DevicePointer and data on GPU  --> no problems, the device ptr 
+ * DevicePointer and data on GPU  --> no problems, the device ptr
  *                        will be returned immediately.
  *
  * What if xgboost is compiled without CUDA?<br/>
@@ -40,13 +40,13 @@
  *
  * Why not consider CUDA unified memory?<br/>
  * We did consider. However, it poses complications if we need to support both
- * compiling with and without CUDA toolkit. It was easier to have 
+ * compiling with and without CUDA toolkit. It was easier to have
  * 'HostDeviceVector' with a special-case implementation in host_device_vector.cc
  *
  * @note: Size and Devices methods are thread-safe.
- * DevicePointer, DeviceStart, DeviceSize, tbegin and tend methods are thread-safe 
+ * DevicePointer, DeviceStart, DeviceSize, tbegin and tend methods are thread-safe
  * if different threads call these methods with different values of the device argument.
- * All other methods are not thread safe. 
+ * All other methods are not thread safe.
  */
 
 #ifndef XGBOOST_COMMON_HOST_DEVICE_VECTOR_H_
@@ -59,7 +59,7 @@
 #include <initializer_list>
 #include <vector>
 
-#include "gpu_set.h"
+#include "common.h"
 #include "span.h"
 
 // only include thrust-related files if host_device_vector.h
diff --git a/src/common/timer.h b/src/common/timer.h
index 22c0cf31a..4a7f3cf87 100644
--- a/src/common/timer.h
+++ b/src/common/timer.h
@@ -8,7 +8,7 @@
 #include <map>
 #include <string>
 
-#include "gpu_set.h"
+#include "common.h"
 
 namespace xgboost {
 namespace common {
diff --git a/src/linear/updater_gpu_coordinate.cu b/src/linear/updater_gpu_coordinate.cu
index fe1f5b5fc..a05ddcba2 100644
--- a/src/linear/updater_gpu_coordinate.cu
+++ b/src/linear/updater_gpu_coordinate.cu
@@ -6,7 +6,7 @@
 #include <thrust/execution_policy.h>
 #include <thrust/inner_product.h>
 #include <xgboost/linear_updater.h>
-#include "../common/gpu_set.h"
+#include "../common/common.h"
 #include "../common/device_helpers.cuh"
 #include "../common/timer.h"
 #include "coordinate_common.h"
diff --git a/src/predictor/gpu_predictor.cu b/src/predictor/gpu_predictor.cu
index b59564f96..4c11643c6 100644
--- a/src/predictor/gpu_predictor.cu
+++ b/src/predictor/gpu_predictor.cu
@@ -11,7 +11,7 @@
 #include <xgboost/tree_model.h>
 #include <xgboost/tree_updater.h>
 #include <memory>
-#include "../common/gpu_set.h"
+#include "../common/common.h"
 #include "../common/device_helpers.cuh"
 #include "../common/host_device_vector.h"
 
diff --git a/src/tree/updater_gpu.cu b/src/tree/updater_gpu.cu
index e92c3545e..05368a82b 100644
--- a/src/tree/updater_gpu.cu
+++ b/src/tree/updater_gpu.cu
@@ -4,7 +4,7 @@
 #include <xgboost/tree_updater.h>
 #include <utility>
 #include <vector>
-#include "../common/gpu_set.h"
+#include "../common/common.h"
 #include "param.h"
 #include "updater_gpu_common.cuh"
 
diff --git a/tests/cpp/common/test_gpuset.cc b/tests/cpp/common/test_common.cc
similarity index 81%
rename from tests/cpp/common/test_gpuset.cc
rename to tests/cpp/common/test_common.cc
index 3d74ba270..655e9a000 100644
--- a/tests/cpp/common/test_gpuset.cc
+++ b/tests/cpp/common/test_common.cc
@@ -1,8 +1,7 @@
-#include "../../../src/common/gpu_set.h"
+#include "../../../src/common/common.h"
 #include <gtest/gtest.h>
 
 namespace xgboost {
-
 TEST(GPUSet, Basic) {
   GPUSet devices = GPUSet::Empty();
   ASSERT_TRUE(devices.IsEmpty());
@@ -28,10 +27,16 @@ TEST(GPUSet, Basic) {
 
   devices = GPUSet::Range(2, 8);
   EXPECT_EQ(devices.Size(), 8);
+  EXPECT_ANY_THROW(devices[8]);
+  EXPECT_ANY_THROW(devices.Index(0));
+
   devices = devices.Unnormalised();
 
   EXPECT_EQ(*devices.begin(), 0);
   EXPECT_EQ(*devices.end(), devices.Size());
+#ifndef XGBOOST_USE_CUDA
+  EXPECT_EQ(GPUSet::AllVisible(), GPUSet::Empty());
+#endif
 }
-
 }  // namespace xgboost
+
diff --git a/tests/cpp/common/test_common.cu b/tests/cpp/common/test_common.cu
new file mode 100644
index 000000000..90ad56a14
--- /dev/null
+++ b/tests/cpp/common/test_common.cu
@@ -0,0 +1,44 @@
+#include "../../../src/common/common.h"
+#include <gtest/gtest.h>
+
+namespace xgboost {
+
+TEST(GPUSet, GPUBasic) {
+  GPUSet devices = GPUSet::Empty();
+  ASSERT_TRUE(devices.IsEmpty());
+
+  devices = GPUSet{0, 1};
+  ASSERT_TRUE(devices != GPUSet::Empty());
+  EXPECT_EQ(devices.Size(), 1);
+
+  EXPECT_ANY_THROW(devices.Index(1));
+  EXPECT_ANY_THROW(devices.Index(-1));
+
+  devices = GPUSet::Range(1, 0);
+  EXPECT_EQ(devices, GPUSet::Empty());
+  EXPECT_EQ(devices.Size(), 0);
+  EXPECT_TRUE(devices.IsEmpty());
+
+  EXPECT_FALSE(devices.Contains(1));
+
+  devices = GPUSet::Range(2, -1);
+  EXPECT_EQ(devices, GPUSet::Empty());
+  EXPECT_EQ(devices.Size(), 0);
+  EXPECT_TRUE(devices.IsEmpty());
+
+  devices = GPUSet::Range(2, 8);
+  EXPECT_EQ(devices.Size(), 8);
+  devices = devices.Unnormalised();
+
+  EXPECT_EQ(*devices.begin(), 0);
+  EXPECT_EQ(*devices.end(), devices.Size());
+  EXPECT_EQ(8, devices.Size());
+
+  ASSERT_NO_THROW(GPUSet::AllVisible());
+  devices = GPUSet::AllVisible();
+  if (devices.IsEmpty()) {
+    LOG(WARNING) << "Empty devices.";
+  }
+}
+
+}  // namespace xgboost