From 2c502784fffeb4aa35734d70ede47c98a8639fc2 Mon Sep 17 00:00:00 2001 From: trivialfis Date: Tue, 14 Aug 2018 13:58:11 +0800 Subject: [PATCH] Span class. (#3548) * Add basic Span class based on ISO++20. * Use Span instead of Inst in SparsePage. * Add DeviceSpan in HostDeviceVector, use it in regression obj. --- include/xgboost/data.h | 33 +- include/xgboost/tree_model.h | 4 +- src/c_api/c_api.cc | 8 +- src/common/hist_util.cc | 14 +- src/common/host_device_vector.cc | 5 + src/common/host_device_vector.cu | 12 + src/common/host_device_vector.h | 3 + src/common/span.h | 633 ++++++++++++++++++++ src/data/simple_dmatrix.cc | 14 +- src/data/simple_dmatrix.h | 2 +- src/gbm/gblinear.cc | 12 +- src/linear/coordinate_common.h | 10 +- src/linear/updater_gpu_coordinate.cu | 8 +- src/linear/updater_shotgun.cc | 12 +- src/objective/regression_obj_gpu.cu | 16 +- src/tree/updater_basemaker-inl.h | 14 +- src/tree/updater_colmaker.cc | 20 +- src/tree/updater_gpu.cu | 2 +- src/tree/updater_histmaker.cc | 64 +- src/tree/updater_skmaker.cc | 26 +- tests/cpp/c_api/test_c_api.cc | 4 +- tests/cpp/common/test_host_device_vector.cu | 22 + tests/cpp/common/test_span.cc | 423 +++++++++++++ tests/cpp/common/test_span.cu | 357 +++++++++++ tests/cpp/common/test_span.h | 339 +++++++++++ tests/cpp/data/test_simple_csr_source.cc | 2 +- tests/cpp/data/test_simple_dmatrix.cc | 4 +- tests/cpp/data/test_sparse_page_dmatrix.cc | 2 +- 28 files changed, 1927 insertions(+), 138 deletions(-) create mode 100644 src/common/span.h create mode 100644 tests/cpp/common/test_host_device_vector.cu create mode 100644 tests/cpp/common/test_span.cc create mode 100644 tests/cpp/common/test_span.cu create mode 100644 tests/cpp/common/test_span.h diff --git a/include/xgboost/data.h b/include/xgboost/data.h index 467572fa7..36e872ef1 100644 --- a/include/xgboost/data.h +++ b/include/xgboost/data.h @@ -15,6 +15,7 @@ #include #include #include "./base.h" +#include "../../src/common/span.h" namespace xgboost { // forward declare learner. @@ -133,7 +134,7 @@ struct Entry { /*! * \brief constructor with index and value * \param index The feature or row index. - * \param fvalue THe feature value. + * \param fvalue The feature value. */ Entry(bst_uint index, bst_float fvalue) : index(index), fvalue(fvalue) {} /*! \brief reversely compare feature values */ @@ -155,24 +156,14 @@ class SparsePage { std::vector data; size_t base_rowid; + /*! \brief an instance of sparse vector in the batch */ - struct Inst { - /*! \brief pointer to the elements*/ - const Entry *data{nullptr}; - /*! \brief length of the instance */ - bst_uint length{0}; - /*! \brief constructor */ - Inst() = default; - Inst(const Entry *data, bst_uint length) : data(data), length(length) {} - /*! \brief get i-th pair in the sparse vector*/ - inline const Entry& operator[](size_t i) const { - return data[i]; - } - }; + using Inst = common::Span; /*! \brief get i-th row from the batch */ inline Inst operator[](size_t i) const { - return {data.data() + offset[i], static_cast(offset[i + 1] - offset[i])}; + return {data.data() + offset[i], + static_cast(offset[i + 1] - offset[i])}; } /*! 
\brief constructor */ @@ -234,12 +225,12 @@ class SparsePage { * \param inst an instance row */ inline void Push(const Inst &inst) { - offset.push_back(offset.back() + inst.length); + offset.push_back(offset.back() + inst.size()); size_t begin = data.size(); - data.resize(begin + inst.length); - if (inst.length != 0) { - std::memcpy(dmlc::BeginPtr(data) + begin, inst.data, - sizeof(Entry) * inst.length); + data.resize(begin + inst.size()); + if (inst.size() != 0) { + std::memcpy(dmlc::BeginPtr(data) + begin, inst.data(), + sizeof(Entry) * inst.size()); } } @@ -328,7 +319,7 @@ class DMatrix { * \brief check if column access is supported, if not, initialize column access. * \param max_row_perbatch auxiliary information, maximum row used in each column batch. * this is a hint information that can be ignored by the implementation. - * \param sorted If column features should be in sorted order + * \param sorted If column features should be in sorted order * \return Number of column blocks in the column access. */ virtual void InitColAccess(size_t max_row_perbatch, bool sorted) = 0; diff --git a/include/xgboost/tree_model.h b/include/xgboost/tree_model.h index 1736a0a10..8729b74d3 100644 --- a/include/xgboost/tree_model.h +++ b/include/xgboost/tree_model.h @@ -574,14 +574,14 @@ inline void RegTree::FVec::Init(size_t size) { } inline void RegTree::FVec::Fill(const SparsePage::Inst& inst) { - for (bst_uint i = 0; i < inst.length; ++i) { + for (bst_uint i = 0; i < inst.size(); ++i) { if (inst[i].index >= data_.size()) continue; data_[inst[i].index].fvalue = inst[i].fvalue; } } inline void RegTree::FVec::Drop(const SparsePage::Inst& inst) { - for (bst_uint i = 0; i < inst.length; ++i) { + for (bst_uint i = 0; i < inst.size(); ++i) { if (inst[i].index >= data_.size()) continue; data_[inst[i].index].flag = -1; } diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index c64bed42e..506f52b41 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -687,10 +687,10 @@ XGB_DLL int XGDMatrixSliceDMatrix(DMatrixHandle handle, const int ridx = idxset[i]; auto inst = batch[ridx]; CHECK_LT(static_cast(ridx), batch.Size()); - ret.page_.data.insert(ret.page_.data.end(), inst.data, - inst.data + inst.length); - ret.page_.offset.push_back(ret.page_.offset.back() + inst.length); - ret.info.num_nonzero_ += inst.length; + ret.page_.data.insert(ret.page_.data.end(), inst.data(), + inst.data() + inst.size()); + ret.page_.offset.push_back(ret.page_.offset.back() + inst.size()); + ret.info.num_nonzero_ += inst.size(); if (src.info.labels_.size() != 0) { ret.info.labels_.push_back(src.info.labels_[ridx]); diff --git a/src/common/hist_util.cc b/src/common/hist_util.cc index b32ba349c..c0bea5980 100644 --- a/src/common/hist_util.cc +++ b/src/common/hist_util.cc @@ -48,9 +48,9 @@ void HistCutMatrix::Init(DMatrix* p_fmat, uint32_t max_num_bins) { for (size_t i = 0; i < batch.Size(); ++i) { // NOLINT(*) size_t ridx = batch.base_rowid + i; SparsePage::Inst inst = batch[i]; - for (bst_uint j = 0; j < inst.length; ++j) { - if (inst[j].index >= begin && inst[j].index < end) { - sketchs[inst[j].index].Push(inst[j].fvalue, info.GetWeight(ridx)); + for (auto& ins : inst) { + if (ins.index >= begin && ins.index < end) { + sketchs[ins.index].Push(ins.fvalue, info.GetWeight(ridx)); } } } @@ -140,7 +140,7 @@ void GHistIndexMatrix::Init(DMatrix* p_fmat, int max_num_bins) { auto &batch = iter->Value(); const size_t rbegin = row_ptr.size() - 1; for (size_t i = 0; i < batch.Size(); ++i) { - row_ptr.push_back(batch[i].length + 
row_ptr.back());
+      row_ptr.push_back(batch[i].size() + row_ptr.back());
     }
     index.resize(row_ptr.back());
 
@@ -154,9 +154,11 @@ void GHistIndexMatrix::Init(DMatrix* p_fmat, int max_num_bins) {
         size_t ibegin = row_ptr[rbegin + i];
         size_t iend = row_ptr[rbegin + i + 1];
         SparsePage::Inst inst = batch[i];
-        CHECK_EQ(ibegin + inst.length, iend);
-        for (bst_uint j = 0; j < inst.length; ++j) {
+
+        CHECK_EQ(ibegin + inst.size(), iend);
+        for (bst_uint j = 0; j < inst.size(); ++j) {
           uint32_t idx = cut.GetBinIdx(inst[j]);
+
           index[ibegin + j] = idx;
           ++hit_count_tloc_[tid * nbins + idx];
         }
diff --git a/src/common/host_device_vector.cc b/src/common/host_device_vector.cc
index e0263de04..f30196b1b 100644
--- a/src/common/host_device_vector.cc
+++ b/src/common/host_device_vector.cc
@@ -53,6 +53,11 @@ GPUSet HostDeviceVector<T>::Devices() const { return GPUSet::Empty(); }
 template <typename T>
 T* HostDeviceVector<T>::DevicePointer(int device) { return nullptr; }
 
+template <typename T>
+common::Span<T> HostDeviceVector<T>::DeviceSpan(int device) {
+  return common::Span<T>();
+}
+
 template <typename T>
 std::vector<T>& HostDeviceVector<T>::HostVector() { return impl_->data_h_; }
 
diff --git a/src/common/host_device_vector.cu b/src/common/host_device_vector.cu
index a474be7a0..e0d7dbb85 100644
--- a/src/common/host_device_vector.cu
+++ b/src/common/host_device_vector.cu
@@ -156,6 +156,13 @@ struct HostDeviceVectorImpl {
     return shards_[devices_.Index(device)].data_.data().get();
   }
 
+  common::Span<T> DeviceSpan(int device) {
+    CHECK(devices_.Contains(device));
+    LazySyncDevice(device);
+    return { shards_[devices_.Index(device)].data_.data().get(),
+             static_cast<typename common::Span<T>::index_type>(Size()) };
+  }
+
   size_t DeviceSize(int device) {
     CHECK(devices_.Contains(device));
     LazySyncDevice(device);
@@ -323,6 +330,11 @@ GPUSet HostDeviceVector<T>::Devices() const { return impl_->Devices(); }
 template <typename T>
 T* HostDeviceVector<T>::DevicePointer(int device) { return impl_->DevicePointer(device); }
 
+template <typename T>
+common::Span<T> HostDeviceVector<T>::DeviceSpan(int device) {
+  return impl_->DeviceSpan(device);
+}
+
 template <typename T>
 size_t HostDeviceVector<T>::DeviceStart(int device) { return impl_->DeviceStart(device); }
 
diff --git a/src/common/host_device_vector.h b/src/common/host_device_vector.h
index 5d9762511..ebd54e849 100644
--- a/src/common/host_device_vector.h
+++ b/src/common/host_device_vector.h
@@ -11,6 +11,8 @@
 #include
 #include
 
+#include "span.h"
+
 // only include thrust-related files if host_device_vector.h
 // is included from a .cu file
 #ifdef __CUDACC__
@@ -117,6 +119,7 @@ class HostDeviceVector {
   size_t Size() const;
   GPUSet Devices() const;
   T* DevicePointer(int device);
+  common::Span<T> DeviceSpan(int device);
   T* HostPointer() { return HostVector().data(); }
 
   size_t DeviceStart(int device);
diff --git a/src/common/span.h b/src/common/span.h
new file mode 100644
index 000000000..173cb095c
--- /dev/null
+++ b/src/common/span.h
@@ -0,0 +1,633 @@
+/*!
+ * Copyright 2018 XGBoost contributors
+ * \brief span class based on ISO C++20 span
+ *
+ * About NOLINTs in this file:
+ *
+ *   If we want Span to work with std interface, like range for loop, the
+ *   naming must be consistent with std, not XGBoost. The interface also
+ *   conflicts with XGBoost coding style, specifically, the use of `explicit'
+ *   keyword.
+ *
+ *
+ * Some of the code is copied from Guidelines Support Library, here is the
+ * license:
+ *
+ * Copyright (c) 2015 Microsoft Corporation. All rights reserved.
+ *
+ * This code is licensed under the MIT License (MIT).
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef XGBOOST_COMMON_SPAN_H_
+#define XGBOOST_COMMON_SPAN_H_
+
+#include <dmlc/logging.h>  // CHECK
+
+#include <cinttypes>       // int64_t
+#include <type_traits>     // remove_cv_t
+
+/*!
+ * The version number 1910 is picked up from GSL.
+ *
+ * We might want to use MOODYCAMEL_NOEXCEPT from dmlc/concurrentqueue.h. But
+ * there are a lot more definitions in that file that would cause
+ * warnings/troubles in MSVC 2013. Currently we try to keep the closure of
+ * Span as minimal as possible.
+ *
+ * There are other workarounds for MSVC, like _Unwrapped, _Verify_range ...
+ * Some of these are hidden magics of MSVC and I tried to avoid them. Should any
+ * of them become needed, please consult the source code of GSL, and possibly
+ * some explanations from this thread:
+ *
+ *   https://github.com/Microsoft/GSL/pull/664
+ *
+ * FIXME: Group these MSVC workarounds into a manageable place.
+ */
+#if defined(_MSC_VER) && _MSC_VER < 1910
+
+#define __span_noexcept
+
+#pragma push_macro("constexpr")
+#define constexpr /*constexpr*/
+
+#else
+
+#define __span_noexcept noexcept
+
+#endif
+
+namespace xgboost {
+namespace common {
+
+// Usual logging facility is not available inside device code.
+// FIXME: Make dmlc check more generic.
+#define KERNEL_CHECK(cond)                                      \
+  do {                                                          \
+    if (!(cond)) {                                              \
+      printf("\nKernel error:\n"                                \
+             "In: %s, \tline: %d\n"                             \
+             "\t%s\n\tExpecting: %s\n",                         \
+             __FILE__, __LINE__, __PRETTY_FUNCTION__, # cond);  \
+      asm("trap;");                                             \
+    }                                                           \
+  } while (0);                                                  \
+
+#ifdef __CUDA_ARCH__
+#define SPAN_CHECK KERNEL_CHECK
+#else
+#define SPAN_CHECK CHECK  // check from dmlc
+#endif
+
+namespace detail {
+/*!
+ * By default, XGBoost uses uint32_t for indexing data. int64_t covers all
+ * values uint32_t can represent. Also, on x86-64 Linux, GCC uses long int to
+ * represent ptrdiff_t, which is just int64_t. So we make it deterministic
+ * here.
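+ *
+ * A minimal sketch of what this choice guarantees (an editor's example,
+ * not part of the original patch):
+ *
+ * \code{.cpp}
+ * static_assert(std::is_same<detail::ptrdiff_t, std::int64_t>::value,
+ *               "the indexing type is deterministic across platforms");
+ * \endcode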
+ */ +using ptrdiff_t = int64_t; // NOLINT +} // namespace detail + +#if defined(_MSC_VER) && _MSC_VER < 1910 +constexpr const detail::ptrdiff_t dynamic_extent = -1; // NOLINT +#else +constexpr detail::ptrdiff_t dynamic_extent = -1; // NOLINT +#endif + +enum class byte : unsigned char {}; // NOLINT + +namespace detail { + +template +class Span; + +template +class SpanIterator { + using ElementType = typename SpanType::element_type; + + public: + using iterator_category = std::random_access_iterator_tag; // NOLINT + using value_type = typename std::remove_cv::type; // NOLINT + using difference_type = typename SpanType::index_type; // NOLINT + + using reference = typename std::conditional< // NOLINT + IsConst, const ElementType, ElementType>::type&; + using pointer = typename std::add_pointer::type&; // NOLINT + + XGBOOST_DEVICE constexpr SpanIterator() : span_{nullptr}, index_{0} {} + + XGBOOST_DEVICE constexpr SpanIterator( + const SpanType* _span, + typename SpanType::index_type _idx) __span_noexcept : + span_(_span), index_(_idx) {} + + friend SpanIterator; + template ::type* = nullptr> + XGBOOST_DEVICE constexpr SpanIterator( // NOLINT + const SpanIterator& other_) __span_noexcept + : SpanIterator(other_.span_, other_.index_) {} + + XGBOOST_DEVICE reference operator*() const { + SPAN_CHECK(index_ < span_->size()); + return *(span_->data() + index_); + } + + XGBOOST_DEVICE pointer operator->() const { + SPAN_CHECK(index_ != span_->size()); + return span_->data() + index_; + } + + XGBOOST_DEVICE SpanIterator& operator++() { + SPAN_CHECK(0 <= index_ && index_ != span_->size()); + index_++; + return *this; + } + + XGBOOST_DEVICE SpanIterator operator++(int) { + auto ret = *this; + ++(*this); + return ret; + } + + XGBOOST_DEVICE SpanIterator& operator--() { + SPAN_CHECK(index_ != 0 && index_ <= span_->size()); + index_--; + return *this; + } + + XGBOOST_DEVICE SpanIterator operator--(int) { + auto ret = *this; + --(*this); + return ret; + } + + XGBOOST_DEVICE SpanIterator operator+(difference_type n) const { + auto ret = *this; + return ret += n; + } + + XGBOOST_DEVICE SpanIterator& operator+=(difference_type n) { + SPAN_CHECK((index_ + n) >= 0 && (index_ + n) <= span_->size()); + index_ += n; + return *this; + } + + XGBOOST_DEVICE difference_type operator-(SpanIterator rhs) const { + SPAN_CHECK(span_ == rhs.span_); + return index_ - rhs.index_; + } + + XGBOOST_DEVICE SpanIterator operator-(difference_type n) const { + auto ret = *this; + return ret -= n; + } + + XGBOOST_DEVICE SpanIterator& operator-=(difference_type n) { + return *this += -n; + } + + // friends + XGBOOST_DEVICE constexpr friend bool operator==( + SpanIterator _lhs, SpanIterator _rhs) __span_noexcept { + return _lhs.span_ == _rhs.span_ && _lhs.index_ == _rhs.index_; + } + + XGBOOST_DEVICE constexpr friend bool operator!=( + SpanIterator _lhs, SpanIterator _rhs) __span_noexcept { + return !(_lhs == _rhs); + } + + XGBOOST_DEVICE constexpr friend bool operator<( + SpanIterator _lhs, SpanIterator _rhs) __span_noexcept { + return _lhs.index_ < _rhs.index_; + } + + XGBOOST_DEVICE constexpr friend bool operator<=( + SpanIterator _lhs, SpanIterator _rhs) __span_noexcept { + return !(_rhs < _lhs); + } + + XGBOOST_DEVICE constexpr friend bool operator>( + SpanIterator _lhs, SpanIterator _rhs) __span_noexcept { + return _rhs < _lhs; + } + + XGBOOST_DEVICE constexpr friend bool operator>=( + SpanIterator _lhs, SpanIterator _rhs) __span_noexcept { + return !(_rhs > _lhs); + } + + protected: + const SpanType *span_; + detail::ptrdiff_t 
index_;
+};
+
+
+// It's tempting to use constexpr instead of structs to do the following meta
+// programming. But remember that we are supporting MSVC 2013 here.
+
+/*!
+ * The extent E of the span returned by subspan is determined as follows:
+ *
+ *   - If Count is not std::dynamic_extent, Count;
+ *   - Otherwise, if Extent is not std::dynamic_extent, Extent - Offset;
+ *   - Otherwise, std::dynamic_extent.
+ */
+template <detail::ptrdiff_t Extent, detail::ptrdiff_t Offset,
+          detail::ptrdiff_t Count>
+struct ExtentValue : public std::integral_constant<
+  detail::ptrdiff_t, Count != dynamic_extent ?
+  Count : (Extent != dynamic_extent ? Extent - Offset : Extent)> {};
+
+/*!
+ * If N is dynamic_extent, the extent of the returned span E is also
+ * dynamic_extent; otherwise it is detail::ptrdiff_t(sizeof(T)) * N.
+ */
+template <typename T, detail::ptrdiff_t Extent>
+struct ExtentAsBytesValue : public std::integral_constant<
+  detail::ptrdiff_t,
+  Extent == dynamic_extent ?
+  Extent : static_cast<detail::ptrdiff_t>(sizeof(T) * Extent)> {};
+
+template <detail::ptrdiff_t From, detail::ptrdiff_t To>
+struct IsAllowedExtentConversion : public std::integral_constant<
+  bool, From == To || From == dynamic_extent || To == dynamic_extent> {};
+
+template <class From, class To>
+struct IsAllowedElementTypeConversion : public std::integral_constant<
+  bool, std::is_convertible<From(*)[], To(*)[]>::value> {};
+
+template <class T>
+struct IsSpanOracle : std::false_type {};
+
+template <class T, detail::ptrdiff_t Extent>
+struct IsSpanOracle<Span<T, Extent>> : std::true_type {};
+
+template <class T>
+struct IsSpan : public IsSpanOracle<typename std::remove_cv<T>::type> {};
+
+// Re-implement std algorithms here so they also work in CUDA code.
+template <typename T>
+struct Less {
+  XGBOOST_DEVICE constexpr bool operator()(const T& _x, const T& _y) const {
+    return _x < _y;
+  }
+};
+
+template <typename T>
+struct Greater {
+  XGBOOST_DEVICE constexpr bool operator()(const T& _x, const T& _y) const {
+    return _x > _y;
+  }
+};
+
+template <class InputIt1, class InputIt2,
+          class Compare =
+            detail::Less<decltype(std::declval<InputIt1>().operator*())>>
+XGBOOST_DEVICE bool LexicographicalCompare(InputIt1 first1, InputIt1 last1,
+                                           InputIt2 first2, InputIt2 last2) {
+  Compare comp;
+  for (; first1 != last1 && first2 != last2; ++first1, ++first2) {
+    if (comp(*first1, *first2)) {
+      return true;
+    }
+    if (comp(*first2, *first1)) {
+      return false;
+    }
+  }
+  return first1 == last1 && first2 != last2;
+}
+
+}  // namespace detail
+
+
+/*!
+ * \brief span class implementation, based on ISO C++20 span. The interface
+ *        should be the same.
+ *
+ * What's different from span in Guidelines Support Library (GSL):
+ *
+ *    The interface might be slightly different, we stick with ISO.
+ *
+ *    GSL uses C++14/17 features, which are not available here.
+ *    GSL uses constexpr extensively, which is not possible within the
+ *    limitations of C++11.
+ *    GSL is not concerned with CUDA.
+ *
+ *    GSL is more thoroughly implemented and tested.
+ *    GSL is more optimized, especially for static extent.
+ *
+ *    GSL uses __builtin_unreachable() on error, Span uses dmlc LOG and
+ *    customized CUDA logging.
+ *
+ *
+ * What's different from span in ISO C++20 (ISO):
+ *
+ *    ISO uses functions/structs from the std library, which might not be
+ *    available in CUDA.
+ *    Initializing from std::array is not supported.
+ *
+ *    ISO uses constexpr extensively, which is not possible within the
+ *    limitations of C++11.
+ *    ISO uses C++14/17 features, which are not available here.
+ *    ISO is not concerned with CUDA.
+ *
+ *    ISO uses std::terminate(), Span uses dmlc LOG and customized CUDA
+ *    logging.
+ *
+ *
+ * Limitations:
+ *   With thrust:
+ *     It's not advised to initialize Span with host_vector directly, since
+ *     host_vector::data() is a host function.
+ *     It's not possible to initialize Span with device_vector directly, since
+ *     device_vector::data() returns a wrapped pointer.
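+ *
+ *     A minimal sketch of the workaround, going through the raw pointer
+ *     (`d_vec' here is a thrust::device_vector<float>, as in the tests):
+ *
+ *     \code{.cpp}
+ *     Span<float> s {d_vec.data().get(),
+ *                    static_cast<Span<float>::index_type>(d_vec.size())};
+ *     \endcode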
+ *     It's unclear what kind of thrust algorithms can be used without
+ *     memory error. See the test case "GPUSpan.WithTrust"
+ *
+ *   Pass iterator to kernel:
+ *     Not possible. Use subspan instead.
+ *
+ *     The underlying Span in SpanIterator is a pointer, but CUDA passes
+ *     kernel parameters by value. If we were to hold a Span value instead
+ *     of a pointer, the following snippet would crash, violating the safety
+ *     purpose of Span:
+ *
+ *     \code{.cpp}
+ *     Span<float> span {arr_a};
+ *     auto beg = span.begin();
+ *
+ *     Span<float> span_b = arr_b;
+ *     span = span_b;
+ *
+ *     delete arr_a;
+ *     beg++;  // crash
+ *     \endcode
+ *
+ *     While holding a pointer or reference would avoid the problem, it's a
+ *     compromise. Since we have subspan, it's acceptable not to support
+ *     passing iterators.
+ */
+template <typename T,
+          detail::ptrdiff_t Extent = dynamic_extent>
+class Span {
+ public:
+  using element_type = T;                               // NOLINT
+  using value_type = typename std::remove_cv<T>::type;  // NOLINT
+  using index_type = detail::ptrdiff_t;                 // NOLINT
+  using difference_type = detail::ptrdiff_t;            // NOLINT
+  using pointer = T*;                                   // NOLINT
+  using reference = T&;                                 // NOLINT
+
+  using iterator = detail::SpanIterator<Span<T, Extent>, false>;             // NOLINT
+  using const_iterator = const detail::SpanIterator<Span<T, Extent>, true>;  // NOLINT
+  using reverse_iterator = detail::SpanIterator<Span<T, Extent>, false>;     // NOLINT
+  using const_reverse_iterator = const detail::SpanIterator<Span<T, Extent>, true>;  // NOLINT
+
+  // constructors
+
+  XGBOOST_DEVICE constexpr Span() __span_noexcept : size_(0), data_(nullptr) {}
+
+  XGBOOST_DEVICE Span(pointer _ptr, index_type _count) :
+      size_(_count), data_(_ptr) {
+    SPAN_CHECK(_count >= 0);
+    SPAN_CHECK(_ptr || _count == 0);
+  }
+
+  XGBOOST_DEVICE Span(pointer _first, pointer _last) :
+      size_(_last - _first), data_(_first) {
+    SPAN_CHECK(size_ >= 0);
+    SPAN_CHECK(data_ || size_ == 0);
+  }
+
+  template <std::size_t N>
+  XGBOOST_DEVICE constexpr Span(element_type (&arr)[N])  // NOLINT
+      __span_noexcept : size_(N), data_(&arr[0]) {}
+
+  template <class Container,
+            class = typename std::enable_if<
+              !std::is_const<element_type>::value &&
+              !detail::IsSpan<Container>::value &&
+              std::is_convertible<typename Container::pointer, pointer>::value &&
+              std::is_convertible<
+                typename Container::pointer,
+                decltype(std::declval<Container>().data())>::value>::type>
+  XGBOOST_DEVICE Span(Container& _cont) :  // NOLINT
+      size_(_cont.size()), data_(_cont.data()) {}
+
+  template <class Container,
+            class = typename std::enable_if<
+              std::is_const<element_type>::value &&
+              !detail::IsSpan<Container>::value &&
+              std::is_convertible<typename Container::pointer, pointer>::value &&
+              std::is_convertible<
+                typename Container::pointer,
+                decltype(std::declval<Container>().data())>::value>::type>
+  XGBOOST_DEVICE Span(const Container& _cont) : size_(_cont.size()),  // NOLINT
+      data_(_cont.data()) {}
+
+  template <class U, detail::ptrdiff_t OtherExtent,
+            class = typename std::enable_if<
+              detail::IsAllowedElementTypeConversion<U, T>::value &&
+              detail::IsAllowedExtentConversion<OtherExtent, Extent>::value>::type>
+  XGBOOST_DEVICE constexpr Span(const Span<U, OtherExtent>& _other)  // NOLINT
+      __span_noexcept : size_(_other.size()), data_(_other.data()) {}
+
+  XGBOOST_DEVICE constexpr Span(const Span& _other)
+      __span_noexcept : size_(_other.size()), data_(_other.data()) {}
+
+  XGBOOST_DEVICE Span& operator=(const Span& _other) __span_noexcept {
+    size_ = _other.size();
+    data_ = _other.data();
+    return *this;
+  }
+
+  XGBOOST_DEVICE ~Span() __span_noexcept {};  // NOLINT
+
+  XGBOOST_DEVICE constexpr iterator begin() const __span_noexcept {  // NOLINT
+    return {this, 0};
+  }
+
+  XGBOOST_DEVICE constexpr iterator end() const __span_noexcept {    // NOLINT
+    return {this, size()};
+  }
+
+  XGBOOST_DEVICE constexpr const_iterator cbegin() const __span_noexcept {  // NOLINT
+    return {this, 0};
+  }
+
+  XGBOOST_DEVICE constexpr const_iterator cend() const __span_noexcept {    // NOLINT
+    return {this, size()};
+  }
+
+  XGBOOST_DEVICE constexpr reverse_iterator rbegin() const __span_noexcept {  // NOLINT
+    return reverse_iterator{end()};
+  }
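+
+  // A hedged usage sketch (an editor's example, not part of the original
+  // class): iterators cannot be passed into a kernel, so callers hand out
+  // a subspan by value instead; `kernel' is a hypothetical __global__
+  // function taking a Span<float>:
+  //
+  //   Span<float> s {device_ptr, 16};
+  //   kernel<<<1, 8>>>(s.subspan(8, 8));  // second half of s, by value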
+ + XGBOOST_DEVICE constexpr reverse_iterator rend() const __span_noexcept { // NOLINT + return reverse_iterator{begin()}; + } + + XGBOOST_DEVICE constexpr const_reverse_iterator crbegin() const __span_noexcept { // NOLINT + return const_reverse_iterator{cend()}; + } + + XGBOOST_DEVICE constexpr const_reverse_iterator crend() const __span_noexcept { // NOLINT + return const_reverse_iterator{cbegin()}; + } + + XGBOOST_DEVICE reference operator[](index_type _idx) const { + SPAN_CHECK(_idx >= 0 && _idx < size()); + return data()[_idx]; + } + + XGBOOST_DEVICE constexpr reference operator()(index_type _idx) const { + return this->operator[](_idx); + } + + XGBOOST_DEVICE constexpr pointer data() const __span_noexcept { // NOLINT + return data_; + } + + // Observers + XGBOOST_DEVICE constexpr index_type size() const __span_noexcept { // NOLINT + return size_; + } + XGBOOST_DEVICE constexpr index_type size_bytes() const __span_noexcept { // NOLINT + return size() * sizeof(T); + } + + XGBOOST_DEVICE constexpr bool empty() const __span_noexcept { // NOLINT + return size() == 0; + } + + // Subviews + template + XGBOOST_DEVICE Span first() const { // NOLINT + SPAN_CHECK(Count >= 0 && Count <= size()); + return {data(), Count}; + } + + XGBOOST_DEVICE Span first( // NOLINT + detail::ptrdiff_t _count) const { + SPAN_CHECK(_count >= 0 && _count <= size()); + return {data(), _count}; + } + + template + XGBOOST_DEVICE Span last() const { // NOLINT + SPAN_CHECK(Count >=0 && size() - Count >= 0); + return {data() + size() - Count, Count}; + } + + XGBOOST_DEVICE Span last( // NOLINT + detail::ptrdiff_t _count) const { + SPAN_CHECK(_count >= 0 && _count <= size()); + return subspan(size() - _count, _count); + } + + /*! + * If Count is std::dynamic_extent, r.size() == this->size() - Offset; + * Otherwise r.size() == Count. + */ + template < detail::ptrdiff_t Offset, + detail::ptrdiff_t Count = dynamic_extent > + XGBOOST_DEVICE auto subspan() const -> // NOLINT + Span::value> { + SPAN_CHECK(Offset >= 0 && Offset < size()); + SPAN_CHECK(Count == dynamic_extent || + Count >= 0 && Offset + Count <= size()); + + return {data() + Offset, Count == dynamic_extent ? size() - Offset : Count}; + } + + XGBOOST_DEVICE Span subspan( // NOLINT + detail::ptrdiff_t _offset, + detail::ptrdiff_t _count = dynamic_extent) const { + SPAN_CHECK(_offset >= 0 && _offset < size()); + SPAN_CHECK(_count == dynamic_extent || + _count >= 0 && _offset + _count <= size()); + + return {data() + _offset, _count == + dynamic_extent ? 
size() - _offset : _count}; + } + + private: + index_type size_; + pointer data_; +}; + +template +XGBOOST_DEVICE bool operator==(Span l, Span r) { + if (l.size() != r.size()) { + return false; + } + for (auto l_beg = l.cbegin(), r_beg = r.cbegin(); l_beg != l.cend(); + ++l_beg, ++r_beg) { + if (*l_beg != *r_beg) { + return false; + } + } + return true; +} + +template +XGBOOST_DEVICE constexpr bool operator!=(Span l, Span r) { + return !(l == r); +} + +template +XGBOOST_DEVICE constexpr bool operator<(Span l, Span r) { + return detail::LexicographicalCompare(l.begin(), l.end(), + r.begin(), r.end()); +} + +template +XGBOOST_DEVICE constexpr bool operator<=(Span l, Span r) { + return !(l > r); +} + +template +XGBOOST_DEVICE constexpr bool operator>(Span l, Span r) { + return detail::LexicographicalCompare< + typename Span::iterator, typename Span::iterator, + detail::Greater::element_type>>(l.begin(), l.end(), + r.begin(), r.end()); +} + +template +XGBOOST_DEVICE constexpr bool operator>=(Span l, Span r) { + return !(l < r); +} + +template +XGBOOST_DEVICE auto as_bytes(Span s) __span_noexcept -> // NOLINT + Span::value> { + return {reinterpret_cast(s.data()), s.size_bytes()}; +} + +template +XGBOOST_DEVICE auto as_writable_bytes(Span s) __span_noexcept -> // NOLINT + Span::value> { + return {reinterpret_cast(s.data()), s.size_bytes()}; +} + +} // namespace common +} // namespace xgboost + +#if defined(_MSC_VER) &&_MSC_VER < 1910 +#undef constexpr +#pragma pop_macro("constexpr") +#undef __span_noexcept +#endif // _MSC_VER < 1910 + +#endif // XGBOOST_COMMON_SPAN_H_ diff --git a/src/data/simple_dmatrix.cc b/src/data/simple_dmatrix.cc index 59cc32da3..c14faf0ce 100644 --- a/src/data/simple_dmatrix.cc +++ b/src/data/simple_dmatrix.cc @@ -58,8 +58,8 @@ void SimpleDMatrix::MakeOneBatch(SparsePage* pcol, bool sorted) { for (long i = 0; i < batch_size; ++i) { // NOLINT(*) int tid = omp_get_thread_num(); auto inst = batch[i]; - for (bst_uint j = 0; j < inst.length; ++j) { - builder.AddBudget(inst[j].index, tid); + for (auto& ins : inst) { + builder.AddBudget(ins.index, tid); } } } @@ -72,11 +72,11 @@ void SimpleDMatrix::MakeOneBatch(SparsePage* pcol, bool sorted) { for (long i = 0; i < static_cast(batch.Size()); ++i) { // NOLINT(*) int tid = omp_get_thread_num(); auto inst = batch[i]; - for (bst_uint j = 0; j < inst.length; ++j) { - builder.Push( - inst[j].index, - Entry(static_cast(batch.base_rowid + i), inst[j].fvalue), - tid); + for (auto& ins : inst) { + builder.Push(ins.index, + Entry(static_cast(batch.base_rowid + i), + ins.fvalue), + tid); } } } diff --git a/src/data/simple_dmatrix.h b/src/data/simple_dmatrix.h index b0d49fe52..c1d1babdd 100644 --- a/src/data/simple_dmatrix.h +++ b/src/data/simple_dmatrix.h @@ -45,7 +45,7 @@ class SimpleDMatrix : public DMatrix { size_t GetColSize(size_t cidx) const override { auto& batch = *col_iter_.column_page_; - return batch[cidx].length; + return batch[cidx].size(); } float GetColDensity(size_t cidx) const override { diff --git a/src/gbm/gblinear.cc b/src/gbm/gblinear.cc index 7f6d424ac..6a432c057 100644 --- a/src/gbm/gblinear.cc +++ b/src/gbm/gblinear.cc @@ -166,9 +166,9 @@ class GBLinear : public GradientBooster { for (int gid = 0; gid < ngroup; ++gid) { bst_float *p_contribs = &contribs[(row_idx * ngroup + gid) * ncolumns]; // calculate linear terms' contributions - for (bst_uint c = 0; c < inst.length; ++c) { - if (inst[c].index >= model_.param.num_feature) continue; - p_contribs[inst[c].index] = inst[c].fvalue * model_[inst[c].index][gid]; + for 
(auto& ins : inst) { + if (ins.index >= model_.param.num_feature) continue; + p_contribs[ins.index] = ins.fvalue * model_[ins.index][gid]; } // add base margin to BIAS p_contribs[ncolumns - 1] = model_.bias()[gid] + @@ -268,9 +268,9 @@ class GBLinear : public GradientBooster { inline void Pred(const SparsePage::Inst &inst, bst_float *preds, int gid, bst_float base) { bst_float psum = model_.bias()[gid] + base; - for (bst_uint i = 0; i < inst.length; ++i) { - if (inst[i].index >= model_.param.num_feature) continue; - psum += inst[i].fvalue * model_[inst[i].index][gid]; + for (const auto& ins : inst) { + if (ins.index >= model_.param.num_feature) continue; + psum += ins.fvalue * model_[ins.index][gid]; } preds[gid] = psum; } diff --git a/src/linear/coordinate_common.h b/src/linear/coordinate_common.h index 18d87fbf1..72b0c9802 100644 --- a/src/linear/coordinate_common.h +++ b/src/linear/coordinate_common.h @@ -69,7 +69,7 @@ inline std::pair GetGradient(int group_idx, int num_group, int f while (iter->Next()) { auto &batch = iter->Value(); auto col = batch[fidx]; - const auto ndata = static_cast(col.length); + const auto ndata = static_cast(col.size()); for (bst_omp_uint j = 0; j < ndata; ++j) { const bst_float v = col[j].fvalue; auto &p = gpair[col[j].index * num_group + group_idx]; @@ -100,7 +100,7 @@ inline std::pair GetGradientParallel(int group_idx, int num_grou while (iter->Next()) { auto &batch = iter->Value(); auto col = batch[fidx]; - const auto ndata = static_cast(col.length); + const auto ndata = static_cast(col.size()); #pragma omp parallel for schedule(static) reduction(+ : sum_grad, sum_hess) for (bst_omp_uint j = 0; j < ndata; ++j) { const bst_float v = col[j].fvalue; @@ -159,7 +159,7 @@ inline void UpdateResidualParallel(int fidx, int group_idx, int num_group, auto &batch = iter->Value(); auto col = batch[fidx]; // update grad value - const auto num_row = static_cast(col.length); + const auto num_row = static_cast(col.size()); #pragma omp parallel for schedule(static) for (bst_omp_uint j = 0; j < num_row; ++j) { GradientPair &p = (*in_gpair)[col[j].index * num_group + group_idx]; @@ -331,7 +331,7 @@ class GreedyFeatureSelector : public FeatureSelector { #pragma omp parallel for schedule(static) for (bst_omp_uint i = 0; i < nfeat; ++i) { const auto col = batch[i]; - const bst_uint ndata = col.length; + const bst_uint ndata = col.size(); auto &sums = gpair_sums_[group_idx * nfeat + i]; for (bst_uint j = 0u; j < ndata; ++j) { const bst_float v = col[j].fvalue; @@ -399,7 +399,7 @@ class ThriftyFeatureSelector : public FeatureSelector { #pragma omp parallel for schedule(static) for (bst_omp_uint i = 0; i < nfeat; ++i) { const auto col = batch[i]; - const bst_uint ndata = col.length; + const bst_uint ndata = col.size(); for (bst_uint gid = 0u; gid < ngroup; ++gid) { auto &sums = gpair_sums_[gid * nfeat + i]; for (bst_uint j = 0u; j < ndata; ++j) { diff --git a/src/linear/updater_gpu_coordinate.cu b/src/linear/updater_gpu_coordinate.cu index cf4e47c61..ca1536cd1 100644 --- a/src/linear/updater_gpu_coordinate.cu +++ b/src/linear/updater_gpu_coordinate.cu @@ -118,13 +118,13 @@ class DeviceShard { return e1.index < e2.index; }; auto column_begin = - std::lower_bound(col.data, col.data + col.length, + std::lower_bound(col.data(), col.data() + col.size(), Entry(row_begin, 0.0f), cmp); auto column_end = - std::upper_bound(col.data, col.data + col.length, + std::upper_bound(col.data(), col.data() + col.size(), Entry(row_end, 0.0f), cmp); column_segments.push_back( - 
std::make_pair(column_begin - col.data, column_end - col.data)); + std::make_pair(column_begin - col.data(), column_end - col.data())); row_ptr_.push_back(row_ptr_.back() + column_end - column_begin); } ba_.Allocate(device_idx, param.silent, &data_, row_ptr_.back(), &gpair_, @@ -134,7 +134,7 @@ class DeviceShard { auto col = batch[fidx]; auto seg = column_segments[fidx]; dh::safe_cuda(cudaMemcpy( - data_.Data() + row_ptr_[fidx], col.data + seg.first, + data_.Data() + row_ptr_[fidx], col.data() + seg.first, sizeof(Entry) * (seg.second - seg.first), cudaMemcpyHostToDevice)); } // Rescale indices with respect to current shard diff --git a/src/linear/updater_shotgun.cc b/src/linear/updater_shotgun.cc index 11b91cbce..fc666cfa1 100644 --- a/src/linear/updater_shotgun.cc +++ b/src/linear/updater_shotgun.cc @@ -92,10 +92,10 @@ class ShotgunUpdater : public LinearUpdater { auto col = batch[ii]; for (int gid = 0; gid < ngroup; ++gid) { double sum_grad = 0.0, sum_hess = 0.0; - for (bst_uint j = 0; j < col.length; ++j) { - GradientPair &p = gpair[col[j].index * ngroup + gid]; + for (auto& c : col) { + GradientPair &p = gpair[c.index * ngroup + gid]; if (p.GetHess() < 0.0f) continue; - const bst_float v = col[j].fvalue; + const bst_float v = c.fvalue; sum_grad += p.GetGrad() * v; sum_hess += p.GetHess() * v * v; } @@ -107,10 +107,10 @@ class ShotgunUpdater : public LinearUpdater { if (dw == 0.f) continue; w += dw; // update grad values - for (bst_uint j = 0; j < col.length; ++j) { - GradientPair &p = gpair[col[j].index * ngroup + gid]; + for (auto& c : col) { + GradientPair &p = gpair[c.index * ngroup + gid]; if (p.GetHess() < 0.0f) continue; - p += GradientPair(p.GetHess() * col[j].fvalue * dw, 0); + p += GradientPair(p.GetHess() * c.fvalue * dw, 0); } } } diff --git a/src/objective/regression_obj_gpu.cu b/src/objective/regression_obj_gpu.cu index 5d7c21ebd..d4ac49bfe 100644 --- a/src/objective/regression_obj_gpu.cu +++ b/src/objective/regression_obj_gpu.cu @@ -12,6 +12,7 @@ #include #include +#include "../common/span.h" #include "../common/device_helpers.cuh" #include "../common/host_device_vector.h" #include "./regression_loss.h" @@ -44,8 +45,8 @@ struct GPURegLossParam : public dmlc::Parameter { // GPU kernel for gradient computation template __global__ void get_gradient_k -(GradientPair *__restrict__ out_gpair, unsigned int *__restrict__ label_correct, - const float * __restrict__ preds, const float * __restrict__ labels, +(common::Span out_gpair, common::Span label_correct, + common::Span preds, common::Span labels, const float * __restrict__ weights, int n, float scale_pos_weight) { int i = threadIdx.x + blockIdx.x * blockDim.x; if (i >= n) @@ -56,14 +57,14 @@ __global__ void get_gradient_k if (label == 1.0f) w *= scale_pos_weight; if (!Loss::CheckLabel(label)) - atomicAnd(label_correct, 0); + atomicAnd(label_correct.data(), 0); out_gpair[i] = GradientPair (Loss::FirstOrderGradient(p, label) * w, Loss::SecondOrderGradient(p, label) * w); } // GPU kernel for predicate transformation template -__global__ void pred_transform_k(float * __restrict__ preds, int n) { +__global__ void pred_transform_k(common::Span preds, int n) { int i = threadIdx.x + blockIdx.x * blockDim.x; if (i >= n) return; @@ -144,8 +145,8 @@ class GPURegLossObj : public ObjFunction { size_t n = preds->DeviceSize(d); if (n > 0) { get_gradient_k<<>> - (out_gpair->DevicePointer(d), label_correct_.DevicePointer(d), - preds->DevicePointer(d), labels_.DevicePointer(d), + (out_gpair->DeviceSpan(d), label_correct_.DeviceSpan(d), + 
preds->DeviceSpan(d), labels_.DeviceSpan(d), info.weights_.size() > 0 ? weights_.DevicePointer(d) : nullptr, n, param_.scale_pos_weight); dh::safe_cuda(cudaGetLastError()); @@ -180,7 +181,8 @@ class GPURegLossObj : public ObjFunction { const int block = 256; size_t n = preds->DeviceSize(d); if (n > 0) { - pred_transform_k<<>>(preds->DevicePointer(d), n); + pred_transform_k<<>>( + preds->DeviceSpan(d), n); dh::safe_cuda(cudaGetLastError()); } dh::safe_cuda(cudaDeviceSynchronize()); diff --git a/src/tree/updater_basemaker-inl.h b/src/tree/updater_basemaker-inl.h index 06ca474ba..55ff2c774 100644 --- a/src/tree/updater_basemaker-inl.h +++ b/src/tree/updater_basemaker-inl.h @@ -49,9 +49,9 @@ class BaseMaker: public TreeUpdater { auto &batch = iter->Value(); for (bst_uint fid = 0; fid < batch.Size(); ++fid) { auto c = batch[fid]; - if (c.length != 0) { + if (c.size() != 0) { fminmax_[fid * 2 + 0] = std::max(-c[0].fvalue, fminmax_[fid * 2 + 0]); - fminmax_[fid * 2 + 1] = std::max(c[c.length - 1].fvalue, fminmax_[fid * 2 + 1]); + fminmax_[fid * 2 + 1] = std::max(c[c.size() - 1].fvalue, fminmax_[fid * 2 + 1]); } } } @@ -106,9 +106,9 @@ class BaseMaker: public TreeUpdater { inline static int NextLevel(const SparsePage::Inst &inst, const RegTree &tree, int nid) { const RegTree::Node &n = tree[nid]; bst_uint findex = n.SplitIndex(); - for (unsigned i = 0; i < inst.length; ++i) { - if (findex == inst[i].index) { - if (inst[i].fvalue < n.SplitCond()) { + for (const auto& ins : inst) { + if (findex == ins.index) { + if (ins.fvalue < n.SplitCond()) { return n.LeftChild(); } else { return n.RightChild(); @@ -250,7 +250,7 @@ class BaseMaker: public TreeUpdater { auto it = std::lower_bound(sorted_split_set.begin(), sorted_split_set.end(), fid); if (it != sorted_split_set.end() && *it == fid) { - const auto ndata = static_cast(col.length); + const auto ndata = static_cast(col.size()); #pragma omp parallel for schedule(static) for (bst_omp_uint j = 0; j < ndata; ++j) { const bst_uint ridx = col[j].index; @@ -308,7 +308,7 @@ class BaseMaker: public TreeUpdater { auto &batch = iter->Value(); for (auto fid : fsplits) { auto col = batch[fid]; - const auto ndata = static_cast(col.length); + const auto ndata = static_cast(col.size()); #pragma omp parallel for schedule(static) for (bst_omp_uint j = 0; j < ndata; ++j) { const bst_uint ridx = col[j].index; diff --git a/src/tree/updater_colmaker.cc b/src/tree/updater_colmaker.cc index a87d96b65..d4eaab7af 100644 --- a/src/tree/updater_colmaker.cc +++ b/src/tree/updater_colmaker.cc @@ -269,7 +269,7 @@ class ColMaker: public TreeUpdater { const std::vector &gpair) { // TODO(tqchen): double check stats order. 
const MetaInfo& info = fmat.Info(); - const bool ind = col.length != 0 && col.data[0].fvalue == col.data[col.length - 1].fvalue; + const bool ind = col.size() != 0 && col[0].fvalue == col[col.size() - 1].fvalue; bool need_forward = param_.NeedForwardSearch(fmat.GetColDensity(fid), ind); bool need_backward = param_.NeedBackwardSearch(fmat.GetColDensity(fid), ind); const std::vector &qexpand = qexpand_; @@ -281,8 +281,8 @@ class ColMaker: public TreeUpdater { for (int j : qexpand) { temp[j].stats.Clear(); } - bst_uint step = (col.length + this->nthread_ - 1) / this->nthread_; - bst_uint end = std::min(col.length, step * (tid + 1)); + bst_uint step = (col.size() + this->nthread_ - 1) / this->nthread_; + bst_uint end = std::min(static_cast(col.size()), step * (tid + 1)); for (bst_uint i = tid * step; i < end; ++i) { const bst_uint ridx = col[i].index; const int nid = position_[ridx]; @@ -363,8 +363,8 @@ class ColMaker: public TreeUpdater { GradStats c(param_), cright(param_); const int tid = omp_get_thread_num(); std::vector &temp = stemp_[tid]; - bst_uint step = (col.length + this->nthread_ - 1) / this->nthread_; - bst_uint end = std::min(col.length, step * (tid + 1)); + bst_uint step = (col.size() + this->nthread_ - 1) / this->nthread_; + bst_uint end = std::min(static_cast(col.size()), step * (tid + 1)); for (bst_uint i = tid * step; i < end; ++i) { const bst_uint ridx = col[i].index; const int nid = position_[ridx]; @@ -620,13 +620,13 @@ class ColMaker: public TreeUpdater { int fid = feat_set[i]; const int tid = omp_get_thread_num(); auto c = batch[fid]; - const bool ind = c.length != 0 && c.data[0].fvalue == c.data[c.length - 1].fvalue; + const bool ind = c.size() != 0 && c[0].fvalue == c[c.size() - 1].fvalue; if (param_.NeedForwardSearch(fmat.GetColDensity(fid), ind)) { - this->EnumerateSplit(c.data, c.data + c.length, +1, + this->EnumerateSplit(c.data(), c.data() + c.size(), +1, fid, gpair, info, stemp_[tid]); } if (param_.NeedBackwardSearch(fmat.GetColDensity(fid), ind)) { - this->EnumerateSplit(c.data + c.length - 1, c.data - 1, -1, + this->EnumerateSplit(c.data() + c.size() - 1, c.data() - 1, -1, fid, gpair, info, stemp_[tid]); } } @@ -734,7 +734,7 @@ class ColMaker: public TreeUpdater { auto &batch = iter->Value(); for (auto fid : fsplits) { auto col = batch[fid]; - const auto ndata = static_cast(col.length); + const auto ndata = static_cast(col.size()); #pragma omp parallel for schedule(static) for (bst_omp_uint j = 0; j < ndata; ++j) { const bst_uint ridx = col[j].index; @@ -865,7 +865,7 @@ class DistColMaker : public ColMaker { auto &batch = iter->Value(); for (auto fid : fsplits) { auto col = batch[fid]; - const auto ndata = static_cast(col.length); + const auto ndata = static_cast(col.size()); #pragma omp parallel for schedule(static) for (bst_omp_uint j = 0; j < ndata; ++j) { const bst_uint ridx = col[j].index; diff --git a/src/tree/updater_gpu.cu b/src/tree/updater_gpu.cu index 616c75179..ee23299ac 100644 --- a/src/tree/updater_gpu.cu +++ b/src/tree/updater_gpu.cu @@ -669,7 +669,7 @@ class GPUMaker : public TreeUpdater { auto &batch = iter->Value(); for (int i = 0; i < batch.Size(); i++) { auto col = batch[i]; - for (const Entry* it = col.data; it != col.data + col.length; + for (const Entry* it = col.data(); it != col.data() + col.size(); it++) { int inst_id = static_cast(it->index); fval->push_back(it->fvalue); diff --git a/src/tree/updater_histmaker.cc b/src/tree/updater_histmaker.cc index 97d03359a..62b5b13e1 100644 --- a/src/tree/updater_histmaker.cc +++ 
b/src/tree/updater_histmaker.cc @@ -496,13 +496,13 @@ class CQHistMaker: public HistMaker { } inline void UpdateHistCol(const std::vector &gpair, - const SparsePage::Inst &c, + const SparsePage::Inst &col, const MetaInfo &info, const RegTree &tree, const std::vector &fset, bst_uint fid_offset, std::vector *p_temp) { - if (c.length == 0) return; + if (col.size() == 0) return; // initialize sbuilder for use std::vector &hbuilder = *p_temp; hbuilder.resize(tree.param.num_nodes); @@ -514,46 +514,46 @@ class CQHistMaker: public HistMaker { } if (TStats::kSimpleStats != 0 && this->param_.cache_opt != 0) { constexpr bst_uint kBuffer = 32; - bst_uint align_length = c.length / kBuffer * kBuffer; + bst_uint align_length = col.size() / kBuffer * kBuffer; int buf_position[kBuffer]; GradientPair buf_gpair[kBuffer]; for (bst_uint j = 0; j < align_length; j += kBuffer) { for (bst_uint i = 0; i < kBuffer; ++i) { - bst_uint ridx = c[j + i].index; + bst_uint ridx = col[j + i].index; buf_position[i] = this->position_[ridx]; buf_gpair[i] = gpair[ridx]; } for (bst_uint i = 0; i < kBuffer; ++i) { const int nid = buf_position[i]; if (nid >= 0) { - hbuilder[nid].Add(c[j + i].fvalue, buf_gpair[i]); + hbuilder[nid].Add(col[j + i].fvalue, buf_gpair[i]); } } } - for (bst_uint j = align_length; j < c.length; ++j) { - const bst_uint ridx = c[j].index; + for (bst_uint j = align_length; j < col.size(); ++j) { + const bst_uint ridx = col[j].index; const int nid = this->position_[ridx]; if (nid >= 0) { - hbuilder[nid].Add(c[j].fvalue, gpair[ridx]); + hbuilder[nid].Add(col[j].fvalue, gpair[ridx]); } } } else { - for (bst_uint j = 0; j < c.length; ++j) { - const bst_uint ridx = c[j].index; + for (const auto& c : col) { + const bst_uint ridx = c.index; const int nid = this->position_[ridx]; if (nid >= 0) { - hbuilder[nid].Add(c[j].fvalue, gpair, info, ridx); + hbuilder[nid].Add(c.fvalue, gpair, info, ridx); } } } } inline void UpdateSketchCol(const std::vector &gpair, - const SparsePage::Inst &c, + const SparsePage::Inst &col, const RegTree &tree, size_t work_set_size, bst_uint offset, std::vector *p_temp) { - if (c.length == 0) return; + if (col.size() == 0) return; // initialize sbuilder for use std::vector &sbuilder = *p_temp; sbuilder.resize(tree.param.num_nodes); @@ -565,18 +565,18 @@ class CQHistMaker: public HistMaker { } // first pass, get sum of weight, TODO, optimization to skip first pass - for (bst_uint j = 0; j < c.length; ++j) { - const bst_uint ridx = c[j].index; + for (const auto& c : col) { + const bst_uint ridx = c.index; const int nid = this->position_[ridx]; if (nid >= 0) { - sbuilder[nid].sum_total += gpair[ridx].GetHess(); + sbuilder[nid].sum_total += gpair[ridx].GetHess(); } } // if only one value, no need to do second pass - if (c[0].fvalue == c[c.length-1].fvalue) { + if (col[0].fvalue == col[col.size()-1].fvalue) { for (size_t i = 0; i < this->qexpand_.size(); ++i) { const int nid = this->qexpand_[i]; - sbuilder[nid].sketch->Push(c[0].fvalue, static_cast(sbuilder[nid].sum_total)); + sbuilder[nid].sketch->Push(col[0].fvalue, static_cast(sbuilder[nid].sum_total)); } return; } @@ -589,35 +589,35 @@ class CQHistMaker: public HistMaker { // second pass, build the sketch if (TStats::kSimpleStats != 0 && this->param_.cache_opt != 0) { constexpr bst_uint kBuffer = 32; - bst_uint align_length = c.length / kBuffer * kBuffer; + bst_uint align_length = col.size() / kBuffer * kBuffer; int buf_position[kBuffer]; bst_float buf_hess[kBuffer]; for (bst_uint j = 0; j < align_length; j += kBuffer) { for (bst_uint i = 
0; i < kBuffer; ++i) {
-          bst_uint ridx = c[j + i].index;
+          bst_uint ridx = col[j + i].index;
           buf_position[i] = this->position_[ridx];
           buf_hess[i] = gpair[ridx].GetHess();
         }
         for (bst_uint i = 0; i < kBuffer; ++i) {
           const int nid = buf_position[i];
           if (nid >= 0) {
-            sbuilder[nid].Push(c[j + i].fvalue, buf_hess[i], max_size);
+            sbuilder[nid].Push(col[j + i].fvalue, buf_hess[i], max_size);
           }
         }
       }
-      for (bst_uint j = align_length; j < c.length; ++j) {
-        const bst_uint ridx = c[j].index;
+      for (bst_uint j = align_length; j < col.size(); ++j) {
+        const bst_uint ridx = col[j].index;
         const int nid = this->position_[ridx];
         if (nid >= 0) {
-          sbuilder[nid].Push(c[j].fvalue, gpair[ridx].GetHess(), max_size);
+          sbuilder[nid].Push(col[j].fvalue, gpair[ridx].GetHess(), max_size);
         }
       }
     } else {
-      for (bst_uint j = 0; j < c.length; ++j) {
-        const bst_uint ridx = c[j].index;
+      for (const auto& c : col) {
+        const bst_uint ridx = c.index;
         const int nid = this->position_[ridx];
         if (nid >= 0) {
-          sbuilder[nid].Push(c[j].fvalue, gpair[ridx].GetHess(), max_size);
+          sbuilder[nid].Push(c.fvalue, gpair[ridx].GetHess(), max_size);
         }
       }
     }
@@ -794,8 +794,8 @@ class QuantileHistMaker: public HistMaker<TStats> {
         if (this->node2workindex_[nid] < 0) {
           this->position_[ridx] = ~nid;
         } else {
-          for (bst_uint j = 0; j < inst.length; ++j) {
-            builder.AddBudget(inst[j].index, omp_get_thread_num());
+          for (auto& ins : inst) {
+            builder.AddBudget(ins.index, omp_get_thread_num());
           }
         }
       }
@@ -807,9 +807,9 @@ class QuantileHistMaker: public HistMaker<TStats> {
           const bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
           const int nid = this->position_[ridx];
           if (nid >= 0) {
-            for (bst_uint j = 0; j < inst.length; ++j) {
-              builder.Push(inst[j].index,
-                           Entry(nid, inst[j].fvalue),
+            for (auto& ins : inst) {
+              builder.Push(ins.index,
+                           Entry(nid, ins.fvalue),
                            omp_get_thread_num());
             }
           }
diff --git a/src/tree/updater_skmaker.cc b/src/tree/updater_skmaker.cc
index 9549ff0c6..50f1a56c4 100644
--- a/src/tree/updater_skmaker.cc
+++ b/src/tree/updater_skmaker.cc
@@ -155,7 +155,7 @@ class SketchMaker: public BaseMaker {
         this->UpdateSketchCol(gpair, batch[fidx], tree,
                               node_stats_,
                               fidx,
-                              batch[fidx].length == nrows,
+                              batch[fidx].size() == nrows,
                               &thread_sketch_[omp_get_thread_num()]);
       }
     }
@@ -174,13 +174,13 @@ class SketchMaker: public BaseMaker {
   }
   // update sketch information in column fid
   inline void UpdateSketchCol(const std::vector<GradientPair> &gpair,
-                              const SparsePage::Inst &c,
+                              const SparsePage::Inst &col,
                               const RegTree &tree,
                               const std::vector<SKStats> &nstats,
                               bst_uint fid,
                               bool col_full,
                              std::vector<SketchEntry> *p_temp) {
-    if (c.length == 0) return;
+    if (col.size() == 0) return;
     // initialize sbuilder for use
     std::vector<SketchEntry> &sbuilder = *p_temp;
     sbuilder.resize(tree.param.num_nodes * 3);
@@ -192,10 +192,10 @@ class SketchMaker: public BaseMaker {
       }
     }
     if (!col_full) {
-      for (bst_uint j = 0; j < c.length; ++j) {
-        const bst_uint ridx = c[j].index;
+      for (const auto& c : col) {
+        const bst_uint ridx = c.index;
         const int nid = this->position_[ridx];
         if (nid >= 0) {
          const GradientPair &e = gpair[ridx];
          if (e.GetGrad() >= 0.0f) {
            sbuilder[3 * nid + 0].sum_total += e.GetGrad();
@@ -213,10 +213,10 @@ class SketchMaker: public BaseMaker {
       }
     }
     // if only one value, no need to do second pass
-    if (c[0].fvalue == c[c.length-1].fvalue) {
+    if (col[0].fvalue == col[col.size()-1].fvalue) {
       for (int nid : this->qexpand_) {
         for (int k = 0; k < 3; ++k) {
-          sbuilder[3 * nid + k].sketch->Push(c[0].fvalue,
+          sbuilder[3 * nid + k].sketch->Push(col[0].fvalue,
                                              static_cast<bst_float>(
                                                  sbuilder[3 * nid +
k].sum_total)); } @@ -231,17 +231,17 @@ class SketchMaker: public BaseMaker { } } // second pass, build the sketch - for (bst_uint j = 0; j < c.length; ++j) { - const bst_uint ridx = c[j].index; + for (const auto& c : col) { + const bst_uint ridx = c.index; const int nid = this->position_[ridx]; if (nid >= 0) { const GradientPair &e = gpair[ridx]; if (e.GetGrad() >= 0.0f) { - sbuilder[3 * nid + 0].Push(c[j].fvalue, e.GetGrad(), max_size); + sbuilder[3 * nid + 0].Push(c.fvalue, e.GetGrad(), max_size); } else { - sbuilder[3 * nid + 1].Push(c[j].fvalue, -e.GetGrad(), max_size); + sbuilder[3 * nid + 1].Push(c.fvalue, -e.GetGrad(), max_size); } - sbuilder[3 * nid + 2].Push(c[j].fvalue, e.GetHess(), max_size); + sbuilder[3 * nid + 2].Push(c.fvalue, e.GetHess(), max_size); } } for (int nid : this->qexpand_) { diff --git a/tests/cpp/c_api/test_c_api.cc b/tests/cpp/c_api/test_c_api.cc index a05fd74fa..e779deaa0 100644 --- a/tests/cpp/c_api/test_c_api.cc +++ b/tests/cpp/c_api/test_c_api.cc @@ -59,10 +59,10 @@ TEST(c_api, XGDMatrixCreateFromMat_omp) { auto batch = iter->Value(); for (int i = 0; i < batch.Size(); i++) { auto inst = batch[i]; - for (int j = 0; i < inst.length; i++) { + for (int j = 0; i < inst.size(); i++) { ASSERT_EQ(inst[j].fvalue, 1.5); } } } } -} \ No newline at end of file +} diff --git a/tests/cpp/common/test_host_device_vector.cu b/tests/cpp/common/test_host_device_vector.cu new file mode 100644 index 000000000..da3192600 --- /dev/null +++ b/tests/cpp/common/test_host_device_vector.cu @@ -0,0 +1,22 @@ +/*! + * Copyright 2018 XGBoost contributors + */ + +#include +#include "../../../src/common/host_device_vector.h" +#include "../../../src/common/device_helpers.cuh" + +namespace xgboost { +namespace common { + +TEST(HostDeviceVector, Span) { + HostDeviceVector vec {1.0f, 2.0f, 3.0f, 4.0f}; + vec.Reshard(GPUSet{0, 1}); + auto span = vec.DeviceSpan(0); + ASSERT_EQ(vec.Size(), span.size()); + ASSERT_EQ(vec.DevicePointer(0), span.data()); +} + +} // namespace common +} // namespace xgboost + diff --git a/tests/cpp/common/test_span.cc b/tests/cpp/common/test_span.cc new file mode 100644 index 000000000..516aeffe5 --- /dev/null +++ b/tests/cpp/common/test_span.cc @@ -0,0 +1,423 @@ +/*! + * Copyright 2018 XGBoost contributors + */ +#include +#include + +#include "../../../src/common/span.h" +#include "test_span.h" + +namespace xgboost { +namespace common { + +TEST(Span, TestStatus) { + int status = 1; + TestTestStatus {&status}(); + ASSERT_EQ(status, -1); +} + +TEST(Span, DlfConstructors) { + // Dynamic extent + { + Span s; + ASSERT_EQ(s.size(), 0); + ASSERT_EQ(s.data(), nullptr); + + Span cs; + ASSERT_EQ(cs.size(), 0); + ASSERT_EQ(cs.data(), nullptr); + } + + // Static extent + { + Span s; + ASSERT_EQ(s.size(), 0); + ASSERT_EQ(s.data(), nullptr); + + Span cs; + ASSERT_EQ(cs.size(), 0); + ASSERT_EQ(cs.data(), nullptr); + } + + // Init list. 
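+  // (There is no initializer_list overload; the empty braces below simply
+  // invoke the default constructor.)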
+ { + Span s {}; + ASSERT_EQ(s.size(), 0); + ASSERT_EQ(s.data(), nullptr); + + Span cs {}; + ASSERT_EQ(cs.size(), 0); + ASSERT_EQ(cs.data(), nullptr); + } +} + +TEST(Span, FromNullPtr) { + // dynamic extent + { + Span s {nullptr, static_cast::index_type>(0)}; + ASSERT_EQ(s.size(), 0); + ASSERT_EQ(s.data(), nullptr); + + Span cs {nullptr, static_cast::index_type>(0)}; + ASSERT_EQ(cs.size(), 0); + ASSERT_EQ(cs.data(), nullptr); + } + // static extent + { + Span s {nullptr, static_cast::index_type>(0)}; + ASSERT_EQ(s.size(), 0); + ASSERT_EQ(s.data(), nullptr); + + Span cs {nullptr, static_cast::index_type>(0)}; + ASSERT_EQ(cs.size(), 0); + ASSERT_EQ(cs.data(), nullptr); + } +} + +TEST(Span, FromPtrLen) { + float arr[16]; + InitializeRange(arr, arr+16); + + // static extent + { + Span s (arr, 16); + ASSERT_EQ (s.size(), 16); + ASSERT_EQ (s.data(), arr); + + for (Span::index_type i = 0; i < 16; ++i) { + ASSERT_EQ (s[i], arr[i]); + } + + Span cs (arr, 16); + ASSERT_EQ (cs.size(), 16); + ASSERT_EQ (cs.data(), arr); + + for (Span::index_type i = 0; i < 16; ++i) { + ASSERT_EQ (cs[i], arr[i]); + } + } + + { + EXPECT_ANY_THROW(Span tmp (arr, -1);); + } + + // dynamic extent + { + Span s (arr, 16); + ASSERT_EQ (s.size(), 16); + ASSERT_EQ (s.data(), arr); + + for (size_t i = 0; i < 16; ++i) { + ASSERT_EQ (s[i], arr[i]); + } + + Span cs (arr, 16); + ASSERT_EQ (cs.size(), 16); + ASSERT_EQ (cs.data(), arr); + + for (Span::index_type i = 0; i < 16; ++i) { + ASSERT_EQ (cs[i], arr[i]); + } + } +} + +TEST(Span, FromFirstLast) { + float arr[16]; + InitializeRange(arr, arr+16); + + // dynamic extent + { + Span s (arr, arr + 16); + ASSERT_EQ (s.size(), 16); + ASSERT_EQ (s.data(), arr); + ASSERT_EQ (s.data() + s.size(), arr + 16); + + for (size_t i = 0; i < 16; ++i) { + ASSERT_EQ (s[i], arr[i]); + } + + Span cs (arr, arr + 16); + ASSERT_EQ (cs.size(), 16); + ASSERT_EQ (cs.data(), arr); + ASSERT_EQ (cs.data() + cs.size(), arr + 16); + + for (size_t i = 0; i < 16; ++i) { + ASSERT_EQ (cs[i], arr[i]); + } + } + + // static extent + { + Span s (arr, arr + 16); + ASSERT_EQ (s.size(), 16); + ASSERT_EQ (s.data(), arr); + ASSERT_EQ (s.data() + s.size(), arr + 16); + + for (size_t i = 0; i < 16; ++i) { + ASSERT_EQ (s[i], arr[i]); + } + + Span cs (arr, arr + 16); + ASSERT_EQ (cs.size(), 16); + ASSERT_EQ (cs.data(), arr); + ASSERT_EQ (cs.data() + cs.size(), arr + 16); + + for (size_t i = 0; i < 16; ++i) { + ASSERT_EQ (cs[i], arr[i]); + } + } +} + +struct BaseClass { + virtual void operator()() {} +}; +struct DerivedClass : public BaseClass { + virtual void operator()() {} +}; + +TEST(Span, FromOther) { + + // convert constructor + { + Span derived; + Span base { derived }; + ASSERT_EQ(base.size(), derived.size()); + ASSERT_EQ(base.data(), derived.data()); + } + + float arr[16]; + InitializeRange(arr, arr + 16); + + // default copy constructor + { + Span s0 (arr); + Span s1 (s0); + ASSERT_EQ(s0.size(), s1.size()); + ASSERT_EQ(s0.data(), s1.data()); + } +} + +TEST(Span, FromArray) { + float arr[16]; + InitializeRange(arr, arr + 16); + + { + Span s (arr); + ASSERT_EQ(&arr[0], s.data()); + ASSERT_EQ(s.size(), 16); + for (size_t i = 0; i < 16; ++i) { + ASSERT_EQ(arr[i], s[i]); + } + } + + { + Span s (arr); + ASSERT_EQ(&arr[0], s.data()); + ASSERT_EQ(s.size(), 16); + for (size_t i = 0; i < 16; ++i) { + ASSERT_EQ(arr[i], s[i]); + } + } +} + +TEST(Span, FromContainer) { + std::vector vec (16); + InitializeRange(vec.begin(), vec.end()); + + Span s(vec); + ASSERT_EQ(s.size(), vec.size()); + ASSERT_EQ(s.data(), vec.data()); + + bool 
res = std::equal(vec.begin(), vec.end(), s.begin()); + ASSERT_TRUE(res); +} + +TEST(Span, Assignment) { + int status = 1; + TestAssignment{&status}(); + ASSERT_EQ(status, 1); +} + +TEST(SpanIter, Construct) { + int status = 1; + TestIterConstruct{&status}(); + ASSERT_EQ(status, 1); +} + +TEST(SpanIter, Ref) { + int status = 1; + TestIterRef{&status}(); + ASSERT_EQ(status, 1); +} + +TEST(SpanIter, Calculate) { + int status = 1; + TestIterCalculate{&status}(); + ASSERT_EQ(status, 1); +} + +TEST(SpanIter, Compare) { + int status = 1; + TestIterCompare{&status}(); + ASSERT_EQ(status, 1); +} + +TEST(Span, BeginEnd) { + int status = 1; + TestBeginEnd{&status}(); + ASSERT_EQ(status, 1); +} + +TEST(Span, RBeginREnd) { + int status = 1; + TestRBeginREnd{&status}(); + ASSERT_EQ(status, 1); +} + +TEST(Span, ElementAccess) { + float arr[16]; + InitializeRange(arr, arr + 16); + + Span s (arr); + size_t j = 0; + for (auto i : s) { + ASSERT_EQ(i, arr[j]); + ++j; + } + + EXPECT_ANY_THROW(s[16]); + EXPECT_ANY_THROW(s[-1]); + + EXPECT_ANY_THROW(s(16)); + EXPECT_ANY_THROW(s(-1)); +} + +TEST(Span, Obversers) { + int status = 1; + TestObservers{&status}(); + ASSERT_EQ(status, 1); +} + +TEST(Span, FirstLast) { + // static extent + { + float arr[16]; + InitializeRange(arr, arr + 16); + + Span s (arr); + Span first = s.first<4>(); + + ASSERT_EQ(first.size(), 4); + ASSERT_EQ(first.data(), arr); + + for (size_t i = 0; i < first.size(); ++i) { + ASSERT_EQ(first[i], arr[i]); + } + + EXPECT_ANY_THROW(s.first<-1>()); + EXPECT_ANY_THROW(s.first<17>()); + EXPECT_ANY_THROW(s.first<32>()); + } + + { + float arr[16]; + InitializeRange(arr, arr + 16); + + Span s (arr); + Span last = s.last<4>(); + + ASSERT_EQ(last.size(), 4); + ASSERT_EQ(last.data(), arr + 12); + + for (size_t i = 0; i < last.size(); ++i) { + ASSERT_EQ(last[i], arr[i+12]); + } + + EXPECT_ANY_THROW(s.last<-1>()); + EXPECT_ANY_THROW(s.last<17>()); + EXPECT_ANY_THROW(s.last<32>()); + } + + // dynamic extent + { + float *arr = new float[16]; + InitializeRange(arr, arr + 16); + Span s (arr, 16); + Span first = s.first(4); + + ASSERT_EQ(first.size(), 4); + ASSERT_EQ(first.data(), s.data()); + + for (size_t i = 0; i < first.size(); ++i) { + ASSERT_EQ(first[i], s[i]); + } + + EXPECT_ANY_THROW(s.first(-1)); + EXPECT_ANY_THROW(s.first(17)); + EXPECT_ANY_THROW(s.first(32)); + + delete [] arr; + } + + { + float *arr = new float[16]; + InitializeRange(arr, arr + 16); + Span s (arr, 16); + Span last = s.last(4); + + ASSERT_EQ(last.size(), 4); + ASSERT_EQ(last.data(), s.data() + 12); + + for (size_t i = 0; i < last.size(); ++i) { + ASSERT_EQ(s[12 + i], last[i]); + } + + EXPECT_ANY_THROW(s.last(-1)); + EXPECT_ANY_THROW(s.last(17)); + EXPECT_ANY_THROW(s.last(32)); + + delete [] arr; + } +} + +TEST(Span, Subspan) { + int arr[16] {0}; + Span s1 (arr); + auto s2 = s1.subspan<4>(); + ASSERT_EQ(s1.size() - 4, s2.size()); + + auto s3 = s1.subspan(2, 4); + ASSERT_EQ(s1.data() + 2, s3.data()); + ASSERT_EQ(s3.size(), 4); + + auto s4 = s1.subspan(2, dynamic_extent); + ASSERT_EQ(s1.data() + 2, s4.data()); + ASSERT_EQ(s4.size(), s1.size() - 2); + + EXPECT_ANY_THROW(s1.subspan(-1, 0)); + EXPECT_ANY_THROW(s1.subspan(16, 0)); + + EXPECT_ANY_THROW(s1.subspan<-1>()); + EXPECT_ANY_THROW(s1.subspan<16>()); +} + +TEST(Span, Compare) { + int status = 1; + TestCompare{&status}(); + ASSERT_EQ(status, 1); +} + +TEST(Span, AsBytes) { + int status = 1; + TestAsBytes{&status}(); + ASSERT_EQ(status, 1); +} + +TEST(Span, AsWritableBytes) { + int status = 1; + TestAsWritableBytes{&status}(); + 
+TEST(Span, AsWritableBytes) {
+  int status = 1;
+  TestAsWritableBytes{&status}();
+  ASSERT_EQ(status, 1);
+}
+
+}  // namespace common
+}  // namespace xgboost
diff --git a/tests/cpp/common/test_span.cu b/tests/cpp/common/test_span.cu
new file mode 100644
index 000000000..f42052b07
--- /dev/null
+++ b/tests/cpp/common/test_span.cu
@@ -0,0 +1,357 @@
+/*!
+ * Copyright 2018 XGBoost contributors
+ */
+#include <gtest/gtest.h>
+
+#include <thrust/host_vector.h>
+#include <thrust/device_vector.h>
+#include <thrust/execution_policy.h>
+
+#include "../../../src/common/device_helpers.cuh"
+#include "../../../src/common/span.h"
+#include "test_span.h"
+
+namespace xgboost {
+namespace common {
+
+// RAII wrapper around a single int flag in device memory: it starts out as 1,
+// and device-side checks overwrite it with -1 on failure.
+struct TestStatus {
+ private:
+  int *status_;
+
+ public:
+  TestStatus () {
+    dh::safe_cuda(cudaMalloc(&status_, sizeof(int)));
+    int h_status = 1;
+    dh::safe_cuda(cudaMemcpy(status_, &h_status,
+                             sizeof(int), cudaMemcpyHostToDevice));
+  }
+  ~TestStatus() {
+    dh::safe_cuda(cudaFree(status_));
+  }
+
+  int get() {
+    int h_status;
+    dh::safe_cuda(cudaMemcpy(&h_status, status_,
+                             sizeof(int), cudaMemcpyDeviceToHost));
+    return h_status;
+  }
+
+  int* data() {
+    return status_;
+  }
+};
+
+__global__ void test_from_other_kernel(Span<float> span) {
+  // don't get optimized out
+  size_t idx = threadIdx.x + blockIdx.x * blockDim.x;
+
+  if (idx >= span.size())
+    return;
+}
+// Test converting different T
+__global__ void test_from_other_kernel_const(Span<float const> span) {
+  // don't get optimized out
+  size_t idx = threadIdx.x + blockIdx.x * blockDim.x;
+
+  if (idx >= span.size())
+    return;
+}
+
+/*!
+ * \brief Here we just test whether the code compiles.
+ */
+TEST(GPUSpan, FromOther) {
+  thrust::host_vector<float> h_vec (16);
+  InitializeRange(h_vec.begin(), h_vec.end());
+
+  thrust::device_vector<float> d_vec (h_vec.size());
+  thrust::copy(h_vec.begin(), h_vec.end(), d_vec.begin());
+  // dynamic extent
+  {
+    Span<float> span (d_vec.data().get(), d_vec.size());
+    test_from_other_kernel<<<1, 16>>>(span);
+  }
+  {
+    Span<float> span (d_vec.data().get(), d_vec.size());
+    test_from_other_kernel_const<<<1, 16>>>(span);
+  }
+  // static extent
+  {
+    Span<float, 16> span (d_vec.data().get(), d_vec.data().get() + 16);
+    test_from_other_kernel<<<1, 16>>>(span);
+  }
+  {
+    Span<float, 16> span (d_vec.data().get(), d_vec.data().get() + 16);
+    test_from_other_kernel_const<<<1, 16>>>(span);
+  }
+}
+
+TEST(GPUSpan, Assignment) {
+  TestStatus status;
+  dh::LaunchN(0, 16, TestAssignment{status.data()});
+  ASSERT_EQ(status.get(), 1);
+}
+
+TEST(GPUSpan, TestStatus) {
+  TestStatus status;
+  dh::LaunchN(0, 16, TestTestStatus{status.data()});
+  ASSERT_EQ(status.get(), -1);
+}
+
+template <typename T>
+struct TestEqual {
+  T *lhs_, *rhs_;
+  int *status_;
+
+  TestEqual(T* _lhs, T* _rhs, int* _status) :
+      lhs_(_lhs), rhs_(_rhs), status_(_status) {}
+
+  XGBOOST_DEVICE void operator()(size_t _idx) {
+    bool res = lhs_[_idx] == rhs_[_idx];
+    SPAN_ASSERT_TRUE(res, status_);
+  }
+};
+
+TEST(GPUSpan, WithThrust) {
+  // Not advised to initialize a span with a host_vector, since h_vec.data()
+  // is a host function.
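+  // For example (hypothetical, do not do this):
+  //   Span<float> bad {h_vec.data(), h_vec.size()};
+  // compiles fine, but the span wraps a host pointer, and dereferencing it
+  // inside a kernel is invalid.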
+  thrust::host_vector<float> h_vec (16);
+  InitializeRange(h_vec.begin(), h_vec.end());
+
+  thrust::device_vector<float> d_vec (h_vec.size());
+  thrust::copy(h_vec.begin(), h_vec.end(), d_vec.begin());
+
+  // Can't initialize a span with a device_vector either, since d_vec.data()
+  // is not a raw pointer; use d_vec.data().get() instead.
+  {
+    Span<float> s (d_vec.data().get(), d_vec.size());
+
+    ASSERT_EQ(d_vec.size(), s.size());
+    ASSERT_EQ(d_vec.data().get(), s.data());
+  }
+
+  {
+    TestStatus status;
+    thrust::device_vector<float> d_vec1 (d_vec.size());
+    thrust::copy(thrust::device, d_vec.begin(), d_vec.end(), d_vec1.begin());
+    Span<float> s (d_vec1.data().get(), d_vec.size());
+
+    dh::LaunchN(0, 16, TestEqual<float>{
+        thrust::raw_pointer_cast(d_vec1.data()),
+        s.data(), status.data()});
+    ASSERT_EQ(status.get(), 1);
+
+    // FIXME: memory error!
+    // bool res = thrust::equal(thrust::device,
+    //                          d_vec.begin(), d_vec.end(),
+    //                          s.begin());
+  }
+}
+
+TEST(GPUSpan, BeginEnd) {
+  TestStatus status;
+  dh::LaunchN(0, 16, TestBeginEnd{status.data()});
+  ASSERT_EQ(status.get(), 1);
+}
+
+TEST(GPUSpan, RBeginREnd) {
+  TestStatus status;
+  dh::LaunchN(0, 16, TestRBeginREnd{status.data()});
+  ASSERT_EQ(status.get(), 1);
+}
+
+__global__ void test_modify_kernel(Span<float> span) {
+  size_t idx = threadIdx.x + blockIdx.x * blockDim.x;
+
+  if (idx >= span.size())
+    return;
+
+  span[idx] = span.size() - idx;
+}
+
+TEST(GPUSpan, Modify) {
+  thrust::host_vector<float> h_vec (16);
+  InitializeRange(h_vec.begin(), h_vec.end());
+
+  thrust::device_vector<float> d_vec (h_vec.size());
+  thrust::copy(h_vec.begin(), h_vec.end(), d_vec.begin());
+
+  Span<float> span (d_vec.data().get(), d_vec.size());
+
+  test_modify_kernel<<<1, 16>>>(span);
+
+  for (size_t i = 0; i < d_vec.size(); ++i) {
+    ASSERT_EQ(d_vec[i], d_vec.size() - i);
+  }
+}
+
+TEST(GPUSpan, Observers) {
+  TestStatus status;
+  dh::LaunchN(0, 16, TestObservers{status.data()});
+  ASSERT_EQ(status.get(), 1);
+}
+
+TEST(GPUSpan, Compare) {
+  TestStatus status;
+  dh::LaunchN(0, 16, TestIterCompare{status.data()});
+  ASSERT_EQ(status.get(), 1);
+}
+
+struct TestElementAccess {
+  Span<float> span_;
+
+  XGBOOST_DEVICE TestElementAccess (Span<float> _span) : span_(_span) {}
+
+  XGBOOST_DEVICE float operator()(size_t _idx) {
+    float tmp = span_[_idx];
+    return tmp;
+  }
+};
+
+TEST(GPUSpan, ElementAccess) {
+  EXPECT_DEATH({
+    thrust::host_vector<float> h_vec (16);
+    InitializeRange(h_vec.begin(), h_vec.end());
+
+    thrust::device_vector<float> d_vec (h_vec.size());
+    thrust::copy(h_vec.begin(), h_vec.end(), d_vec.begin());
+
+    Span<float> span (d_vec.data().get(), d_vec.size());
+    // 17 threads for a 16-element span: the last access is out of bounds.
+    dh::LaunchN(0, 17, TestElementAccess{span});}, "");
+}
+
+__global__ void test_first_dynamic_kernel(Span<float> _span) {
+  _span.first(-1);
+}
+__global__ void test_first_static_kernel(Span<float> _span) {
+  _span.first<-1>();
+}
+__global__ void test_last_dynamic_kernel(Span<float> _span) {
+  _span.last(-1);
+}
+__global__ void test_last_static_kernel(Span<float> _span) {
+  _span.last<-1>();
+}
+
+TEST(GPUSpan, FirstLast) {
+  // We construct the vectors separately for each case, since thrust cannot
+  // recover from a death test.
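+  // gtest death tests run the checked statement in a child process, so any
+  // thrust/CUDA state set up beforehand is not reusable once the child
+  // aborts; hence each lambda below owns its own vectors.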
+  auto lambda_first_dy = []() {
+    thrust::host_vector<float> h_vec (4);
+    InitializeRange(h_vec.begin(), h_vec.end());
+
+    thrust::device_vector<float> d_vec (h_vec.size());
+    thrust::copy(h_vec.begin(), h_vec.end(), d_vec.begin());
+
+    Span<float> span (d_vec.data().get(), d_vec.size());
+    test_first_dynamic_kernel<<<1, 1>>>(span);
+  };
+  EXPECT_DEATH(lambda_first_dy(), "");
+
+  auto lambda_first_static = []() {
+    thrust::host_vector<float> h_vec (4);
+    InitializeRange(h_vec.begin(), h_vec.end());
+
+    thrust::device_vector<float> d_vec (h_vec.size());
+    thrust::copy(h_vec.begin(), h_vec.end(), d_vec.begin());
+
+    Span<float> span (d_vec.data().get(), d_vec.size());
+    test_first_static_kernel<<<1, 1>>>(span);
+  };
+  EXPECT_DEATH(lambda_first_static(), "");
+
+  auto lambda_last_dy = []() {
+    thrust::host_vector<float> h_vec (4);
+    InitializeRange(h_vec.begin(), h_vec.end());
+
+    thrust::device_vector<float> d_vec (h_vec.size());
+    thrust::copy(h_vec.begin(), h_vec.end(), d_vec.begin());
+
+    Span<float> span (d_vec.data().get(), d_vec.size());
+    test_last_dynamic_kernel<<<1, 1>>>(span);
+  };
+  EXPECT_DEATH(lambda_last_dy(), "");
+
+  auto lambda_last_static = []() {
+    thrust::host_vector<float> h_vec (4);
+    InitializeRange(h_vec.begin(), h_vec.end());
+
+    thrust::device_vector<float> d_vec (h_vec.size());
+    thrust::copy(h_vec.begin(), h_vec.end(), d_vec.begin());
+
+    Span<float> span (d_vec.data().get(), d_vec.size());
+    test_last_static_kernel<<<1, 1>>>(span);
+  };
+  EXPECT_DEATH(lambda_last_static(), "");
+}
+
+__global__ void test_subspan_dynamic_kernel(Span<float> _span) {
+  _span.subspan(16, 0);
+}
+__global__ void test_subspan_static_kernel(Span<float> _span) {
+  _span.subspan<16>();
+}
+TEST(GPUSpan, Subspan) {
+  auto lambda_subspan_dynamic = []() {
+    thrust::host_vector<float> h_vec (4);
+    InitializeRange(h_vec.begin(), h_vec.end());
+
+    thrust::device_vector<float> d_vec (h_vec.size());
+    thrust::copy(h_vec.begin(), h_vec.end(), d_vec.begin());
+
+    Span<float> span (d_vec.data().get(), d_vec.size());
+    test_subspan_dynamic_kernel<<<1, 1>>>(span);
+  };
+  EXPECT_DEATH(lambda_subspan_dynamic(), "");
+
+  auto lambda_subspan_static = []() {
+    thrust::host_vector<float> h_vec (4);
+    InitializeRange(h_vec.begin(), h_vec.end());
+
+    thrust::device_vector<float> d_vec (h_vec.size());
+    thrust::copy(h_vec.begin(), h_vec.end(), d_vec.begin());
+
+    Span<float> span (d_vec.data().get(), d_vec.size());
+    test_subspan_static_kernel<<<1, 1>>>(span);
+  };
+  EXPECT_DEATH(lambda_subspan_static(), "");
+}
+
+TEST(GPUSpanIter, Construct) {
+  TestStatus status;
+  dh::LaunchN(0, 16, TestIterConstruct{status.data()});
+  ASSERT_EQ(status.get(), 1);
+}
+
+TEST(GPUSpanIter, Ref) {
+  TestStatus status;
+  dh::LaunchN(0, 16, TestIterRef{status.data()});
+  ASSERT_EQ(status.get(), 1);
+}
+
+TEST(GPUSpanIter, Calculate) {
+  TestStatus status;
+  dh::LaunchN(0, 16, TestIterCalculate{status.data()});
+  ASSERT_EQ(status.get(), 1);
+}
+
+TEST(GPUSpanIter, Compare) {
+  TestStatus status;
+  dh::LaunchN(0, 16, TestIterCompare{status.data()});
+  ASSERT_EQ(status.get(), 1);
+}
+
+TEST(GPUSpan, AsBytes) {
+  TestStatus status;
+  dh::LaunchN(0, 16, TestAsBytes{status.data()});
+  ASSERT_EQ(status.get(), 1);
+}
+
+TEST(GPUSpan, AsWritableBytes) {
+  TestStatus status;
+  dh::LaunchN(0, 16, TestAsWritableBytes{status.data()});
+  ASSERT_EQ(status.get(), 1);
+}
+
+}  // namespace common
+}  // namespace xgboost
diff --git a/tests/cpp/common/test_span.h b/tests/cpp/common/test_span.h
new file mode 100644
index 000000000..194a356ce
--- /dev/null
+++ b/tests/cpp/common/test_span.h
@@ -0,0 +1,339 @@
+/*!
+ * Copyright 2018 XGBoost contributors
+ */
+#ifndef XGBOOST_TEST_SPAN_H_
+#define XGBOOST_TEST_SPAN_H_
+
+#include "../../include/xgboost/base.h"
+#include "../../../src/common/span.h"
+
+namespace xgboost {
+namespace common {
+
+#define SPAN_ASSERT_TRUE(cond, status)          \
+  if (!(cond)) {                                \
+    *(status) = -1;                             \
+  }
+
+#define SPAN_ASSERT_FALSE(cond, status)         \
+  if ((cond)) {                                 \
+    *(status) = -1;                             \
+  }
+
+template <typename Iter>
+XGBOOST_DEVICE void InitializeRange(Iter _begin, Iter _end) {
+  float j = 0;
+  for (Iter i = _begin; i != _end; ++i, ++j) {
+    *i = j;
+  }
+}
+
+// Fails unconditionally; verifies that a failed device-side check is
+// actually visible from the host.
+struct TestTestStatus {
+  int* status_;
+
+  TestTestStatus(int* _status) : status_(_status) {}
+
+  XGBOOST_DEVICE void operator()() {
+    this->operator()(0);
+  }
+  XGBOOST_DEVICE void operator()(int _idx) {
+    SPAN_ASSERT_TRUE(false, status_);
+  }
+};
+
+struct TestAssignment {
+  int* status_;
+
+  TestAssignment(int* _status) : status_(_status) {}
+
+  XGBOOST_DEVICE void operator()() {
+    this->operator()(0);
+  }
+  XGBOOST_DEVICE void operator()(int _idx) {
+    Span<float> s1;
+
+    float arr[] = {3, 4, 5};
+
+    Span<float> s2 = arr;
+    SPAN_ASSERT_TRUE(s2.size() == 3, status_);
+    SPAN_ASSERT_TRUE(s2.data() == &arr[0], status_);
+
+    s2 = s1;
+    SPAN_ASSERT_TRUE(s2.empty(), status_);
+  }
+};
+
+struct TestBeginEnd {
+  int* status_;
+
+  TestBeginEnd(int* _status) : status_(_status) {}
+
+  XGBOOST_DEVICE void operator()() {
+    this->operator()(0);
+  }
+  XGBOOST_DEVICE void operator()(int _idx) {
+    float arr[16];
+    InitializeRange(arr, arr + 16);
+
+    Span<float> s (arr);
+    Span<float>::iterator beg { s.begin() };
+    Span<float>::iterator end { s.end() };
+
+    SPAN_ASSERT_TRUE(end == beg + 16, status_);
+    SPAN_ASSERT_TRUE(*beg == arr[0], status_);
+    SPAN_ASSERT_TRUE(*(end - 1) == arr[15], status_);
+  }
+};
+
+struct TestRBeginREnd {
+  int* status_;
+
+  TestRBeginREnd(int* _status) : status_(_status) {}
+
+  XGBOOST_DEVICE void operator()() {
+    this->operator()(0);
+  }
+  XGBOOST_DEVICE void operator()(int _idx) {
+    float arr[16];
+    InitializeRange(arr, arr + 16);
+
+    Span<float> s (arr);
+    Span<float>::iterator rbeg { s.rbegin() };
+    Span<float>::iterator rend { s.rend() };
+
+    SPAN_ASSERT_TRUE(rbeg == rend + 16, status_);
+    SPAN_ASSERT_TRUE(*(rbeg - 1) == arr[15], status_);
+    SPAN_ASSERT_TRUE(*rend == arr[0], status_);
+  }
+};
+
+struct TestObservers {
+  int* status_;
+
+  TestObservers(int* _status) : status_(_status) {}
+
+  XGBOOST_DEVICE void operator()() {
+    this->operator()(0);
+  }
+  XGBOOST_DEVICE void operator()(int _idx) {
+    // empty
+    {
+      float *arr = nullptr;
+      Span<float> s (arr, static_cast<Span<float>::index_type>(0));
+      SPAN_ASSERT_TRUE(s.empty(), status_);
+    }
+
+    // size, size_bytes
+    {
+      float* arr = new float[16];
+      Span<float> s (arr, 16);
+      SPAN_ASSERT_TRUE(s.size() == 16, status_);
+      SPAN_ASSERT_TRUE(s.size_bytes() == 16 * sizeof(float), status_);
+      delete [] arr;
+    }
+  }
+};
+
+struct TestCompare {
+  int* status_;
+
+  TestCompare(int* _status) : status_(_status) {}
+
+  XGBOOST_DEVICE void operator()() {
+    this->operator()(0);
+  }
+  XGBOOST_DEVICE void operator()(int _idx) {
+    float lhs_arr[16], rhs_arr[16];
+    InitializeRange(lhs_arr, lhs_arr + 16);
+    InitializeRange(rhs_arr, rhs_arr + 16);
+
+    Span<float> lhs (lhs_arr);
+    Span<float> rhs (rhs_arr);
+
+    SPAN_ASSERT_TRUE(lhs == rhs, status_);
+    SPAN_ASSERT_FALSE(lhs != rhs, status_);
+
+    SPAN_ASSERT_TRUE(lhs <= rhs, status_);
+    SPAN_ASSERT_TRUE(lhs >= rhs, status_);
+
+    lhs[2] -= 1;
+
+    SPAN_ASSERT_FALSE(lhs == rhs, status_);
+    SPAN_ASSERT_TRUE(lhs < rhs, status_);
+    SPAN_ASSERT_FALSE(lhs > rhs, status_);
+  }
+};
+
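+// The functors below exercise Span<T>::iterator: default construction,
+// dereference, arithmetic, and ordering, mirroring the span tests above.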
+struct TestIterConstruct {
+  int* status_;
+
+  TestIterConstruct(int* _status) : status_(_status) {}
+
+  XGBOOST_DEVICE void operator()() {
+    this->operator()(0);
+  }
+  XGBOOST_DEVICE void operator()(int _idx) {
+    Span<float>::iterator it1;
+    Span<float>::iterator it2;
+    SPAN_ASSERT_TRUE(it1 == it2, status_);
+
+    Span<float>::const_iterator cit1;
+    Span<float>::const_iterator cit2;
+    SPAN_ASSERT_TRUE(cit1 == cit2, status_);
+  }
+};
+
+struct TestIterRef {
+  int* status_;
+
+  TestIterRef(int* _status) : status_(_status) {}
+
+  XGBOOST_DEVICE void operator()() {
+    this->operator()(0);
+  }
+  XGBOOST_DEVICE void operator()(int _idx) {
+    float arr[16];
+    InitializeRange(arr, arr + 16);
+
+    Span<float> s (arr);
+    SPAN_ASSERT_TRUE(*(s.begin()) == s[0], status_);
+    SPAN_ASSERT_TRUE(*(s.end() - 1) == s[15], status_);
+  }
+};
+
+struct TestIterCalculate {
+  int* status_;
+
+  TestIterCalculate(int* _status) : status_(_status) {}
+
+  XGBOOST_DEVICE void operator()() {
+    this->operator()(0);
+  }
+  XGBOOST_DEVICE void operator()(int _idx) {
+    float arr[16];
+    InitializeRange(arr, arr + 16);
+
+    Span<float> s (arr);
+    Span<float>::iterator beg { s.begin() };
+
+    beg += 4;
+    SPAN_ASSERT_TRUE(*beg == 4, status_);
+
+    beg -= 2;
+    SPAN_ASSERT_TRUE(*beg == 2, status_);
+
+    ++beg;
+    SPAN_ASSERT_TRUE(*beg == 3, status_);
+
+    --beg;
+    SPAN_ASSERT_TRUE(*beg == 2, status_);
+
+    beg++;
+    beg--;
+    SPAN_ASSERT_TRUE(*beg == 2, status_);
+  }
+};
+
+struct TestIterCompare {
+  int* status_;
+
+  TestIterCompare(int* _status) : status_(_status) {}
+
+  XGBOOST_DEVICE void operator()() {
+    this->operator()(0);
+  }
+  XGBOOST_DEVICE void operator()(int _idx) {
+    float arr[16];
+    InitializeRange(arr, arr + 16);
+    Span<float> s (arr);
+    Span<float>::iterator left { s.begin() };
+    Span<float>::iterator right { s.end() };
+
+    left += 1;
+    right -= 15;
+
+    SPAN_ASSERT_TRUE(left == right, status_);
+
+    SPAN_ASSERT_TRUE(left >= right, status_);
+    SPAN_ASSERT_TRUE(left <= right, status_);
+
+    ++right;
+    SPAN_ASSERT_TRUE(right > left, status_);
+    SPAN_ASSERT_TRUE(left < right, status_);
+    SPAN_ASSERT_TRUE(left <= right, status_);
+  }
+};
+
+struct TestAsBytes {
+  int* status_;
+
+  TestAsBytes(int* _status) : status_(_status) {}
+
+  XGBOOST_DEVICE void operator()() {
+    this->operator()(0);
+  }
+  XGBOOST_DEVICE void operator()(int _idx) {
+    float arr[16];
+    InitializeRange(arr, arr + 16);
+
+    {
+      const Span<const float> s {arr};
+      const Span<const byte> bs = as_bytes(s);
+      SPAN_ASSERT_TRUE(bs.size() == s.size_bytes(), status_);
+      SPAN_ASSERT_TRUE(static_cast<const void*>(bs.data()) ==
+                       static_cast<const void*>(s.data()),
+                       status_);
+    }
+
+    {
+      Span<const float> s;
+      const Span<const byte> bs = as_bytes(s);
+      SPAN_ASSERT_TRUE(bs.size() == s.size(), status_);
+      SPAN_ASSERT_TRUE(bs.size() == 0, status_);
+      SPAN_ASSERT_TRUE(bs.size_bytes() == 0, status_);
+      SPAN_ASSERT_TRUE(static_cast<const void*>(bs.data()) ==
+                       static_cast<const void*>(s.data()),
+                       status_);
+      SPAN_ASSERT_TRUE(bs.data() == nullptr, status_);
+    }
+  }
+};
+
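+// as_writable_bytes is the mutable counterpart of as_bytes; both follow the
+// std::as_bytes / std::as_writable_bytes free functions from the C++20 draft
+// this Span is modeled on.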
+struct TestAsWritableBytes {
+  int* status_;
+
+  TestAsWritableBytes(int* _status) : status_(_status) {}
+
+  XGBOOST_DEVICE void operator()() {
+    this->operator()(0);
+  }
+  XGBOOST_DEVICE void operator()(int _idx) {
+    float arr[16];
+    InitializeRange(arr, arr + 16);
+
+    {
+      Span<float> s;
+      Span<byte> bs = as_writable_bytes(s);
+      SPAN_ASSERT_TRUE(bs.size() == s.size(), status_);
+      SPAN_ASSERT_TRUE(bs.size_bytes() == s.size_bytes(), status_);
+      SPAN_ASSERT_TRUE(bs.size() == 0, status_);
+      SPAN_ASSERT_TRUE(bs.size_bytes() == 0, status_);
+      SPAN_ASSERT_TRUE(bs.data() == nullptr, status_);
+      SPAN_ASSERT_TRUE(static_cast<void*>(bs.data()) ==
+                       static_cast<void*>(s.data()), status_);
+    }
+
+    {
+      Span<float> s { arr };
+      Span<byte> bs { as_writable_bytes(s) };
+      SPAN_ASSERT_TRUE(s.size_bytes() == bs.size_bytes(), status_);
+      SPAN_ASSERT_TRUE(static_cast<void*>(bs.data()) ==
+                       static_cast<void*>(s.data()), status_);
+    }
+  }
+};
+
+}  // namespace common
+}  // namespace xgboost
+
+#endif  // XGBOOST_TEST_SPAN_H_
diff --git a/tests/cpp/data/test_simple_csr_source.cc b/tests/cpp/data/test_simple_csr_source.cc
index 459e4570d..8da8a9730 100644
--- a/tests/cpp/data/test_simple_csr_source.cc
+++ b/tests/cpp/data/test_simple_csr_source.cc
@@ -25,7 +25,7 @@ TEST(SimpleCSRSource, SaveLoadBinary) {
   row_iter_read->BeforeFirst();
   row_iter_read->Next();
   auto first_row = row_iter->Value()[0];
   auto first_row_read = row_iter_read->Value()[0];
-  EXPECT_EQ(first_row.length, first_row_read.length);
+  EXPECT_EQ(first_row.size(), first_row_read.size());
   EXPECT_EQ(first_row[2].index, first_row_read[2].index);
   EXPECT_EQ(first_row[2].fvalue, first_row_read[2].fvalue);
   row_iter = nullptr;
   row_iter_read = nullptr;
diff --git a/tests/cpp/data/test_simple_dmatrix.cc b/tests/cpp/data/test_simple_dmatrix.cc
index 600c7d662..10ded726e 100644
--- a/tests/cpp/data/test_simple_dmatrix.cc
+++ b/tests/cpp/data/test_simple_dmatrix.cc
@@ -31,7 +31,7 @@ TEST(SimpleDMatrix, RowAccess) {
   row_iter->BeforeFirst();
   row_iter->Next();
   auto first_row = row_iter->Value()[0];
-  ASSERT_EQ(first_row.length, 3);
+  ASSERT_EQ(first_row.size(), 3);
   EXPECT_EQ(first_row[2].index, 2);
   EXPECT_EQ(first_row[2].fvalue, 20);
   row_iter = nullptr;
@@ -70,7 +70,7 @@ TEST(SimpleDMatrix, ColAccessWithoutBatches) {
   EXPECT_EQ(col_iter->Value().Size(), dmat->Info().num_col_)
     << "Expected batch size = number of cells as #batches is 1.";
   for (int i = 0; i < static_cast<int>(col_iter->Value().Size()); ++i) {
-    EXPECT_EQ(col_iter->Value()[i].length, dmat->GetColSize(i))
+    EXPECT_EQ(col_iter->Value()[i].size(), dmat->GetColSize(i))
       << "Expected length of each colbatch = colsize as #batches is 1.";
   }
 }
diff --git a/tests/cpp/data/test_sparse_page_dmatrix.cc b/tests/cpp/data/test_sparse_page_dmatrix.cc
index f08e1183a..572279c8c 100644
--- a/tests/cpp/data/test_sparse_page_dmatrix.cc
+++ b/tests/cpp/data/test_sparse_page_dmatrix.cc
@@ -40,7 +40,7 @@ TEST(SparsePageDMatrix, RowAccess) {
   row_iter->BeforeFirst();
   row_iter->Next();
   auto first_row = row_iter->Value()[0];
-  ASSERT_EQ(first_row.length, 3);
+  ASSERT_EQ(first_row.size(), 3);
   EXPECT_EQ(first_row[2].index, 2);
   EXPECT_EQ(first_row[2].fvalue, 20);
   row_iter = nullptr;