From 2c502784fffeb4aa35734d70ede47c98a8639fc2 Mon Sep 17 00:00:00 2001 From: trivialfis Date: Tue, 14 Aug 2018 13:58:11 +0800 Subject: [PATCH] Span class. (#3548) * Add basic Span class based on ISO++20. * Use Span instead of Inst in SparsePage. * Add DeviceSpan in HostDeviceVector, use it in regression obj. --- include/xgboost/data.h | 33 +- include/xgboost/tree_model.h | 4 +- src/c_api/c_api.cc | 8 +- src/common/hist_util.cc | 14 +- src/common/host_device_vector.cc | 5 + src/common/host_device_vector.cu | 12 + src/common/host_device_vector.h | 3 + src/common/span.h | 633 ++++++++++++++++++++ src/data/simple_dmatrix.cc | 14 +- src/data/simple_dmatrix.h | 2 +- src/gbm/gblinear.cc | 12 +- src/linear/coordinate_common.h | 10 +- src/linear/updater_gpu_coordinate.cu | 8 +- src/linear/updater_shotgun.cc | 12 +- src/objective/regression_obj_gpu.cu | 16 +- src/tree/updater_basemaker-inl.h | 14 +- src/tree/updater_colmaker.cc | 20 +- src/tree/updater_gpu.cu | 2 +- src/tree/updater_histmaker.cc | 64 +- src/tree/updater_skmaker.cc | 26 +- tests/cpp/c_api/test_c_api.cc | 4 +- tests/cpp/common/test_host_device_vector.cu | 22 + tests/cpp/common/test_span.cc | 423 +++++++++++++ tests/cpp/common/test_span.cu | 357 +++++++++++ tests/cpp/common/test_span.h | 339 +++++++++++ tests/cpp/data/test_simple_csr_source.cc | 2 +- tests/cpp/data/test_simple_dmatrix.cc | 4 +- tests/cpp/data/test_sparse_page_dmatrix.cc | 2 +- 28 files changed, 1927 insertions(+), 138 deletions(-) create mode 100644 src/common/span.h create mode 100644 tests/cpp/common/test_host_device_vector.cu create mode 100644 tests/cpp/common/test_span.cc create mode 100644 tests/cpp/common/test_span.cu create mode 100644 tests/cpp/common/test_span.h diff --git a/include/xgboost/data.h b/include/xgboost/data.h index 467572fa7..36e872ef1 100644 --- a/include/xgboost/data.h +++ b/include/xgboost/data.h @@ -15,6 +15,7 @@ #include #include #include "./base.h" +#include "../../src/common/span.h" namespace xgboost { // forward declare learner. @@ -133,7 +134,7 @@ struct Entry { /*! * \brief constructor with index and value * \param index The feature or row index. - * \param fvalue THe feature value. + * \param fvalue The feature value. */ Entry(bst_uint index, bst_float fvalue) : index(index), fvalue(fvalue) {} /*! \brief reversely compare feature values */ @@ -155,24 +156,14 @@ class SparsePage { std::vector data; size_t base_rowid; + /*! \brief an instance of sparse vector in the batch */ - struct Inst { - /*! \brief pointer to the elements*/ - const Entry *data{nullptr}; - /*! \brief length of the instance */ - bst_uint length{0}; - /*! \brief constructor */ - Inst() = default; - Inst(const Entry *data, bst_uint length) : data(data), length(length) {} - /*! \brief get i-th pair in the sparse vector*/ - inline const Entry& operator[](size_t i) const { - return data[i]; - } - }; + using Inst = common::Span; /*! \brief get i-th row from the batch */ inline Inst operator[](size_t i) const { - return {data.data() + offset[i], static_cast(offset[i + 1] - offset[i])}; + return {data.data() + offset[i], + static_cast(offset[i + 1] - offset[i])}; } /*! 
\brief constructor */ @@ -234,12 +225,12 @@ class SparsePage { * \param inst an instance row */ inline void Push(const Inst &inst) { - offset.push_back(offset.back() + inst.length); + offset.push_back(offset.back() + inst.size()); size_t begin = data.size(); - data.resize(begin + inst.length); - if (inst.length != 0) { - std::memcpy(dmlc::BeginPtr(data) + begin, inst.data, - sizeof(Entry) * inst.length); + data.resize(begin + inst.size()); + if (inst.size() != 0) { + std::memcpy(dmlc::BeginPtr(data) + begin, inst.data(), + sizeof(Entry) * inst.size()); } } @@ -328,7 +319,7 @@ class DMatrix { * \brief check if column access is supported, if not, initialize column access. * \param max_row_perbatch auxiliary information, maximum row used in each column batch. * this is a hint information that can be ignored by the implementation. - * \param sorted If column features should be in sorted order + * \param sorted If column features should be in sorted order * \return Number of column blocks in the column access. */ virtual void InitColAccess(size_t max_row_perbatch, bool sorted) = 0; diff --git a/include/xgboost/tree_model.h b/include/xgboost/tree_model.h index 1736a0a10..8729b74d3 100644 --- a/include/xgboost/tree_model.h +++ b/include/xgboost/tree_model.h @@ -574,14 +574,14 @@ inline void RegTree::FVec::Init(size_t size) { } inline void RegTree::FVec::Fill(const SparsePage::Inst& inst) { - for (bst_uint i = 0; i < inst.length; ++i) { + for (bst_uint i = 0; i < inst.size(); ++i) { if (inst[i].index >= data_.size()) continue; data_[inst[i].index].fvalue = inst[i].fvalue; } } inline void RegTree::FVec::Drop(const SparsePage::Inst& inst) { - for (bst_uint i = 0; i < inst.length; ++i) { + for (bst_uint i = 0; i < inst.size(); ++i) { if (inst[i].index >= data_.size()) continue; data_[inst[i].index].flag = -1; } diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index c64bed42e..506f52b41 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -687,10 +687,10 @@ XGB_DLL int XGDMatrixSliceDMatrix(DMatrixHandle handle, const int ridx = idxset[i]; auto inst = batch[ridx]; CHECK_LT(static_cast(ridx), batch.Size()); - ret.page_.data.insert(ret.page_.data.end(), inst.data, - inst.data + inst.length); - ret.page_.offset.push_back(ret.page_.offset.back() + inst.length); - ret.info.num_nonzero_ += inst.length; + ret.page_.data.insert(ret.page_.data.end(), inst.data(), + inst.data() + inst.size()); + ret.page_.offset.push_back(ret.page_.offset.back() + inst.size()); + ret.info.num_nonzero_ += inst.size(); if (src.info.labels_.size() != 0) { ret.info.labels_.push_back(src.info.labels_[ridx]); diff --git a/src/common/hist_util.cc b/src/common/hist_util.cc index b32ba349c..c0bea5980 100644 --- a/src/common/hist_util.cc +++ b/src/common/hist_util.cc @@ -48,9 +48,9 @@ void HistCutMatrix::Init(DMatrix* p_fmat, uint32_t max_num_bins) { for (size_t i = 0; i < batch.Size(); ++i) { // NOLINT(*) size_t ridx = batch.base_rowid + i; SparsePage::Inst inst = batch[i]; - for (bst_uint j = 0; j < inst.length; ++j) { - if (inst[j].index >= begin && inst[j].index < end) { - sketchs[inst[j].index].Push(inst[j].fvalue, info.GetWeight(ridx)); + for (auto& ins : inst) { + if (ins.index >= begin && ins.index < end) { + sketchs[ins.index].Push(ins.fvalue, info.GetWeight(ridx)); } } } @@ -140,7 +140,7 @@ void GHistIndexMatrix::Init(DMatrix* p_fmat, int max_num_bins) { auto &batch = iter->Value(); const size_t rbegin = row_ptr.size() - 1; for (size_t i = 0; i < batch.Size(); ++i) { - row_ptr.push_back(batch[i].length + 
row_ptr.back());
+      row_ptr.push_back(batch[i].size() + row_ptr.back());
     }
     index.resize(row_ptr.back());
 
@@ -154,9 +154,11 @@ void GHistIndexMatrix::Init(DMatrix* p_fmat, int max_num_bins) {
         size_t ibegin = row_ptr[rbegin + i];
         size_t iend = row_ptr[rbegin + i + 1];
         SparsePage::Inst inst = batch[i];
-        CHECK_EQ(ibegin + inst.length, iend);
-        for (bst_uint j = 0; j < inst.length; ++j) {
+
+        CHECK_EQ(ibegin + inst.size(), iend);
+        for (bst_uint j = 0; j < inst.size(); ++j) {
           uint32_t idx = cut.GetBinIdx(inst[j]);
+
           index[ibegin + j] = idx;
           ++hit_count_tloc_[tid * nbins + idx];
         }
diff --git a/src/common/host_device_vector.cc b/src/common/host_device_vector.cc
index e0263de04..f30196b1b 100644
--- a/src/common/host_device_vector.cc
+++ b/src/common/host_device_vector.cc
@@ -53,6 +53,11 @@ GPUSet HostDeviceVector<T>::Devices() const { return GPUSet::Empty(); }
 template <typename T>
 T* HostDeviceVector<T>::DevicePointer(int device) { return nullptr; }
 
+template <typename T>
+common::Span<T> HostDeviceVector<T>::DeviceSpan(int device) {
+  return common::Span<T>();
+}
+
 template <typename T>
 std::vector<T>& HostDeviceVector<T>::HostVector() { return impl_->data_h_; }
 
diff --git a/src/common/host_device_vector.cu b/src/common/host_device_vector.cu
index a474be7a0..e0d7dbb85 100644
--- a/src/common/host_device_vector.cu
+++ b/src/common/host_device_vector.cu
@@ -156,6 +156,13 @@ struct HostDeviceVectorImpl {
     return shards_[devices_.Index(device)].data_.data().get();
   }
 
+  common::Span<T> DeviceSpan(int device) {
+    CHECK(devices_.Contains(device));
+    LazySyncDevice(device);
+    return { shards_[devices_.Index(device)].data_.data().get(),
+             static_cast<typename common::Span<T>::index_type>(Size()) };
+  }
+
   size_t DeviceSize(int device) {
     CHECK(devices_.Contains(device));
     LazySyncDevice(device);
@@ -323,6 +330,11 @@ GPUSet HostDeviceVector<T>::Devices() const { return impl_->Devices(); }
 template <typename T>
 T* HostDeviceVector<T>::DevicePointer(int device) { return impl_->DevicePointer(device); }
 
+template <typename T>
+common::Span<T> HostDeviceVector<T>::DeviceSpan(int device) {
+  return impl_->DeviceSpan(device);
+}
+
 template <typename T>
 size_t HostDeviceVector<T>::DeviceStart(int device) { return impl_->DeviceStart(device); }
 
diff --git a/src/common/host_device_vector.h b/src/common/host_device_vector.h
index 5d9762511..ebd54e849 100644
--- a/src/common/host_device_vector.h
+++ b/src/common/host_device_vector.h
@@ -11,6 +11,8 @@
 #include
 #include
 
+#include "span.h"
+
 // only include thrust-related files if host_device_vector.h
 // is included from a .cu file
 #ifdef __CUDACC__
@@ -117,6 +119,7 @@ class HostDeviceVector {
   size_t Size() const;
   GPUSet Devices() const;
   T* DevicePointer(int device);
+  common::Span<T> DeviceSpan(int device);
   T* HostPointer() { return HostVector().data(); }
 
   size_t DeviceStart(int device);
diff --git a/src/common/span.h b/src/common/span.h
new file mode 100644
index 000000000..173cb095c
--- /dev/null
+++ b/src/common/span.h
@@ -0,0 +1,633 @@
+/*!
+ * Copyright 2018 XGBoost contributors
+ * \brief span class based on ISO C++20 span
+ *
+ * About NOLINTs in this file:
+ *
+ *   If we want Span to work with std interface, like range for loop, the
+ *   naming must be consistent with std, not XGBoost. The interface also
+ *   conflicts with XGBoost coding style, specifically, the use of `explicit'
+ *   keyword.
+ *
+ *
+ * Some of the code is copied from Guidelines Support Library, here is the
+ * license:
+ *
+ * Copyright (c) 2015 Microsoft Corporation. All rights reserved.
+ *
+ * This code is licensed under the MIT License (MIT).
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef XGBOOST_COMMON_SPAN_H_
+#define XGBOOST_COMMON_SPAN_H_
+
+#include <dmlc/logging.h>  // CHECK
+
+#include <cinttypes>       // int64_t
+#include <type_traits>     // remove_cv_t
+
+/*!
+ * The version number 1910 is picked up from GSL.
+ *
+ * We might want to use MOODYCAMEL_NOEXCEPT from dmlc/concurrentqueue.h. But
+ * there are a lot more definitions in that file that would cause
+ * warnings/troubles in MSVC 2013. Currently we try to keep the closure of
+ * Span as minimal as possible.
+ *
+ * There are other workarounds for MSVC, like _Unwrapped, _Verify_range ...
+ * Some of these are hidden magics of MSVC and I tried to avoid them. Should any
+ * of them become needed, please consult the source code of GSL, and possibly
+ * some explanations from this thread:
+ *
+ *   https://github.com/Microsoft/GSL/pull/664
+ *
+ * FIXME: Group these MSVC workarounds into a manageable place.
+ */
+#if defined(_MSC_VER) && _MSC_VER < 1910
+
+#define __span_noexcept
+
+#pragma push_macro("constexpr")
+#define constexpr /*constexpr*/
+
+#else
+
+#define __span_noexcept noexcept
+
+#endif
+
+namespace xgboost {
+namespace common {
+
+// Usual logging facility is not available inside device code.
+// FIXME: Make dmlc check more generic.
+#define KERNEL_CHECK(cond)                                      \
+  do {                                                          \
+    if (!(cond)) {                                              \
+      printf("\nKernel error:\n"                                \
+             "In: %s, \tline: %d\n"                             \
+             "\t%s\n\tExpecting: %s\n",                         \
+             __FILE__, __LINE__, __PRETTY_FUNCTION__, # cond);  \
+      asm("trap;");                                             \
+    }                                                           \
+  } while (0);                                                  \
+
+#ifdef __CUDA_ARCH__
+#define SPAN_CHECK KERNEL_CHECK
+#else
+#define SPAN_CHECK CHECK  // check from dmlc
+#endif
+
+namespace detail {
+/*!
+ * By default, XGBoost uses uint32_t for indexing data. int64_t covers all
+ * values uint32_t can represent. Also, on x86-64 Linux, GCC uses long int to
+ * represent ptrdiff_t, which is just int64_t. So we make it deterministic
+ * here.
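+ *
+ * A minimal sketch of what this choice guarantees (an editor's example,
+ * not part of the original patch):
+ *
+ * \code{.cpp}
+ * static_assert(std::is_same<detail::ptrdiff_t, std::int64_t>::value,
+ *               "the indexing type is deterministic across platforms");
+ * \endcode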
+ */ +using ptrdiff_t = int64_t; // NOLINT +} // namespace detail + +#if defined(_MSC_VER) && _MSC_VER < 1910 +constexpr const detail::ptrdiff_t dynamic_extent = -1; // NOLINT +#else +constexpr detail::ptrdiff_t dynamic_extent = -1; // NOLINT +#endif + +enum class byte : unsigned char {}; // NOLINT + +namespace detail { + +template +class Span; + +template +class SpanIterator { + using ElementType = typename SpanType::element_type; + + public: + using iterator_category = std::random_access_iterator_tag; // NOLINT + using value_type = typename std::remove_cv::type; // NOLINT + using difference_type = typename SpanType::index_type; // NOLINT + + using reference = typename std::conditional< // NOLINT + IsConst, const ElementType, ElementType>::type&; + using pointer = typename std::add_pointer::type&; // NOLINT + + XGBOOST_DEVICE constexpr SpanIterator() : span_{nullptr}, index_{0} {} + + XGBOOST_DEVICE constexpr SpanIterator( + const SpanType* _span, + typename SpanType::index_type _idx) __span_noexcept : + span_(_span), index_(_idx) {} + + friend SpanIterator; + template ::type* = nullptr> + XGBOOST_DEVICE constexpr SpanIterator( // NOLINT + const SpanIterator& other_) __span_noexcept + : SpanIterator(other_.span_, other_.index_) {} + + XGBOOST_DEVICE reference operator*() const { + SPAN_CHECK(index_ < span_->size()); + return *(span_->data() + index_); + } + + XGBOOST_DEVICE pointer operator->() const { + SPAN_CHECK(index_ != span_->size()); + return span_->data() + index_; + } + + XGBOOST_DEVICE SpanIterator& operator++() { + SPAN_CHECK(0 <= index_ && index_ != span_->size()); + index_++; + return *this; + } + + XGBOOST_DEVICE SpanIterator operator++(int) { + auto ret = *this; + ++(*this); + return ret; + } + + XGBOOST_DEVICE SpanIterator& operator--() { + SPAN_CHECK(index_ != 0 && index_ <= span_->size()); + index_--; + return *this; + } + + XGBOOST_DEVICE SpanIterator operator--(int) { + auto ret = *this; + --(*this); + return ret; + } + + XGBOOST_DEVICE SpanIterator operator+(difference_type n) const { + auto ret = *this; + return ret += n; + } + + XGBOOST_DEVICE SpanIterator& operator+=(difference_type n) { + SPAN_CHECK((index_ + n) >= 0 && (index_ + n) <= span_->size()); + index_ += n; + return *this; + } + + XGBOOST_DEVICE difference_type operator-(SpanIterator rhs) const { + SPAN_CHECK(span_ == rhs.span_); + return index_ - rhs.index_; + } + + XGBOOST_DEVICE SpanIterator operator-(difference_type n) const { + auto ret = *this; + return ret -= n; + } + + XGBOOST_DEVICE SpanIterator& operator-=(difference_type n) { + return *this += -n; + } + + // friends + XGBOOST_DEVICE constexpr friend bool operator==( + SpanIterator _lhs, SpanIterator _rhs) __span_noexcept { + return _lhs.span_ == _rhs.span_ && _lhs.index_ == _rhs.index_; + } + + XGBOOST_DEVICE constexpr friend bool operator!=( + SpanIterator _lhs, SpanIterator _rhs) __span_noexcept { + return !(_lhs == _rhs); + } + + XGBOOST_DEVICE constexpr friend bool operator<( + SpanIterator _lhs, SpanIterator _rhs) __span_noexcept { + return _lhs.index_ < _rhs.index_; + } + + XGBOOST_DEVICE constexpr friend bool operator<=( + SpanIterator _lhs, SpanIterator _rhs) __span_noexcept { + return !(_rhs < _lhs); + } + + XGBOOST_DEVICE constexpr friend bool operator>( + SpanIterator _lhs, SpanIterator _rhs) __span_noexcept { + return _rhs < _lhs; + } + + XGBOOST_DEVICE constexpr friend bool operator>=( + SpanIterator _lhs, SpanIterator _rhs) __span_noexcept { + return !(_rhs > _lhs); + } + + protected: + const SpanType *span_; + detail::ptrdiff_t 
index_;
+};
+
+
+// It's tempting to use constexpr instead of structs to do the following meta
+// programming. But remember that we are supporting MSVC 2013 here.
+
+/*!
+ * The extent E of the span returned by subspan is determined as follows:
+ *
+ *   - If Count is not std::dynamic_extent, Count;
+ *   - Otherwise, if Extent is not std::dynamic_extent, Extent - Offset;
+ *   - Otherwise, std::dynamic_extent.
+ */
+template <detail::ptrdiff_t Extent, detail::ptrdiff_t Offset,
+          detail::ptrdiff_t Count>
+struct ExtentValue : public std::integral_constant<
+  detail::ptrdiff_t, Count != dynamic_extent ?
+  Count : (Extent != dynamic_extent ? Extent - Offset : Extent)> {};
+
+/*!
+ * If N is dynamic_extent, the extent of the returned span E is also
+ * dynamic_extent; otherwise it is detail::ptrdiff_t(sizeof(T)) * N.
+ */
+template <typename T, detail::ptrdiff_t Extent>
+struct ExtentAsBytesValue : public std::integral_constant<
+  detail::ptrdiff_t,
+  Extent == dynamic_extent ?
+  Extent : static_cast<detail::ptrdiff_t>(sizeof(T) * Extent)> {};
+
+template <detail::ptrdiff_t From, detail::ptrdiff_t To>
+struct IsAllowedExtentConversion : public std::integral_constant<
+  bool, From == To || From == dynamic_extent || To == dynamic_extent> {};
+
+template <class From, class To>
+struct IsAllowedElementTypeConversion : public std::integral_constant<
+  bool, std::is_convertible<From(*)[], To(*)[]>::value> {};
+
+template <class T>
+struct IsSpanOracle : std::false_type {};
+
+template <class T, detail::ptrdiff_t Extent>
+struct IsSpanOracle<Span<T, Extent>> : std::true_type {};
+
+template <class T>
+struct IsSpan : public IsSpanOracle<typename std::remove_cv<T>::type> {};
+
+// Re-implement std algorithms here so they also work in CUDA code.
+template <typename T>
+struct Less {
+  XGBOOST_DEVICE constexpr bool operator()(const T& _x, const T& _y) const {
+    return _x < _y;
+  }
+};
+
+template <typename T>
+struct Greater {
+  XGBOOST_DEVICE constexpr bool operator()(const T& _x, const T& _y) const {
+    return _x > _y;
+  }
+};
+
+template <class InputIt1, class InputIt2,
+          class Compare =
+            detail::Less<decltype(std::declval<InputIt1>().operator*())>>
+XGBOOST_DEVICE bool LexicographicalCompare(InputIt1 first1, InputIt1 last1,
+                                           InputIt2 first2, InputIt2 last2) {
+  Compare comp;
+  for (; first1 != last1 && first2 != last2; ++first1, ++first2) {
+    if (comp(*first1, *first2)) {
+      return true;
+    }
+    if (comp(*first2, *first1)) {
+      return false;
+    }
+  }
+  return first1 == last1 && first2 != last2;
+}
+
+}  // namespace detail
+
+
+/*!
+ * \brief span class implementation, based on ISO C++20 span. The interface
+ *        should be the same.
+ *
+ * What's different from span in Guidelines Support Library (GSL):
+ *
+ *    The interface might be slightly different, we stick with ISO.
+ *
+ *    GSL uses C++14/17 features, which are not available here.
+ *    GSL uses constexpr extensively, which is not possible within the
+ *    limitations of C++11.
+ *    GSL is not concerned with CUDA.
+ *
+ *    GSL is more thoroughly implemented and tested.
+ *    GSL is more optimized, especially for static extent.
+ *
+ *    GSL uses __builtin_unreachable() on error, Span uses dmlc LOG and
+ *    customized CUDA logging.
+ *
+ *
+ * What's different from span in ISO C++20 (ISO):
+ *
+ *    ISO uses functions/structs from the std library, which might not be
+ *    available in CUDA.
+ *    Initializing from std::array is not supported.
+ *
+ *    ISO uses constexpr extensively, which is not possible within the
+ *    limitations of C++11.
+ *    ISO uses C++14/17 features, which are not available here.
+ *    ISO is not concerned with CUDA.
+ *
+ *    ISO uses std::terminate(), Span uses dmlc LOG and customized CUDA
+ *    logging.
+ *
+ *
+ * Limitations:
+ *   With thrust:
+ *     It's not advised to initialize Span with host_vector directly, since
+ *     host_vector::data() is a host function.
+ *     It's not possible to initialize Span with device_vector directly, since
+ *     device_vector::data() returns a wrapped pointer.
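+ *
+ *     A minimal sketch of the workaround, going through the raw pointer
+ *     (`d_vec' here is a thrust::device_vector<float>, as in the tests):
+ *
+ *     \code{.cpp}
+ *     Span<float> s {d_vec.data().get(),
+ *                    static_cast<Span<float>::index_type>(d_vec.size())};
+ *     \endcode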
+ *     It's unclear what kind of thrust algorithms can be used without
+ *     memory error. See the test case "GPUSpan.WithTrust"
+ *
+ *   Pass iterator to kernel:
+ *     Not possible. Use subspan instead.
+ *
+ *     The underlying Span in SpanIterator is a pointer, but CUDA passes
+ *     kernel parameters by value. If we were to hold a Span value instead
+ *     of a pointer, the following snippet would crash, violating the safety
+ *     purpose of Span:
+ *
+ *     \code{.cpp}
+ *     Span<float> span {arr_a};
+ *     auto beg = span.begin();
+ *
+ *     Span<float> span_b = arr_b;
+ *     span = span_b;
+ *
+ *     delete arr_a;
+ *     beg++;  // crash
+ *     \endcode
+ *
+ *     While holding a pointer or reference would avoid the problem, it's a
+ *     compromise. Since we have subspan, it's acceptable not to support
+ *     passing iterators.
+ */
+template <typename T,
+          detail::ptrdiff_t Extent = dynamic_extent>
+class Span {
+ public:
+  using element_type = T;                               // NOLINT
+  using value_type = typename std::remove_cv<T>::type;  // NOLINT
+  using index_type = detail::ptrdiff_t;                 // NOLINT
+  using difference_type = detail::ptrdiff_t;            // NOLINT
+  using pointer = T*;                                   // NOLINT
+  using reference = T&;                                 // NOLINT
+
+  using iterator = detail::SpanIterator<Span<T, Extent>, false>;             // NOLINT
+  using const_iterator = const detail::SpanIterator<Span<T, Extent>, true>;  // NOLINT
+  using reverse_iterator = detail::SpanIterator<Span<T, Extent>, false>;     // NOLINT
+  using const_reverse_iterator = const detail::SpanIterator<Span<T, Extent>, true>;  // NOLINT
+
+  // constructors
+
+  XGBOOST_DEVICE constexpr Span() __span_noexcept : size_(0), data_(nullptr) {}
+
+  XGBOOST_DEVICE Span(pointer _ptr, index_type _count) :
+      size_(_count), data_(_ptr) {
+    SPAN_CHECK(_count >= 0);
+    SPAN_CHECK(_ptr || _count == 0);
+  }
+
+  XGBOOST_DEVICE Span(pointer _first, pointer _last) :
+      size_(_last - _first), data_(_first) {
+    SPAN_CHECK(size_ >= 0);
+    SPAN_CHECK(data_ || size_ == 0);
+  }
+
+  template <std::size_t N>
+  XGBOOST_DEVICE constexpr Span(element_type (&arr)[N])  // NOLINT
+      __span_noexcept : size_(N), data_(&arr[0]) {}
+
+  template <class Container,
+            class = typename std::enable_if<
+              !std::is_const<element_type>::value &&
+              !detail::IsSpan<Container>::value &&
+              std::is_convertible<typename Container::pointer, pointer>::value &&
+              std::is_convertible<
+                typename Container::pointer,
+                decltype(std::declval<Container>().data())>::value>::type>
+  XGBOOST_DEVICE Span(Container& _cont) :  // NOLINT
+      size_(_cont.size()), data_(_cont.data()) {}
+
+  template <class Container,
+            class = typename std::enable_if<
+              std::is_const<element_type>::value &&
+              !detail::IsSpan<Container>::value &&
+              std::is_convertible<typename Container::pointer, pointer>::value &&
+              std::is_convertible<
+                typename Container::pointer,
+                decltype(std::declval<Container>().data())>::value>::type>
+  XGBOOST_DEVICE Span(const Container& _cont) : size_(_cont.size()),  // NOLINT
+      data_(_cont.data()) {}
+
+  template <class U, detail::ptrdiff_t OtherExtent,
+            class = typename std::enable_if<
+              detail::IsAllowedElementTypeConversion<U, T>::value &&
+              detail::IsAllowedExtentConversion<OtherExtent, Extent>::value>::type>
+  XGBOOST_DEVICE constexpr Span(const Span<U, OtherExtent>& _other)  // NOLINT
+      __span_noexcept : size_(_other.size()), data_(_other.data()) {}
+
+  XGBOOST_DEVICE constexpr Span(const Span& _other)
+      __span_noexcept : size_(_other.size()), data_(_other.data()) {}
+
+  XGBOOST_DEVICE Span& operator=(const Span& _other) __span_noexcept {
+    size_ = _other.size();
+    data_ = _other.data();
+    return *this;
+  }
+
+  XGBOOST_DEVICE ~Span() __span_noexcept {};  // NOLINT
+
+  XGBOOST_DEVICE constexpr iterator begin() const __span_noexcept {  // NOLINT
+    return {this, 0};
+  }
+
+  XGBOOST_DEVICE constexpr iterator end() const __span_noexcept {    // NOLINT
+    return {this, size()};
+  }
+
+  XGBOOST_DEVICE constexpr const_iterator cbegin() const __span_noexcept {  // NOLINT
+    return {this, 0};
+  }
+
+  XGBOOST_DEVICE constexpr const_iterator cend() const __span_noexcept {    // NOLINT
+    return {this, size()};
+  }
+
+  XGBOOST_DEVICE constexpr reverse_iterator rbegin() const __span_noexcept {  // NOLINT
+    return reverse_iterator{end()};
+  }
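+
+  // A hedged usage sketch (an editor's example, not part of the original
+  // class): iterators cannot be passed into a kernel, so callers hand out
+  // a subspan by value instead; `kernel' is a hypothetical __global__
+  // function taking a Span<float>:
+  //
+  //   Span<float> s {device_ptr, 16};
+  //   kernel<<<1, 8>>>(s.subspan(8, 8));  // second half of s, by value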
+ + XGBOOST_DEVICE constexpr reverse_iterator rend() const __span_noexcept { // NOLINT + return reverse_iterator{begin()}; + } + + XGBOOST_DEVICE constexpr const_reverse_iterator crbegin() const __span_noexcept { // NOLINT + return const_reverse_iterator{cend()}; + } + + XGBOOST_DEVICE constexpr const_reverse_iterator crend() const __span_noexcept { // NOLINT + return const_reverse_iterator{cbegin()}; + } + + XGBOOST_DEVICE reference operator[](index_type _idx) const { + SPAN_CHECK(_idx >= 0 && _idx < size()); + return data()[_idx]; + } + + XGBOOST_DEVICE constexpr reference operator()(index_type _idx) const { + return this->operator[](_idx); + } + + XGBOOST_DEVICE constexpr pointer data() const __span_noexcept { // NOLINT + return data_; + } + + // Observers + XGBOOST_DEVICE constexpr index_type size() const __span_noexcept { // NOLINT + return size_; + } + XGBOOST_DEVICE constexpr index_type size_bytes() const __span_noexcept { // NOLINT + return size() * sizeof(T); + } + + XGBOOST_DEVICE constexpr bool empty() const __span_noexcept { // NOLINT + return size() == 0; + } + + // Subviews + template + XGBOOST_DEVICE Span first() const { // NOLINT + SPAN_CHECK(Count >= 0 && Count <= size()); + return {data(), Count}; + } + + XGBOOST_DEVICE Span first( // NOLINT + detail::ptrdiff_t _count) const { + SPAN_CHECK(_count >= 0 && _count <= size()); + return {data(), _count}; + } + + template + XGBOOST_DEVICE Span last() const { // NOLINT + SPAN_CHECK(Count >=0 && size() - Count >= 0); + return {data() + size() - Count, Count}; + } + + XGBOOST_DEVICE Span last( // NOLINT + detail::ptrdiff_t _count) const { + SPAN_CHECK(_count >= 0 && _count <= size()); + return subspan(size() - _count, _count); + } + + /*! + * If Count is std::dynamic_extent, r.size() == this->size() - Offset; + * Otherwise r.size() == Count. + */ + template < detail::ptrdiff_t Offset, + detail::ptrdiff_t Count = dynamic_extent > + XGBOOST_DEVICE auto subspan() const -> // NOLINT + Span::value> { + SPAN_CHECK(Offset >= 0 && Offset < size()); + SPAN_CHECK(Count == dynamic_extent || + Count >= 0 && Offset + Count <= size()); + + return {data() + Offset, Count == dynamic_extent ? size() - Offset : Count}; + } + + XGBOOST_DEVICE Span subspan( // NOLINT + detail::ptrdiff_t _offset, + detail::ptrdiff_t _count = dynamic_extent) const { + SPAN_CHECK(_offset >= 0 && _offset < size()); + SPAN_CHECK(_count == dynamic_extent || + _count >= 0 && _offset + _count <= size()); + + return {data() + _offset, _count == + dynamic_extent ? 
size() - _offset : _count}; + } + + private: + index_type size_; + pointer data_; +}; + +template +XGBOOST_DEVICE bool operator==(Span l, Span r) { + if (l.size() != r.size()) { + return false; + } + for (auto l_beg = l.cbegin(), r_beg = r.cbegin(); l_beg != l.cend(); + ++l_beg, ++r_beg) { + if (*l_beg != *r_beg) { + return false; + } + } + return true; +} + +template +XGBOOST_DEVICE constexpr bool operator!=(Span l, Span r) { + return !(l == r); +} + +template +XGBOOST_DEVICE constexpr bool operator<(Span l, Span r) { + return detail::LexicographicalCompare(l.begin(), l.end(), + r.begin(), r.end()); +} + +template +XGBOOST_DEVICE constexpr bool operator<=(Span l, Span r) { + return !(l > r); +} + +template +XGBOOST_DEVICE constexpr bool operator>(Span l, Span r) { + return detail::LexicographicalCompare< + typename Span::iterator, typename Span::iterator, + detail::Greater::element_type>>(l.begin(), l.end(), + r.begin(), r.end()); +} + +template +XGBOOST_DEVICE constexpr bool operator>=(Span l, Span r) { + return !(l < r); +} + +template +XGBOOST_DEVICE auto as_bytes(Span s) __span_noexcept -> // NOLINT + Span::value> { + return {reinterpret_cast(s.data()), s.size_bytes()}; +} + +template +XGBOOST_DEVICE auto as_writable_bytes(Span s) __span_noexcept -> // NOLINT + Span::value> { + return {reinterpret_cast(s.data()), s.size_bytes()}; +} + +} // namespace common +} // namespace xgboost + +#if defined(_MSC_VER) &&_MSC_VER < 1910 +#undef constexpr +#pragma pop_macro("constexpr") +#undef __span_noexcept +#endif // _MSC_VER < 1910 + +#endif // XGBOOST_COMMON_SPAN_H_ diff --git a/src/data/simple_dmatrix.cc b/src/data/simple_dmatrix.cc index 59cc32da3..c14faf0ce 100644 --- a/src/data/simple_dmatrix.cc +++ b/src/data/simple_dmatrix.cc @@ -58,8 +58,8 @@ void SimpleDMatrix::MakeOneBatch(SparsePage* pcol, bool sorted) { for (long i = 0; i < batch_size; ++i) { // NOLINT(*) int tid = omp_get_thread_num(); auto inst = batch[i]; - for (bst_uint j = 0; j < inst.length; ++j) { - builder.AddBudget(inst[j].index, tid); + for (auto& ins : inst) { + builder.AddBudget(ins.index, tid); } } } @@ -72,11 +72,11 @@ void SimpleDMatrix::MakeOneBatch(SparsePage* pcol, bool sorted) { for (long i = 0; i < static_cast(batch.Size()); ++i) { // NOLINT(*) int tid = omp_get_thread_num(); auto inst = batch[i]; - for (bst_uint j = 0; j < inst.length; ++j) { - builder.Push( - inst[j].index, - Entry(static_cast(batch.base_rowid + i), inst[j].fvalue), - tid); + for (auto& ins : inst) { + builder.Push(ins.index, + Entry(static_cast(batch.base_rowid + i), + ins.fvalue), + tid); } } } diff --git a/src/data/simple_dmatrix.h b/src/data/simple_dmatrix.h index b0d49fe52..c1d1babdd 100644 --- a/src/data/simple_dmatrix.h +++ b/src/data/simple_dmatrix.h @@ -45,7 +45,7 @@ class SimpleDMatrix : public DMatrix { size_t GetColSize(size_t cidx) const override { auto& batch = *col_iter_.column_page_; - return batch[cidx].length; + return batch[cidx].size(); } float GetColDensity(size_t cidx) const override { diff --git a/src/gbm/gblinear.cc b/src/gbm/gblinear.cc index 7f6d424ac..6a432c057 100644 --- a/src/gbm/gblinear.cc +++ b/src/gbm/gblinear.cc @@ -166,9 +166,9 @@ class GBLinear : public GradientBooster { for (int gid = 0; gid < ngroup; ++gid) { bst_float *p_contribs = &contribs[(row_idx * ngroup + gid) * ncolumns]; // calculate linear terms' contributions - for (bst_uint c = 0; c < inst.length; ++c) { - if (inst[c].index >= model_.param.num_feature) continue; - p_contribs[inst[c].index] = inst[c].fvalue * model_[inst[c].index][gid]; + for 
(auto& ins : inst) { + if (ins.index >= model_.param.num_feature) continue; + p_contribs[ins.index] = ins.fvalue * model_[ins.index][gid]; } // add base margin to BIAS p_contribs[ncolumns - 1] = model_.bias()[gid] + @@ -268,9 +268,9 @@ class GBLinear : public GradientBooster { inline void Pred(const SparsePage::Inst &inst, bst_float *preds, int gid, bst_float base) { bst_float psum = model_.bias()[gid] + base; - for (bst_uint i = 0; i < inst.length; ++i) { - if (inst[i].index >= model_.param.num_feature) continue; - psum += inst[i].fvalue * model_[inst[i].index][gid]; + for (const auto& ins : inst) { + if (ins.index >= model_.param.num_feature) continue; + psum += ins.fvalue * model_[ins.index][gid]; } preds[gid] = psum; } diff --git a/src/linear/coordinate_common.h b/src/linear/coordinate_common.h index 18d87fbf1..72b0c9802 100644 --- a/src/linear/coordinate_common.h +++ b/src/linear/coordinate_common.h @@ -69,7 +69,7 @@ inline std::pair GetGradient(int group_idx, int num_group, int f while (iter->Next()) { auto &batch = iter->Value(); auto col = batch[fidx]; - const auto ndata = static_cast(col.length); + const auto ndata = static_cast(col.size()); for (bst_omp_uint j = 0; j < ndata; ++j) { const bst_float v = col[j].fvalue; auto &p = gpair[col[j].index * num_group + group_idx]; @@ -100,7 +100,7 @@ inline std::pair GetGradientParallel(int group_idx, int num_grou while (iter->Next()) { auto &batch = iter->Value(); auto col = batch[fidx]; - const auto ndata = static_cast(col.length); + const auto ndata = static_cast(col.size()); #pragma omp parallel for schedule(static) reduction(+ : sum_grad, sum_hess) for (bst_omp_uint j = 0; j < ndata; ++j) { const bst_float v = col[j].fvalue; @@ -159,7 +159,7 @@ inline void UpdateResidualParallel(int fidx, int group_idx, int num_group, auto &batch = iter->Value(); auto col = batch[fidx]; // update grad value - const auto num_row = static_cast(col.length); + const auto num_row = static_cast(col.size()); #pragma omp parallel for schedule(static) for (bst_omp_uint j = 0; j < num_row; ++j) { GradientPair &p = (*in_gpair)[col[j].index * num_group + group_idx]; @@ -331,7 +331,7 @@ class GreedyFeatureSelector : public FeatureSelector { #pragma omp parallel for schedule(static) for (bst_omp_uint i = 0; i < nfeat; ++i) { const auto col = batch[i]; - const bst_uint ndata = col.length; + const bst_uint ndata = col.size(); auto &sums = gpair_sums_[group_idx * nfeat + i]; for (bst_uint j = 0u; j < ndata; ++j) { const bst_float v = col[j].fvalue; @@ -399,7 +399,7 @@ class ThriftyFeatureSelector : public FeatureSelector { #pragma omp parallel for schedule(static) for (bst_omp_uint i = 0; i < nfeat; ++i) { const auto col = batch[i]; - const bst_uint ndata = col.length; + const bst_uint ndata = col.size(); for (bst_uint gid = 0u; gid < ngroup; ++gid) { auto &sums = gpair_sums_[gid * nfeat + i]; for (bst_uint j = 0u; j < ndata; ++j) { diff --git a/src/linear/updater_gpu_coordinate.cu b/src/linear/updater_gpu_coordinate.cu index cf4e47c61..ca1536cd1 100644 --- a/src/linear/updater_gpu_coordinate.cu +++ b/src/linear/updater_gpu_coordinate.cu @@ -118,13 +118,13 @@ class DeviceShard { return e1.index < e2.index; }; auto column_begin = - std::lower_bound(col.data, col.data + col.length, + std::lower_bound(col.data(), col.data() + col.size(), Entry(row_begin, 0.0f), cmp); auto column_end = - std::upper_bound(col.data, col.data + col.length, + std::upper_bound(col.data(), col.data() + col.size(), Entry(row_end, 0.0f), cmp); column_segments.push_back( - 
std::make_pair(column_begin - col.data, column_end - col.data)); + std::make_pair(column_begin - col.data(), column_end - col.data())); row_ptr_.push_back(row_ptr_.back() + column_end - column_begin); } ba_.Allocate(device_idx, param.silent, &data_, row_ptr_.back(), &gpair_, @@ -134,7 +134,7 @@ class DeviceShard { auto col = batch[fidx]; auto seg = column_segments[fidx]; dh::safe_cuda(cudaMemcpy( - data_.Data() + row_ptr_[fidx], col.data + seg.first, + data_.Data() + row_ptr_[fidx], col.data() + seg.first, sizeof(Entry) * (seg.second - seg.first), cudaMemcpyHostToDevice)); } // Rescale indices with respect to current shard diff --git a/src/linear/updater_shotgun.cc b/src/linear/updater_shotgun.cc index 11b91cbce..fc666cfa1 100644 --- a/src/linear/updater_shotgun.cc +++ b/src/linear/updater_shotgun.cc @@ -92,10 +92,10 @@ class ShotgunUpdater : public LinearUpdater { auto col = batch[ii]; for (int gid = 0; gid < ngroup; ++gid) { double sum_grad = 0.0, sum_hess = 0.0; - for (bst_uint j = 0; j < col.length; ++j) { - GradientPair &p = gpair[col[j].index * ngroup + gid]; + for (auto& c : col) { + GradientPair &p = gpair[c.index * ngroup + gid]; if (p.GetHess() < 0.0f) continue; - const bst_float v = col[j].fvalue; + const bst_float v = c.fvalue; sum_grad += p.GetGrad() * v; sum_hess += p.GetHess() * v * v; } @@ -107,10 +107,10 @@ class ShotgunUpdater : public LinearUpdater { if (dw == 0.f) continue; w += dw; // update grad values - for (bst_uint j = 0; j < col.length; ++j) { - GradientPair &p = gpair[col[j].index * ngroup + gid]; + for (auto& c : col) { + GradientPair &p = gpair[c.index * ngroup + gid]; if (p.GetHess() < 0.0f) continue; - p += GradientPair(p.GetHess() * col[j].fvalue * dw, 0); + p += GradientPair(p.GetHess() * c.fvalue * dw, 0); } } } diff --git a/src/objective/regression_obj_gpu.cu b/src/objective/regression_obj_gpu.cu index 5d7c21ebd..d4ac49bfe 100644 --- a/src/objective/regression_obj_gpu.cu +++ b/src/objective/regression_obj_gpu.cu @@ -12,6 +12,7 @@ #include #include +#include "../common/span.h" #include "../common/device_helpers.cuh" #include "../common/host_device_vector.h" #include "./regression_loss.h" @@ -44,8 +45,8 @@ struct GPURegLossParam : public dmlc::Parameter { // GPU kernel for gradient computation template __global__ void get_gradient_k -(GradientPair *__restrict__ out_gpair, unsigned int *__restrict__ label_correct, - const float * __restrict__ preds, const float * __restrict__ labels, +(common::Span out_gpair, common::Span label_correct, + common::Span preds, common::Span labels, const float * __restrict__ weights, int n, float scale_pos_weight) { int i = threadIdx.x + blockIdx.x * blockDim.x; if (i >= n) @@ -56,14 +57,14 @@ __global__ void get_gradient_k if (label == 1.0f) w *= scale_pos_weight; if (!Loss::CheckLabel(label)) - atomicAnd(label_correct, 0); + atomicAnd(label_correct.data(), 0); out_gpair[i] = GradientPair (Loss::FirstOrderGradient(p, label) * w, Loss::SecondOrderGradient(p, label) * w); } // GPU kernel for predicate transformation template -__global__ void pred_transform_k(float * __restrict__ preds, int n) { +__global__ void pred_transform_k(common::Span preds, int n) { int i = threadIdx.x + blockIdx.x * blockDim.x; if (i >= n) return; @@ -144,8 +145,8 @@ class GPURegLossObj : public ObjFunction { size_t n = preds->DeviceSize(d); if (n > 0) { get_gradient_k<<>> - (out_gpair->DevicePointer(d), label_correct_.DevicePointer(d), - preds->DevicePointer(d), labels_.DevicePointer(d), + (out_gpair->DeviceSpan(d), label_correct_.DeviceSpan(d), + 
preds->DeviceSpan(d), labels_.DeviceSpan(d), info.weights_.size() > 0 ? weights_.DevicePointer(d) : nullptr, n, param_.scale_pos_weight); dh::safe_cuda(cudaGetLastError()); @@ -180,7 +181,8 @@ class GPURegLossObj : public ObjFunction { const int block = 256; size_t n = preds->DeviceSize(d); if (n > 0) { - pred_transform_k<<>>(preds->DevicePointer(d), n); + pred_transform_k<<>>( + preds->DeviceSpan(d), n); dh::safe_cuda(cudaGetLastError()); } dh::safe_cuda(cudaDeviceSynchronize()); diff --git a/src/tree/updater_basemaker-inl.h b/src/tree/updater_basemaker-inl.h index 06ca474ba..55ff2c774 100644 --- a/src/tree/updater_basemaker-inl.h +++ b/src/tree/updater_basemaker-inl.h @@ -49,9 +49,9 @@ class BaseMaker: public TreeUpdater { auto &batch = iter->Value(); for (bst_uint fid = 0; fid < batch.Size(); ++fid) { auto c = batch[fid]; - if (c.length != 0) { + if (c.size() != 0) { fminmax_[fid * 2 + 0] = std::max(-c[0].fvalue, fminmax_[fid * 2 + 0]); - fminmax_[fid * 2 + 1] = std::max(c[c.length - 1].fvalue, fminmax_[fid * 2 + 1]); + fminmax_[fid * 2 + 1] = std::max(c[c.size() - 1].fvalue, fminmax_[fid * 2 + 1]); } } } @@ -106,9 +106,9 @@ class BaseMaker: public TreeUpdater { inline static int NextLevel(const SparsePage::Inst &inst, const RegTree &tree, int nid) { const RegTree::Node &n = tree[nid]; bst_uint findex = n.SplitIndex(); - for (unsigned i = 0; i < inst.length; ++i) { - if (findex == inst[i].index) { - if (inst[i].fvalue < n.SplitCond()) { + for (const auto& ins : inst) { + if (findex == ins.index) { + if (ins.fvalue < n.SplitCond()) { return n.LeftChild(); } else { return n.RightChild(); @@ -250,7 +250,7 @@ class BaseMaker: public TreeUpdater { auto it = std::lower_bound(sorted_split_set.begin(), sorted_split_set.end(), fid); if (it != sorted_split_set.end() && *it == fid) { - const auto ndata = static_cast(col.length); + const auto ndata = static_cast(col.size()); #pragma omp parallel for schedule(static) for (bst_omp_uint j = 0; j < ndata; ++j) { const bst_uint ridx = col[j].index; @@ -308,7 +308,7 @@ class BaseMaker: public TreeUpdater { auto &batch = iter->Value(); for (auto fid : fsplits) { auto col = batch[fid]; - const auto ndata = static_cast(col.length); + const auto ndata = static_cast(col.size()); #pragma omp parallel for schedule(static) for (bst_omp_uint j = 0; j < ndata; ++j) { const bst_uint ridx = col[j].index; diff --git a/src/tree/updater_colmaker.cc b/src/tree/updater_colmaker.cc index a87d96b65..d4eaab7af 100644 --- a/src/tree/updater_colmaker.cc +++ b/src/tree/updater_colmaker.cc @@ -269,7 +269,7 @@ class ColMaker: public TreeUpdater { const std::vector &gpair) { // TODO(tqchen): double check stats order. 
const MetaInfo& info = fmat.Info(); - const bool ind = col.length != 0 && col.data[0].fvalue == col.data[col.length - 1].fvalue; + const bool ind = col.size() != 0 && col[0].fvalue == col[col.size() - 1].fvalue; bool need_forward = param_.NeedForwardSearch(fmat.GetColDensity(fid), ind); bool need_backward = param_.NeedBackwardSearch(fmat.GetColDensity(fid), ind); const std::vector &qexpand = qexpand_; @@ -281,8 +281,8 @@ class ColMaker: public TreeUpdater { for (int j : qexpand) { temp[j].stats.Clear(); } - bst_uint step = (col.length + this->nthread_ - 1) / this->nthread_; - bst_uint end = std::min(col.length, step * (tid + 1)); + bst_uint step = (col.size() + this->nthread_ - 1) / this->nthread_; + bst_uint end = std::min(static_cast(col.size()), step * (tid + 1)); for (bst_uint i = tid * step; i < end; ++i) { const bst_uint ridx = col[i].index; const int nid = position_[ridx]; @@ -363,8 +363,8 @@ class ColMaker: public TreeUpdater { GradStats c(param_), cright(param_); const int tid = omp_get_thread_num(); std::vector &temp = stemp_[tid]; - bst_uint step = (col.length + this->nthread_ - 1) / this->nthread_; - bst_uint end = std::min(col.length, step * (tid + 1)); + bst_uint step = (col.size() + this->nthread_ - 1) / this->nthread_; + bst_uint end = std::min(static_cast(col.size()), step * (tid + 1)); for (bst_uint i = tid * step; i < end; ++i) { const bst_uint ridx = col[i].index; const int nid = position_[ridx]; @@ -620,13 +620,13 @@ class ColMaker: public TreeUpdater { int fid = feat_set[i]; const int tid = omp_get_thread_num(); auto c = batch[fid]; - const bool ind = c.length != 0 && c.data[0].fvalue == c.data[c.length - 1].fvalue; + const bool ind = c.size() != 0 && c[0].fvalue == c[c.size() - 1].fvalue; if (param_.NeedForwardSearch(fmat.GetColDensity(fid), ind)) { - this->EnumerateSplit(c.data, c.data + c.length, +1, + this->EnumerateSplit(c.data(), c.data() + c.size(), +1, fid, gpair, info, stemp_[tid]); } if (param_.NeedBackwardSearch(fmat.GetColDensity(fid), ind)) { - this->EnumerateSplit(c.data + c.length - 1, c.data - 1, -1, + this->EnumerateSplit(c.data() + c.size() - 1, c.data() - 1, -1, fid, gpair, info, stemp_[tid]); } } @@ -734,7 +734,7 @@ class ColMaker: public TreeUpdater { auto &batch = iter->Value(); for (auto fid : fsplits) { auto col = batch[fid]; - const auto ndata = static_cast(col.length); + const auto ndata = static_cast(col.size()); #pragma omp parallel for schedule(static) for (bst_omp_uint j = 0; j < ndata; ++j) { const bst_uint ridx = col[j].index; @@ -865,7 +865,7 @@ class DistColMaker : public ColMaker { auto &batch = iter->Value(); for (auto fid : fsplits) { auto col = batch[fid]; - const auto ndata = static_cast(col.length); + const auto ndata = static_cast(col.size()); #pragma omp parallel for schedule(static) for (bst_omp_uint j = 0; j < ndata; ++j) { const bst_uint ridx = col[j].index; diff --git a/src/tree/updater_gpu.cu b/src/tree/updater_gpu.cu index 616c75179..ee23299ac 100644 --- a/src/tree/updater_gpu.cu +++ b/src/tree/updater_gpu.cu @@ -669,7 +669,7 @@ class GPUMaker : public TreeUpdater { auto &batch = iter->Value(); for (int i = 0; i < batch.Size(); i++) { auto col = batch[i]; - for (const Entry* it = col.data; it != col.data + col.length; + for (const Entry* it = col.data(); it != col.data() + col.size(); it++) { int inst_id = static_cast(it->index); fval->push_back(it->fvalue); diff --git a/src/tree/updater_histmaker.cc b/src/tree/updater_histmaker.cc index 97d03359a..62b5b13e1 100644 --- a/src/tree/updater_histmaker.cc +++ 
b/src/tree/updater_histmaker.cc @@ -496,13 +496,13 @@ class CQHistMaker: public HistMaker { } inline void UpdateHistCol(const std::vector &gpair, - const SparsePage::Inst &c, + const SparsePage::Inst &col, const MetaInfo &info, const RegTree &tree, const std::vector &fset, bst_uint fid_offset, std::vector *p_temp) { - if (c.length == 0) return; + if (col.size() == 0) return; // initialize sbuilder for use std::vector &hbuilder = *p_temp; hbuilder.resize(tree.param.num_nodes); @@ -514,46 +514,46 @@ class CQHistMaker: public HistMaker { } if (TStats::kSimpleStats != 0 && this->param_.cache_opt != 0) { constexpr bst_uint kBuffer = 32; - bst_uint align_length = c.length / kBuffer * kBuffer; + bst_uint align_length = col.size() / kBuffer * kBuffer; int buf_position[kBuffer]; GradientPair buf_gpair[kBuffer]; for (bst_uint j = 0; j < align_length; j += kBuffer) { for (bst_uint i = 0; i < kBuffer; ++i) { - bst_uint ridx = c[j + i].index; + bst_uint ridx = col[j + i].index; buf_position[i] = this->position_[ridx]; buf_gpair[i] = gpair[ridx]; } for (bst_uint i = 0; i < kBuffer; ++i) { const int nid = buf_position[i]; if (nid >= 0) { - hbuilder[nid].Add(c[j + i].fvalue, buf_gpair[i]); + hbuilder[nid].Add(col[j + i].fvalue, buf_gpair[i]); } } } - for (bst_uint j = align_length; j < c.length; ++j) { - const bst_uint ridx = c[j].index; + for (bst_uint j = align_length; j < col.size(); ++j) { + const bst_uint ridx = col[j].index; const int nid = this->position_[ridx]; if (nid >= 0) { - hbuilder[nid].Add(c[j].fvalue, gpair[ridx]); + hbuilder[nid].Add(col[j].fvalue, gpair[ridx]); } } } else { - for (bst_uint j = 0; j < c.length; ++j) { - const bst_uint ridx = c[j].index; + for (const auto& c : col) { + const bst_uint ridx = c.index; const int nid = this->position_[ridx]; if (nid >= 0) { - hbuilder[nid].Add(c[j].fvalue, gpair, info, ridx); + hbuilder[nid].Add(c.fvalue, gpair, info, ridx); } } } } inline void UpdateSketchCol(const std::vector &gpair, - const SparsePage::Inst &c, + const SparsePage::Inst &col, const RegTree &tree, size_t work_set_size, bst_uint offset, std::vector *p_temp) { - if (c.length == 0) return; + if (col.size() == 0) return; // initialize sbuilder for use std::vector &sbuilder = *p_temp; sbuilder.resize(tree.param.num_nodes); @@ -565,18 +565,18 @@ class CQHistMaker: public HistMaker { } // first pass, get sum of weight, TODO, optimization to skip first pass - for (bst_uint j = 0; j < c.length; ++j) { - const bst_uint ridx = c[j].index; + for (const auto& c : col) { + const bst_uint ridx = c.index; const int nid = this->position_[ridx]; if (nid >= 0) { - sbuilder[nid].sum_total += gpair[ridx].GetHess(); + sbuilder[nid].sum_total += gpair[ridx].GetHess(); } } // if only one value, no need to do second pass - if (c[0].fvalue == c[c.length-1].fvalue) { + if (col[0].fvalue == col[col.size()-1].fvalue) { for (size_t i = 0; i < this->qexpand_.size(); ++i) { const int nid = this->qexpand_[i]; - sbuilder[nid].sketch->Push(c[0].fvalue, static_cast(sbuilder[nid].sum_total)); + sbuilder[nid].sketch->Push(col[0].fvalue, static_cast(sbuilder[nid].sum_total)); } return; } @@ -589,35 +589,35 @@ class CQHistMaker: public HistMaker { // second pass, build the sketch if (TStats::kSimpleStats != 0 && this->param_.cache_opt != 0) { constexpr bst_uint kBuffer = 32; - bst_uint align_length = c.length / kBuffer * kBuffer; + bst_uint align_length = col.size() / kBuffer * kBuffer; int buf_position[kBuffer]; bst_float buf_hess[kBuffer]; for (bst_uint j = 0; j < align_length; j += kBuffer) { for (bst_uint i = 
0; i < kBuffer; ++i) {
-          bst_uint ridx = c[j + i].index;
+          bst_uint ridx = col[j + i].index;
           buf_position[i] = this->position_[ridx];
           buf_hess[i] = gpair[ridx].GetHess();
         }
         for (bst_uint i = 0; i < kBuffer; ++i) {
           const int nid = buf_position[i];
           if (nid >= 0) {
-            sbuilder[nid].Push(c[j + i].fvalue, buf_hess[i], max_size);
+            sbuilder[nid].Push(col[j + i].fvalue, buf_hess[i], max_size);
           }
         }
       }
-      for (bst_uint j = align_length; j < c.length; ++j) {
-        const bst_uint ridx = c[j].index;
+      for (bst_uint j = align_length; j < col.size(); ++j) {
+        const bst_uint ridx = col[j].index;
         const int nid = this->position_[ridx];
         if (nid >= 0) {
-          sbuilder[nid].Push(c[j].fvalue, gpair[ridx].GetHess(), max_size);
+          sbuilder[nid].Push(col[j].fvalue, gpair[ridx].GetHess(), max_size);
         }
       }
     } else {
-      for (bst_uint j = 0; j < c.length; ++j) {
-        const bst_uint ridx = c[j].index;
+      for (const auto& c : col) {
+        const bst_uint ridx = c.index;
         const int nid = this->position_[ridx];
         if (nid >= 0) {
-          sbuilder[nid].Push(c[j].fvalue, gpair[ridx].GetHess(), max_size);
+          sbuilder[nid].Push(c.fvalue, gpair[ridx].GetHess(), max_size);
         }
       }
     }
@@ -794,8 +794,8 @@ class QuantileHistMaker: public HistMaker<TStats> {
         if (this->node2workindex_[nid] < 0) {
           this->position_[ridx] = ~nid;
         } else {
-          for (bst_uint j = 0; j < inst.length; ++j) {
-            builder.AddBudget(inst[j].index, omp_get_thread_num());
+          for (auto& ins : inst) {
+            builder.AddBudget(ins.index, omp_get_thread_num());
           }
         }
       }
@@ -807,9 +807,9 @@ class QuantileHistMaker: public HistMaker<TStats> {
           const bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
           const int nid = this->position_[ridx];
           if (nid >= 0) {
-            for (bst_uint j = 0; j < inst.length; ++j) {
-              builder.Push(inst[j].index,
-                           Entry(nid, inst[j].fvalue),
+            for (auto& ins : inst) {
+              builder.Push(ins.index,
+                           Entry(nid, ins.fvalue),
                            omp_get_thread_num());
             }
           }
diff --git a/src/tree/updater_skmaker.cc b/src/tree/updater_skmaker.cc
index 9549ff0c6..50f1a56c4 100644
--- a/src/tree/updater_skmaker.cc
+++ b/src/tree/updater_skmaker.cc
@@ -155,7 +155,7 @@ class SketchMaker: public BaseMaker {
         this->UpdateSketchCol(gpair, batch[fidx], tree,
                               node_stats_,
                               fidx,
-                              batch[fidx].length == nrows,
+                              batch[fidx].size() == nrows,
                               &thread_sketch_[omp_get_thread_num()]);
       }
     }
@@ -174,13 +174,13 @@ class SketchMaker: public BaseMaker {
   }
   // update sketch information in column fid
   inline void UpdateSketchCol(const std::vector<GradientPair> &gpair,
-                              const SparsePage::Inst &c,
+                              const SparsePage::Inst &col,
                               const RegTree &tree,
                               const std::vector<SKStats> &nstats,
                               bst_uint fid,
                               bool col_full,
                              std::vector<SketchEntry> *p_temp) {
-    if (c.length == 0) return;
+    if (col.size() == 0) return;
     // initialize sbuilder for use
     std::vector<SketchEntry> &sbuilder = *p_temp;
     sbuilder.resize(tree.param.num_nodes * 3);
@@ -192,10 +192,10 @@ class SketchMaker: public BaseMaker {
       }
     }
     if (!col_full) {
-      for (bst_uint j = 0; j < c.length; ++j) {
-        const bst_uint ridx = c[j].index;
+      for (const auto& c : col) {
+        const bst_uint ridx = c.index;
         const int nid = this->position_[ridx];
         if (nid >= 0) {
          const GradientPair &e = gpair[ridx];
          if (e.GetGrad() >= 0.0f) {
            sbuilder[3 * nid + 0].sum_total += e.GetGrad();
@@ -213,10 +213,10 @@ class SketchMaker: public BaseMaker {
       }
     }
     // if only one value, no need to do second pass
-    if (c[0].fvalue == c[c.length-1].fvalue) {
+    if (col[0].fvalue == col[col.size()-1].fvalue) {
       for (int nid : this->qexpand_) {
         for (int k = 0; k < 3; ++k) {
-          sbuilder[3 * nid + k].sketch->Push(c[0].fvalue,
+          sbuilder[3 * nid + k].sketch->Push(col[0].fvalue,
                                              static_cast<bst_float>(
                                                  sbuilder[3 * nid +
k].sum_total)); } @@ -231,17 +231,17 @@ class SketchMaker: public BaseMaker { } } // second pass, build the sketch - for (bst_uint j = 0; j < c.length; ++j) { - const bst_uint ridx = c[j].index; + for (const auto& c : col) { + const bst_uint ridx = c.index; const int nid = this->position_[ridx]; if (nid >= 0) { const GradientPair &e = gpair[ridx]; if (e.GetGrad() >= 0.0f) { - sbuilder[3 * nid + 0].Push(c[j].fvalue, e.GetGrad(), max_size); + sbuilder[3 * nid + 0].Push(c.fvalue, e.GetGrad(), max_size); } else { - sbuilder[3 * nid + 1].Push(c[j].fvalue, -e.GetGrad(), max_size); + sbuilder[3 * nid + 1].Push(c.fvalue, -e.GetGrad(), max_size); } - sbuilder[3 * nid + 2].Push(c[j].fvalue, e.GetHess(), max_size); + sbuilder[3 * nid + 2].Push(c.fvalue, e.GetHess(), max_size); } } for (int nid : this->qexpand_) { diff --git a/tests/cpp/c_api/test_c_api.cc b/tests/cpp/c_api/test_c_api.cc index a05fd74fa..e779deaa0 100644 --- a/tests/cpp/c_api/test_c_api.cc +++ b/tests/cpp/c_api/test_c_api.cc @@ -59,10 +59,10 @@ TEST(c_api, XGDMatrixCreateFromMat_omp) { auto batch = iter->Value(); for (int i = 0; i < batch.Size(); i++) { auto inst = batch[i]; - for (int j = 0; i < inst.length; i++) { + for (int j = 0; i < inst.size(); i++) { ASSERT_EQ(inst[j].fvalue, 1.5); } } } } -} \ No newline at end of file +} diff --git a/tests/cpp/common/test_host_device_vector.cu b/tests/cpp/common/test_host_device_vector.cu new file mode 100644 index 000000000..da3192600 --- /dev/null +++ b/tests/cpp/common/test_host_device_vector.cu @@ -0,0 +1,22 @@ +/*! + * Copyright 2018 XGBoost contributors + */ + +#include +#include "../../../src/common/host_device_vector.h" +#include "../../../src/common/device_helpers.cuh" + +namespace xgboost { +namespace common { + +TEST(HostDeviceVector, Span) { + HostDeviceVector vec {1.0f, 2.0f, 3.0f, 4.0f}; + vec.Reshard(GPUSet{0, 1}); + auto span = vec.DeviceSpan(0); + ASSERT_EQ(vec.Size(), span.size()); + ASSERT_EQ(vec.DevicePointer(0), span.data()); +} + +} // namespace common +} // namespace xgboost + diff --git a/tests/cpp/common/test_span.cc b/tests/cpp/common/test_span.cc new file mode 100644 index 000000000..516aeffe5 --- /dev/null +++ b/tests/cpp/common/test_span.cc @@ -0,0 +1,423 @@ +/*! + * Copyright 2018 XGBoost contributors + */ +#include +#include + +#include "../../../src/common/span.h" +#include "test_span.h" + +namespace xgboost { +namespace common { + +TEST(Span, TestStatus) { + int status = 1; + TestTestStatus {&status}(); + ASSERT_EQ(status, -1); +} + +TEST(Span, DlfConstructors) { + // Dynamic extent + { + Span s; + ASSERT_EQ(s.size(), 0); + ASSERT_EQ(s.data(), nullptr); + + Span cs; + ASSERT_EQ(cs.size(), 0); + ASSERT_EQ(cs.data(), nullptr); + } + + // Static extent + { + Span s; + ASSERT_EQ(s.size(), 0); + ASSERT_EQ(s.data(), nullptr); + + Span cs; + ASSERT_EQ(cs.size(), 0); + ASSERT_EQ(cs.data(), nullptr); + } + + // Init list. 
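+  // (There is no initializer_list overload; the empty braces below simply
+  // invoke the default constructor.)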
+ { + Span s {}; + ASSERT_EQ(s.size(), 0); + ASSERT_EQ(s.data(), nullptr); + + Span cs {}; + ASSERT_EQ(cs.size(), 0); + ASSERT_EQ(cs.data(), nullptr); + } +} + +TEST(Span, FromNullPtr) { + // dynamic extent + { + Span s {nullptr, static_cast::index_type>(0)}; + ASSERT_EQ(s.size(), 0); + ASSERT_EQ(s.data(), nullptr); + + Span cs {nullptr, static_cast::index_type>(0)}; + ASSERT_EQ(cs.size(), 0); + ASSERT_EQ(cs.data(), nullptr); + } + // static extent + { + Span s {nullptr, static_cast::index_type>(0)}; + ASSERT_EQ(s.size(), 0); + ASSERT_EQ(s.data(), nullptr); + + Span cs {nullptr, static_cast::index_type>(0)}; + ASSERT_EQ(cs.size(), 0); + ASSERT_EQ(cs.data(), nullptr); + } +} + +TEST(Span, FromPtrLen) { + float arr[16]; + InitializeRange(arr, arr+16); + + // static extent + { + Span s (arr, 16); + ASSERT_EQ (s.size(), 16); + ASSERT_EQ (s.data(), arr); + + for (Span::index_type i = 0; i < 16; ++i) { + ASSERT_EQ (s[i], arr[i]); + } + + Span cs (arr, 16); + ASSERT_EQ (cs.size(), 16); + ASSERT_EQ (cs.data(), arr); + + for (Span::index_type i = 0; i < 16; ++i) { + ASSERT_EQ (cs[i], arr[i]); + } + } + + { + EXPECT_ANY_THROW(Span tmp (arr, -1);); + } + + // dynamic extent + { + Span s (arr, 16); + ASSERT_EQ (s.size(), 16); + ASSERT_EQ (s.data(), arr); + + for (size_t i = 0; i < 16; ++i) { + ASSERT_EQ (s[i], arr[i]); + } + + Span cs (arr, 16); + ASSERT_EQ (cs.size(), 16); + ASSERT_EQ (cs.data(), arr); + + for (Span::index_type i = 0; i < 16; ++i) { + ASSERT_EQ (cs[i], arr[i]); + } + } +} + +TEST(Span, FromFirstLast) { + float arr[16]; + InitializeRange(arr, arr+16); + + // dynamic extent + { + Span s (arr, arr + 16); + ASSERT_EQ (s.size(), 16); + ASSERT_EQ (s.data(), arr); + ASSERT_EQ (s.data() + s.size(), arr + 16); + + for (size_t i = 0; i < 16; ++i) { + ASSERT_EQ (s[i], arr[i]); + } + + Span cs (arr, arr + 16); + ASSERT_EQ (cs.size(), 16); + ASSERT_EQ (cs.data(), arr); + ASSERT_EQ (cs.data() + cs.size(), arr + 16); + + for (size_t i = 0; i < 16; ++i) { + ASSERT_EQ (cs[i], arr[i]); + } + } + + // static extent + { + Span s (arr, arr + 16); + ASSERT_EQ (s.size(), 16); + ASSERT_EQ (s.data(), arr); + ASSERT_EQ (s.data() + s.size(), arr + 16); + + for (size_t i = 0; i < 16; ++i) { + ASSERT_EQ (s[i], arr[i]); + } + + Span cs (arr, arr + 16); + ASSERT_EQ (cs.size(), 16); + ASSERT_EQ (cs.data(), arr); + ASSERT_EQ (cs.data() + cs.size(), arr + 16); + + for (size_t i = 0; i < 16; ++i) { + ASSERT_EQ (cs[i], arr[i]); + } + } +} + +struct BaseClass { + virtual void operator()() {} +}; +struct DerivedClass : public BaseClass { + virtual void operator()() {} +}; + +TEST(Span, FromOther) { + + // convert constructor + { + Span derived; + Span base { derived }; + ASSERT_EQ(base.size(), derived.size()); + ASSERT_EQ(base.data(), derived.data()); + } + + float arr[16]; + InitializeRange(arr, arr + 16); + + // default copy constructor + { + Span s0 (arr); + Span s1 (s0); + ASSERT_EQ(s0.size(), s1.size()); + ASSERT_EQ(s0.data(), s1.data()); + } +} + +TEST(Span, FromArray) { + float arr[16]; + InitializeRange(arr, arr + 16); + + { + Span s (arr); + ASSERT_EQ(&arr[0], s.data()); + ASSERT_EQ(s.size(), 16); + for (size_t i = 0; i < 16; ++i) { + ASSERT_EQ(arr[i], s[i]); + } + } + + { + Span s (arr); + ASSERT_EQ(&arr[0], s.data()); + ASSERT_EQ(s.size(), 16); + for (size_t i = 0; i < 16; ++i) { + ASSERT_EQ(arr[i], s[i]); + } + } +} + +TEST(Span, FromContainer) { + std::vector vec (16); + InitializeRange(vec.begin(), vec.end()); + + Span s(vec); + ASSERT_EQ(s.size(), vec.size()); + ASSERT_EQ(s.data(), vec.data()); + + bool 
res = std::equal(vec.begin(), vec.end(), s.begin()); + ASSERT_TRUE(res); +} + +TEST(Span, Assignment) { + int status = 1; + TestAssignment{&status}(); + ASSERT_EQ(status, 1); +} + +TEST(SpanIter, Construct) { + int status = 1; + TestIterConstruct{&status}(); + ASSERT_EQ(status, 1); +} + +TEST(SpanIter, Ref) { + int status = 1; + TestIterRef{&status}(); + ASSERT_EQ(status, 1); +} + +TEST(SpanIter, Calculate) { + int status = 1; + TestIterCalculate{&status}(); + ASSERT_EQ(status, 1); +} + +TEST(SpanIter, Compare) { + int status = 1; + TestIterCompare{&status}(); + ASSERT_EQ(status, 1); +} + +TEST(Span, BeginEnd) { + int status = 1; + TestBeginEnd{&status}(); + ASSERT_EQ(status, 1); +} + +TEST(Span, RBeginREnd) { + int status = 1; + TestRBeginREnd{&status}(); + ASSERT_EQ(status, 1); +} + +TEST(Span, ElementAccess) { + float arr[16]; + InitializeRange(arr, arr + 16); + + Span s (arr); + size_t j = 0; + for (auto i : s) { + ASSERT_EQ(i, arr[j]); + ++j; + } + + EXPECT_ANY_THROW(s[16]); + EXPECT_ANY_THROW(s[-1]); + + EXPECT_ANY_THROW(s(16)); + EXPECT_ANY_THROW(s(-1)); +} + +TEST(Span, Obversers) { + int status = 1; + TestObservers{&status}(); + ASSERT_EQ(status, 1); +} + +TEST(Span, FirstLast) { + // static extent + { + float arr[16]; + InitializeRange(arr, arr + 16); + + Span s (arr); + Span first = s.first<4>(); + + ASSERT_EQ(first.size(), 4); + ASSERT_EQ(first.data(), arr); + + for (size_t i = 0; i < first.size(); ++i) { + ASSERT_EQ(first[i], arr[i]); + } + + EXPECT_ANY_THROW(s.first<-1>()); + EXPECT_ANY_THROW(s.first<17>()); + EXPECT_ANY_THROW(s.first<32>()); + } + + { + float arr[16]; + InitializeRange(arr, arr + 16); + + Span s (arr); + Span last = s.last<4>(); + + ASSERT_EQ(last.size(), 4); + ASSERT_EQ(last.data(), arr + 12); + + for (size_t i = 0; i < last.size(); ++i) { + ASSERT_EQ(last[i], arr[i+12]); + } + + EXPECT_ANY_THROW(s.last<-1>()); + EXPECT_ANY_THROW(s.last<17>()); + EXPECT_ANY_THROW(s.last<32>()); + } + + // dynamic extent + { + float *arr = new float[16]; + InitializeRange(arr, arr + 16); + Span s (arr, 16); + Span first = s.first(4); + + ASSERT_EQ(first.size(), 4); + ASSERT_EQ(first.data(), s.data()); + + for (size_t i = 0; i < first.size(); ++i) { + ASSERT_EQ(first[i], s[i]); + } + + EXPECT_ANY_THROW(s.first(-1)); + EXPECT_ANY_THROW(s.first(17)); + EXPECT_ANY_THROW(s.first(32)); + + delete [] arr; + } + + { + float *arr = new float[16]; + InitializeRange(arr, arr + 16); + Span s (arr, 16); + Span last = s.last(4); + + ASSERT_EQ(last.size(), 4); + ASSERT_EQ(last.data(), s.data() + 12); + + for (size_t i = 0; i < last.size(); ++i) { + ASSERT_EQ(s[12 + i], last[i]); + } + + EXPECT_ANY_THROW(s.last(-1)); + EXPECT_ANY_THROW(s.last(17)); + EXPECT_ANY_THROW(s.last(32)); + + delete [] arr; + } +} + +TEST(Span, Subspan) { + int arr[16] {0}; + Span s1 (arr); + auto s2 = s1.subspan<4>(); + ASSERT_EQ(s1.size() - 4, s2.size()); + + auto s3 = s1.subspan(2, 4); + ASSERT_EQ(s1.data() + 2, s3.data()); + ASSERT_EQ(s3.size(), 4); + + auto s4 = s1.subspan(2, dynamic_extent); + ASSERT_EQ(s1.data() + 2, s4.data()); + ASSERT_EQ(s4.size(), s1.size() - 2); + + EXPECT_ANY_THROW(s1.subspan(-1, 0)); + EXPECT_ANY_THROW(s1.subspan(16, 0)); + + EXPECT_ANY_THROW(s1.subspan<-1>()); + EXPECT_ANY_THROW(s1.subspan<16>()); +} + +TEST(Span, Compare) { + int status = 1; + TestCompare{&status}(); + ASSERT_EQ(status, 1); +} + +TEST(Span, AsBytes) { + int status = 1; + TestAsBytes{&status}(); + ASSERT_EQ(status, 1); +} + +TEST(Span, AsWritableBytes) { + int status = 1; + TestAsWritableBytes{&status}(); + 
+TEST(Span, AsWritableBytes) {
+  int status = 1;
+  TestAsWritableBytes{&status}();
+  ASSERT_EQ(status, 1);
+}
+
+}  // namespace common
+}  // namespace xgboost
diff --git a/tests/cpp/common/test_span.cu b/tests/cpp/common/test_span.cu
new file mode 100644
index 000000000..f42052b07
--- /dev/null
+++ b/tests/cpp/common/test_span.cu
@@ -0,0 +1,357 @@
+/*!
+ * Copyright 2018 XGBoost contributors
+ */
+#include <gtest/gtest.h>
+
+#include <thrust/host_vector.h>
+#include <thrust/device_vector.h>
+#include <thrust/execution_policy.h>
+
+#include "../../../src/common/device_helpers.cuh"
+#include "../../../src/common/span.h"
+#include "test_span.h"
+
+namespace xgboost {
+namespace common {
+
+// RAII wrapper around a single int flag in device memory: it starts out as 1,
+// and device-side checks overwrite it with -1 on failure.
+struct TestStatus {
+ private:
+  int *status_;
+
+ public:
+  TestStatus () {
+    dh::safe_cuda(cudaMalloc(&status_, sizeof(int)));
+    int h_status = 1;
+    dh::safe_cuda(cudaMemcpy(status_, &h_status,
+                             sizeof(int), cudaMemcpyHostToDevice));
+  }
+  ~TestStatus() {
+    dh::safe_cuda(cudaFree(status_));
+  }
+
+  int get() {
+    int h_status;
+    dh::safe_cuda(cudaMemcpy(&h_status, status_,
+                             sizeof(int), cudaMemcpyDeviceToHost));
+    return h_status;
+  }
+
+  int* data() {
+    return status_;
+  }
+};
+
+__global__ void test_from_other_kernel(Span<float> span) {
+  // don't get optimized out
+  size_t idx = threadIdx.x + blockIdx.x * blockDim.x;
+
+  if (idx >= span.size())
+    return;
+}
+// Test converting different T
+__global__ void test_from_other_kernel_const(Span<float const> span) {
+  // don't get optimized out
+  size_t idx = threadIdx.x + blockIdx.x * blockDim.x;
+
+  if (idx >= span.size())
+    return;
+}
+
+/*!
+ * \brief Here we just test whether the code compiles.
+ */
+TEST(GPUSpan, FromOther) {
+  thrust::host_vector<float> h_vec (16);
+  InitializeRange(h_vec.begin(), h_vec.end());
+
+  thrust::device_vector<float> d_vec (h_vec.size());
+  thrust::copy(h_vec.begin(), h_vec.end(), d_vec.begin());
+  // dynamic extent
+  {
+    Span<float> span (d_vec.data().get(), d_vec.size());
+    test_from_other_kernel<<<1, 16>>>(span);
+  }
+  {
+    Span<float> span (d_vec.data().get(), d_vec.size());
+    test_from_other_kernel_const<<<1, 16>>>(span);
+  }
+  // static extent
+  {
+    Span<float, 16> span (d_vec.data().get(), d_vec.data().get() + 16);
+    test_from_other_kernel<<<1, 16>>>(span);
+  }
+  {
+    Span<float, 16> span (d_vec.data().get(), d_vec.data().get() + 16);
+    test_from_other_kernel_const<<<1, 16>>>(span);
+  }
+}
+
+TEST(GPUSpan, Assignment) {
+  TestStatus status;
+  dh::LaunchN(0, 16, TestAssignment{status.data()});
+  ASSERT_EQ(status.get(), 1);
+}
+
+TEST(GPUSpan, TestStatus) {
+  TestStatus status;
+  dh::LaunchN(0, 16, TestTestStatus{status.data()});
+  ASSERT_EQ(status.get(), -1);
+}
+
+template <typename T>
+struct TestEqual {
+  T *lhs_, *rhs_;
+  int *status_;
+
+  TestEqual(T* _lhs, T* _rhs, int* _status) :
+      lhs_(_lhs), rhs_(_rhs), status_(_status) {}
+
+  XGBOOST_DEVICE void operator()(size_t _idx) {
+    bool res = lhs_[_idx] == rhs_[_idx];
+    SPAN_ASSERT_TRUE(res, status_);
+  }
+};
+
+TEST(GPUSpan, WithThrust) {
+  // Not advised to initialize a span with a host_vector, since h_vec.data()
+  // is a host function.
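+  // For example (hypothetical, do not do this):
+  //   Span<float> bad {h_vec.data(), h_vec.size()};
+  // compiles fine, but the span wraps a host pointer, and dereferencing it
+  // inside a kernel is invalid.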
+  thrust::host_vector<float> h_vec (16);
+  InitializeRange(h_vec.begin(), h_vec.end());
+
+  thrust::device_vector<float> d_vec (h_vec.size());
+  thrust::copy(h_vec.begin(), h_vec.end(), d_vec.begin());
+
+  // Can't initialize a span with a device_vector either, since d_vec.data()
+  // is not a raw pointer; use d_vec.data().get() instead.
+  {
+    Span<float> s (d_vec.data().get(), d_vec.size());
+
+    ASSERT_EQ(d_vec.size(), s.size());
+    ASSERT_EQ(d_vec.data().get(), s.data());
+  }
+
+  {
+    TestStatus status;
+    thrust::device_vector<float> d_vec1 (d_vec.size());
+    thrust::copy(thrust::device, d_vec.begin(), d_vec.end(), d_vec1.begin());
+    Span<float> s (d_vec1.data().get(), d_vec.size());
+
+    dh::LaunchN(0, 16, TestEqual<float>{
+        thrust::raw_pointer_cast(d_vec1.data()),
+        s.data(), status.data()});
+    ASSERT_EQ(status.get(), 1);
+
+    // FIXME: memory error!
+    // bool res = thrust::equal(thrust::device,
+    //                          d_vec.begin(), d_vec.end(),
+    //                          s.begin());
+  }
+}
+
+TEST(GPUSpan, BeginEnd) {
+  TestStatus status;
+  dh::LaunchN(0, 16, TestBeginEnd{status.data()});
+  ASSERT_EQ(status.get(), 1);
+}
+
+TEST(GPUSpan, RBeginREnd) {
+  TestStatus status;
+  dh::LaunchN(0, 16, TestRBeginREnd{status.data()});
+  ASSERT_EQ(status.get(), 1);
+}
+
+__global__ void test_modify_kernel(Span<float> span) {
+  size_t idx = threadIdx.x + blockIdx.x * blockDim.x;
+
+  if (idx >= span.size())
+    return;
+
+  span[idx] = span.size() - idx;
+}
+
+TEST(GPUSpan, Modify) {
+  thrust::host_vector<float> h_vec (16);
+  InitializeRange(h_vec.begin(), h_vec.end());
+
+  thrust::device_vector<float> d_vec (h_vec.size());
+  thrust::copy(h_vec.begin(), h_vec.end(), d_vec.begin());
+
+  Span<float> span (d_vec.data().get(), d_vec.size());
+
+  test_modify_kernel<<<1, 16>>>(span);
+
+  for (size_t i = 0; i < d_vec.size(); ++i) {
+    ASSERT_EQ(d_vec[i], d_vec.size() - i);
+  }
+}
+
+TEST(GPUSpan, Observers) {
+  TestStatus status;
+  dh::LaunchN(0, 16, TestObservers{status.data()});
+  ASSERT_EQ(status.get(), 1);
+}
+
+TEST(GPUSpan, Compare) {
+  TestStatus status;
+  dh::LaunchN(0, 16, TestIterCompare{status.data()});
+  ASSERT_EQ(status.get(), 1);
+}
+
+struct TestElementAccess {
+  Span<float> span_;
+
+  XGBOOST_DEVICE TestElementAccess (Span<float> _span) : span_(_span) {}
+
+  XGBOOST_DEVICE float operator()(size_t _idx) {
+    float tmp = span_[_idx];
+    return tmp;
+  }
+};
+
+TEST(GPUSpan, ElementAccess) {
+  EXPECT_DEATH({
+    thrust::host_vector<float> h_vec (16);
+    InitializeRange(h_vec.begin(), h_vec.end());
+
+    thrust::device_vector<float> d_vec (h_vec.size());
+    thrust::copy(h_vec.begin(), h_vec.end(), d_vec.begin());
+
+    Span<float> span (d_vec.data().get(), d_vec.size());
+    // 17 threads for a 16-element span: the last access is out of bounds.
+    dh::LaunchN(0, 17, TestElementAccess{span});}, "");
+}
+
+__global__ void test_first_dynamic_kernel(Span<float> _span) {
+  _span.first(-1);
+}
+__global__ void test_first_static_kernel(Span<float> _span) {
+  _span.first<-1>();
+}
+__global__ void test_last_dynamic_kernel(Span<float> _span) {
+  _span.last(-1);
+}
+__global__ void test_last_static_kernel(Span<float> _span) {
+  _span.last<-1>();
+}
+
+TEST(GPUSpan, FirstLast) {
+  // We construct the vectors separately for each case, since thrust cannot
+  // recover from a death test.
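+  // gtest death tests run the checked statement in a child process, so any
+  // thrust/CUDA state set up beforehand is not reusable once the child
+  // aborts; hence each lambda below owns its own vectors.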
+  auto lambda_first_dy = []() {
+    thrust::host_vector<float> h_vec (4);
+    InitializeRange(h_vec.begin(), h_vec.end());
+
+    thrust::device_vector<float> d_vec (h_vec.size());
+    thrust::copy(h_vec.begin(), h_vec.end(), d_vec.begin());
+
+    Span<float> span (d_vec.data().get(), d_vec.size());
+    test_first_dynamic_kernel<<<1, 1>>>(span);
+  };
+  EXPECT_DEATH(lambda_first_dy(), "");
+
+  auto lambda_first_static = []() {
+    thrust::host_vector<float> h_vec (4);
+    InitializeRange(h_vec.begin(), h_vec.end());
+
+    thrust::device_vector<float> d_vec (h_vec.size());
+    thrust::copy(h_vec.begin(), h_vec.end(), d_vec.begin());
+
+    Span<float> span (d_vec.data().get(), d_vec.size());
+    test_first_static_kernel<<<1, 1>>>(span);
+  };
+  EXPECT_DEATH(lambda_first_static(), "");
+
+  auto lambda_last_dy = []() {
+    thrust::host_vector<float> h_vec (4);
+    InitializeRange(h_vec.begin(), h_vec.end());
+
+    thrust::device_vector<float> d_vec (h_vec.size());
+    thrust::copy(h_vec.begin(), h_vec.end(), d_vec.begin());
+
+    Span<float> span (d_vec.data().get(), d_vec.size());
+    test_last_dynamic_kernel<<<1, 1>>>(span);
+  };
+  EXPECT_DEATH(lambda_last_dy(), "");
+
+  auto lambda_last_static = []() {
+    thrust::host_vector<float> h_vec (4);
+    InitializeRange(h_vec.begin(), h_vec.end());
+
+    thrust::device_vector<float> d_vec (h_vec.size());
+    thrust::copy(h_vec.begin(), h_vec.end(), d_vec.begin());
+
+    Span<float> span (d_vec.data().get(), d_vec.size());
+    test_last_static_kernel<<<1, 1>>>(span);
+  };
+  EXPECT_DEATH(lambda_last_static(), "");
+}
+
+__global__ void test_subspan_dynamic_kernel(Span<float> _span) {
+  _span.subspan(16, 0);
+}
+__global__ void test_subspan_static_kernel(Span<float> _span) {
+  _span.subspan<16>();
+}
+TEST(GPUSpan, Subspan) {
+  auto lambda_subspan_dynamic = []() {
+    thrust::host_vector<float> h_vec (4);
+    InitializeRange(h_vec.begin(), h_vec.end());
+
+    thrust::device_vector<float> d_vec (h_vec.size());
+    thrust::copy(h_vec.begin(), h_vec.end(), d_vec.begin());
+
+    Span<float> span (d_vec.data().get(), d_vec.size());
+    test_subspan_dynamic_kernel<<<1, 1>>>(span);
+  };
+  EXPECT_DEATH(lambda_subspan_dynamic(), "");
+
+  auto lambda_subspan_static = []() {
+    thrust::host_vector<float> h_vec (4);
+    InitializeRange(h_vec.begin(), h_vec.end());
+
+    thrust::device_vector<float> d_vec (h_vec.size());
+    thrust::copy(h_vec.begin(), h_vec.end(), d_vec.begin());
+
+    Span<float> span (d_vec.data().get(), d_vec.size());
+    test_subspan_static_kernel<<<1, 1>>>(span);
+  };
+  EXPECT_DEATH(lambda_subspan_static(), "");
+}
+
+TEST(GPUSpanIter, Construct) {
+  TestStatus status;
+  dh::LaunchN(0, 16, TestIterConstruct{status.data()});
+  ASSERT_EQ(status.get(), 1);
+}
+
+TEST(GPUSpanIter, Ref) {
+  TestStatus status;
+  dh::LaunchN(0, 16, TestIterRef{status.data()});
+  ASSERT_EQ(status.get(), 1);
+}
+
+TEST(GPUSpanIter, Calculate) {
+  TestStatus status;
+  dh::LaunchN(0, 16, TestIterCalculate{status.data()});
+  ASSERT_EQ(status.get(), 1);
+}
+
+TEST(GPUSpanIter, Compare) {
+  TestStatus status;
+  dh::LaunchN(0, 16, TestIterCompare{status.data()});
+  ASSERT_EQ(status.get(), 1);
+}
+
+TEST(GPUSpan, AsBytes) {
+  TestStatus status;
+  dh::LaunchN(0, 16, TestAsBytes{status.data()});
+  ASSERT_EQ(status.get(), 1);
+}
+
+TEST(GPUSpan, AsWritableBytes) {
+  TestStatus status;
+  dh::LaunchN(0, 16, TestAsWritableBytes{status.data()});
+  ASSERT_EQ(status.get(), 1);
+}
+
+}  // namespace common
+}  // namespace xgboost
diff --git a/tests/cpp/common/test_span.h b/tests/cpp/common/test_span.h
new file mode 100644
index 000000000..194a356ce
--- /dev/null
+++ b/tests/cpp/common/test_span.h
@@ -0,0 +1,339 @@
+/*!
+ * Copyright 2018 XGBoost contributors
+ */
+#ifndef XGBOOST_TEST_SPAN_H_
+#define XGBOOST_TEST_SPAN_H_
+
+#include "../../include/xgboost/base.h"
+#include "../../../src/common/span.h"
+
+namespace xgboost {
+namespace common {
+
+#define SPAN_ASSERT_TRUE(cond, status)          \
+  if (!(cond)) {                                \
+    *(status) = -1;                             \
+  }
+
+#define SPAN_ASSERT_FALSE(cond, status)         \
+  if ((cond)) {                                 \
+    *(status) = -1;                             \
+  }
+
+template <typename Iter>
+XGBOOST_DEVICE void InitializeRange(Iter _begin, Iter _end) {
+  float j = 0;
+  for (Iter i = _begin; i != _end; ++i, ++j) {
+    *i = j;
+  }
+}
+
+// Fails unconditionally; verifies that a failed device-side check is
+// actually visible from the host.
+struct TestTestStatus {
+  int* status_;
+
+  TestTestStatus(int* _status) : status_(_status) {}
+
+  XGBOOST_DEVICE void operator()() {
+    this->operator()(0);
+  }
+  XGBOOST_DEVICE void operator()(int _idx) {
+    SPAN_ASSERT_TRUE(false, status_);
+  }
+};
+
+struct TestAssignment {
+  int* status_;
+
+  TestAssignment(int* _status) : status_(_status) {}
+
+  XGBOOST_DEVICE void operator()() {
+    this->operator()(0);
+  }
+  XGBOOST_DEVICE void operator()(int _idx) {
+    Span<float> s1;
+
+    float arr[] = {3, 4, 5};
+
+    Span<float> s2 = arr;
+    SPAN_ASSERT_TRUE(s2.size() == 3, status_);
+    SPAN_ASSERT_TRUE(s2.data() == &arr[0], status_);
+
+    s2 = s1;
+    SPAN_ASSERT_TRUE(s2.empty(), status_);
+  }
+};
+
+struct TestBeginEnd {
+  int* status_;
+
+  TestBeginEnd(int* _status) : status_(_status) {}
+
+  XGBOOST_DEVICE void operator()() {
+    this->operator()(0);
+  }
+  XGBOOST_DEVICE void operator()(int _idx) {
+    float arr[16];
+    InitializeRange(arr, arr + 16);
+
+    Span<float> s (arr);
+    Span<float>::iterator beg { s.begin() };
+    Span<float>::iterator end { s.end() };
+
+    SPAN_ASSERT_TRUE(end == beg + 16, status_);
+    SPAN_ASSERT_TRUE(*beg == arr[0], status_);
+    SPAN_ASSERT_TRUE(*(end - 1) == arr[15], status_);
+  }
+};
+
+struct TestRBeginREnd {
+  int* status_;
+
+  TestRBeginREnd(int* _status) : status_(_status) {}
+
+  XGBOOST_DEVICE void operator()() {
+    this->operator()(0);
+  }
+  XGBOOST_DEVICE void operator()(int _idx) {
+    float arr[16];
+    InitializeRange(arr, arr + 16);
+
+    Span<float> s (arr);
+    Span<float>::iterator rbeg { s.rbegin() };
+    Span<float>::iterator rend { s.rend() };
+
+    SPAN_ASSERT_TRUE(rbeg == rend + 16, status_);
+    SPAN_ASSERT_TRUE(*(rbeg - 1) == arr[15], status_);
+    SPAN_ASSERT_TRUE(*rend == arr[0], status_);
+  }
+};
+
+struct TestObservers {
+  int* status_;
+
+  TestObservers(int* _status) : status_(_status) {}
+
+  XGBOOST_DEVICE void operator()() {
+    this->operator()(0);
+  }
+  XGBOOST_DEVICE void operator()(int _idx) {
+    // empty
+    {
+      float *arr = nullptr;
+      Span<float> s (arr, static_cast<Span<float>::index_type>(0));
+      SPAN_ASSERT_TRUE(s.empty(), status_);
+    }
+
+    // size, size_bytes
+    {
+      float* arr = new float[16];
+      Span<float> s (arr, 16);
+      SPAN_ASSERT_TRUE(s.size() == 16, status_);
+      SPAN_ASSERT_TRUE(s.size_bytes() == 16 * sizeof(float), status_);
+      delete [] arr;
+    }
+  }
+};
+
+struct TestCompare {
+  int* status_;
+
+  TestCompare(int* _status) : status_(_status) {}
+
+  XGBOOST_DEVICE void operator()() {
+    this->operator()(0);
+  }
+  XGBOOST_DEVICE void operator()(int _idx) {
+    float lhs_arr[16], rhs_arr[16];
+    InitializeRange(lhs_arr, lhs_arr + 16);
+    InitializeRange(rhs_arr, rhs_arr + 16);
+
+    Span<float> lhs (lhs_arr);
+    Span<float> rhs (rhs_arr);
+
+    SPAN_ASSERT_TRUE(lhs == rhs, status_);
+    SPAN_ASSERT_FALSE(lhs != rhs, status_);
+
+    SPAN_ASSERT_TRUE(lhs <= rhs, status_);
+    SPAN_ASSERT_TRUE(lhs >= rhs, status_);
+
+    lhs[2] -= 1;
+
+    SPAN_ASSERT_FALSE(lhs == rhs, status_);
+    SPAN_ASSERT_TRUE(lhs < rhs, status_);
+    SPAN_ASSERT_FALSE(lhs > rhs, status_);
+  }
+};
+
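+// The functors below exercise Span<T>::iterator: default construction,
+// dereference, arithmetic, and ordering, mirroring the span tests above.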
+struct TestIterConstruct {
+  int* status_;
+
+  TestIterConstruct(int* _status) : status_(_status) {}
+
+  XGBOOST_DEVICE void operator()() {
+    this->operator()(0);
+  }
+  XGBOOST_DEVICE void operator()(int _idx) {
+    Span<float>::iterator it1;
+    Span<float>::iterator it2;
+    SPAN_ASSERT_TRUE(it1 == it2, status_);
+
+    Span<float>::const_iterator cit1;
+    Span<float>::const_iterator cit2;
+    SPAN_ASSERT_TRUE(cit1 == cit2, status_);
+  }
+};
+
+struct TestIterRef {
+  int* status_;
+
+  TestIterRef(int* _status) : status_(_status) {}
+
+  XGBOOST_DEVICE void operator()() {
+    this->operator()(0);
+  }
+  XGBOOST_DEVICE void operator()(int _idx) {
+    float arr[16];
+    InitializeRange(arr, arr + 16);
+
+    Span<float> s (arr);
+    SPAN_ASSERT_TRUE(*(s.begin()) == s[0], status_);
+    SPAN_ASSERT_TRUE(*(s.end() - 1) == s[15], status_);
+  }
+};
+
+struct TestIterCalculate {
+  int* status_;
+
+  TestIterCalculate(int* _status) : status_(_status) {}
+
+  XGBOOST_DEVICE void operator()() {
+    this->operator()(0);
+  }
+  XGBOOST_DEVICE void operator()(int _idx) {
+    float arr[16];
+    InitializeRange(arr, arr + 16);
+
+    Span<float> s (arr);
+    Span<float>::iterator beg { s.begin() };
+
+    beg += 4;
+    SPAN_ASSERT_TRUE(*beg == 4, status_);
+
+    beg -= 2;
+    SPAN_ASSERT_TRUE(*beg == 2, status_);
+
+    ++beg;
+    SPAN_ASSERT_TRUE(*beg == 3, status_);
+
+    --beg;
+    SPAN_ASSERT_TRUE(*beg == 2, status_);
+
+    beg++;
+    beg--;
+    SPAN_ASSERT_TRUE(*beg == 2, status_);
+  }
+};
+
+struct TestIterCompare {
+  int* status_;
+
+  TestIterCompare(int* _status) : status_(_status) {}
+
+  XGBOOST_DEVICE void operator()() {
+    this->operator()(0);
+  }
+  XGBOOST_DEVICE void operator()(int _idx) {
+    float arr[16];
+    InitializeRange(arr, arr + 16);
+    Span<float> s (arr);
+    Span<float>::iterator left { s.begin() };
+    Span<float>::iterator right { s.end() };
+
+    left += 1;
+    right -= 15;
+
+    SPAN_ASSERT_TRUE(left == right, status_);
+
+    SPAN_ASSERT_TRUE(left >= right, status_);
+    SPAN_ASSERT_TRUE(left <= right, status_);
+
+    ++right;
+    SPAN_ASSERT_TRUE(right > left, status_);
+    SPAN_ASSERT_TRUE(left < right, status_);
+    SPAN_ASSERT_TRUE(left <= right, status_);
+  }
+};
+
+struct TestAsBytes {
+  int* status_;
+
+  TestAsBytes(int* _status) : status_(_status) {}
+
+  XGBOOST_DEVICE void operator()() {
+    this->operator()(0);
+  }
+  XGBOOST_DEVICE void operator()(int _idx) {
+    float arr[16];
+    InitializeRange(arr, arr + 16);
+
+    {
+      const Span<const float> s {arr};
+      const Span<const byte> bs = as_bytes(s);
+      SPAN_ASSERT_TRUE(bs.size() == s.size_bytes(), status_);
+      SPAN_ASSERT_TRUE(static_cast<const void*>(bs.data()) ==
+                       static_cast<const void*>(s.data()),
+                       status_);
+    }
+
+    {
+      Span<const float> s;
+      const Span<const byte> bs = as_bytes(s);
+      SPAN_ASSERT_TRUE(bs.size() == s.size(), status_);
+      SPAN_ASSERT_TRUE(bs.size() == 0, status_);
+      SPAN_ASSERT_TRUE(bs.size_bytes() == 0, status_);
+      SPAN_ASSERT_TRUE(static_cast<const void*>(bs.data()) ==
+                       static_cast<const void*>(s.data()),
+                       status_);
+      SPAN_ASSERT_TRUE(bs.data() == nullptr, status_);
+    }
+  }
+};
+
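+// as_writable_bytes is the mutable counterpart of as_bytes; both follow the
+// std::as_bytes / std::as_writable_bytes free functions from the C++20 draft
+// this Span is modeled on.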
+struct TestAsWritableBytes {
+  int* status_;
+
+  TestAsWritableBytes(int* _status) : status_(_status) {}
+
+  XGBOOST_DEVICE void operator()() {
+    this->operator()(0);
+  }
+  XGBOOST_DEVICE void operator()(int _idx) {
+    float arr[16];
+    InitializeRange(arr, arr + 16);
+
+    {
+      Span<float> s;
+      Span<byte> bs = as_writable_bytes(s);
+      SPAN_ASSERT_TRUE(bs.size() == s.size(), status_);
+      SPAN_ASSERT_TRUE(bs.size_bytes() == s.size_bytes(), status_);
+      SPAN_ASSERT_TRUE(bs.size() == 0, status_);
+      SPAN_ASSERT_TRUE(bs.size_bytes() == 0, status_);
+      SPAN_ASSERT_TRUE(bs.data() == nullptr, status_);
+      SPAN_ASSERT_TRUE(static_cast<void*>(bs.data()) ==
+                       static_cast<void*>(s.data()), status_);
+    }
+
+    {
+      Span<float> s { arr };
+      Span<byte> bs { as_writable_bytes(s) };
+      SPAN_ASSERT_TRUE(s.size_bytes() == bs.size_bytes(), status_);
+      SPAN_ASSERT_TRUE(static_cast<void*>(bs.data()) ==
+                       static_cast<void*>(s.data()), status_);
+    }
+  }
+};
+
+}  // namespace common
+}  // namespace xgboost
+
+#endif  // XGBOOST_TEST_SPAN_H_
diff --git a/tests/cpp/data/test_simple_csr_source.cc b/tests/cpp/data/test_simple_csr_source.cc
index 459e4570d..8da8a9730 100644
--- a/tests/cpp/data/test_simple_csr_source.cc
+++ b/tests/cpp/data/test_simple_csr_source.cc
@@ -25,7 +25,7 @@ TEST(SimpleCSRSource, SaveLoadBinary) {
   row_iter_read->BeforeFirst();
   row_iter_read->Next();
   auto first_row = row_iter->Value()[0];
   auto first_row_read = row_iter_read->Value()[0];
-  EXPECT_EQ(first_row.length, first_row_read.length);
+  EXPECT_EQ(first_row.size(), first_row_read.size());
   EXPECT_EQ(first_row[2].index, first_row_read[2].index);
   EXPECT_EQ(first_row[2].fvalue, first_row_read[2].fvalue);
   row_iter = nullptr;
   row_iter_read = nullptr;
diff --git a/tests/cpp/data/test_simple_dmatrix.cc b/tests/cpp/data/test_simple_dmatrix.cc
index 600c7d662..10ded726e 100644
--- a/tests/cpp/data/test_simple_dmatrix.cc
+++ b/tests/cpp/data/test_simple_dmatrix.cc
@@ -31,7 +31,7 @@ TEST(SimpleDMatrix, RowAccess) {
   row_iter->BeforeFirst();
   row_iter->Next();
   auto first_row = row_iter->Value()[0];
-  ASSERT_EQ(first_row.length, 3);
+  ASSERT_EQ(first_row.size(), 3);
   EXPECT_EQ(first_row[2].index, 2);
   EXPECT_EQ(first_row[2].fvalue, 20);
   row_iter = nullptr;
@@ -70,7 +70,7 @@ TEST(SimpleDMatrix, ColAccessWithoutBatches) {
   EXPECT_EQ(col_iter->Value().Size(), dmat->Info().num_col_)
     << "Expected batch size = number of cells as #batches is 1.";
   for (int i = 0; i < static_cast<int>(col_iter->Value().Size()); ++i) {
-    EXPECT_EQ(col_iter->Value()[i].length, dmat->GetColSize(i))
+    EXPECT_EQ(col_iter->Value()[i].size(), dmat->GetColSize(i))
       << "Expected length of each colbatch = colsize as #batches is 1.";
   }
 }
diff --git a/tests/cpp/data/test_sparse_page_dmatrix.cc b/tests/cpp/data/test_sparse_page_dmatrix.cc
index f08e1183a..572279c8c 100644
--- a/tests/cpp/data/test_sparse_page_dmatrix.cc
+++ b/tests/cpp/data/test_sparse_page_dmatrix.cc
@@ -40,7 +40,7 @@ TEST(SparsePageDMatrix, RowAccess) {
   row_iter->BeforeFirst();
   row_iter->Next();
   auto first_row = row_iter->Value()[0];
-  ASSERT_EQ(first_row.length, 3);
+  ASSERT_EQ(first_row.size(), 3);
   EXPECT_EQ(first_row[2].index, 2);
   EXPECT_EQ(first_row[2].fvalue, 20);
   row_iter = nullptr;