Export c++ headers in CMake installation. (#4897)

* Move get transpose into cc.

* Clean up headers in host device vector, remove thrust dependency.

* Move span and host device vector into public.

* Install c++ headers.

* Short notes for c and c++.

Co-Authored-By: Philip Hyunsu Cho <chohyu01@cs.washington.edu>
This commit is contained in:
Jiaming Yuan
2019-10-06 23:53:09 -04:00
committed by GitHub
parent 4ab1df5fe6
commit 095de3bf5f
55 changed files with 240 additions and 209 deletions

View File

@@ -13,7 +13,12 @@
#include <string>
#include <vector>
#include "span.h"
#if defined(__CUDACC__)
#include <thrust/copy.h>
#include <thrust/device_ptr.h>
#endif // defined(__CUDACC__)
#include "xgboost/span.h"
namespace xgboost {

View File

@@ -11,8 +11,10 @@
#include <rabit/rabit.h>
#include <cub/util_allocator.cuh>
#include "xgboost/host_device_vector.h"
#include "xgboost/span.h"
#include "common.h"
#include "span.h"
#include <algorithm>
#include <omp.h>
@@ -1132,6 +1134,27 @@ xgboost::common::Span<T> ToSpan(thrust::device_vector<T>& vec,
return ToSpan(vec, static_cast<IndexT>(offset), static_cast<IndexT>(size));
}
// thrust begin, similar to std::begin
/*!
 * \brief Wrap the mutable device pointer of a HostDeviceVector in a thrust
 *        device_ptr so that it can be handed to thrust algorithms.
 * \param vector The vector whose DevicePointer() is wrapped.
 * \return thrust::device_ptr pointing at the first device element.
 */
template <typename T>
thrust::device_ptr<T> tbegin(xgboost::HostDeviceVector<T>& vector) {  // NOLINT
  T* raw = vector.DevicePointer();
  return thrust::device_ptr<T>(raw);
}
/*!
 * \brief One-past-the-end counterpart of tbegin().
 * \param vector The vector whose device range is being described.
 * \return tbegin(vector) advanced by vector.Size() elements.
 */
template <typename T>
thrust::device_ptr<T> tend(xgboost::HostDeviceVector<T>& vector) {  // NOLINT
  thrust::device_ptr<T> first = tbegin(vector);
  return first + vector.Size();
}
/*!
 * \brief Read-only variant of tbegin(): wraps ConstDevicePointer() in a
 *        thrust device_ptr to const T.
 * \param vector The vector whose ConstDevicePointer() is wrapped.
 * \return thrust::device_ptr pointing at the first (const) device element.
 */
template <typename T>
thrust::device_ptr<T const> tcbegin(xgboost::HostDeviceVector<T> const& vector) {
  T const* raw = vector.ConstDevicePointer();
  return thrust::device_ptr<T const>(raw);
}
/*!
 * \brief One-past-the-end counterpart of tcbegin().
 * \param vector The vector whose device range is being described.
 * \return tcbegin(vector) advanced by vector.Size() elements.
 */
template <typename T>
thrust::device_ptr<T const> tcend(xgboost::HostDeviceVector<T> const& vector) {
  thrust::device_ptr<T const> first = tcbegin(vector);
  return first + vector.Size();
}
template <typename FunctionT>
class LauncherItr {
public:

View File

@@ -8,6 +8,8 @@
#include <dmlc/omp.h>
#include <numeric>
#include <vector>
#include "../common/common.h"
#include "./random.h"
#include "./column_matrix.h"
#include "./quantile.h"

View File

@@ -2,7 +2,6 @@
* Copyright 2018 XGBoost contributors
*/
#include "./hist_util.h"
#include <xgboost/logging.h>
#include <thrust/copy.h>
@@ -17,10 +16,11 @@
#include <memory>
#include <mutex>
#include "hist_util.h"
#include "xgboost/host_device_vector.h"
#include "device_helpers.cuh"
#include "quantile.h"
#include "../tree/param.h"
#include "./host_device_vector.h"
#include "./device_helpers.cuh"
#include "./quantile.h"
namespace xgboost {
namespace common {

View File

@@ -9,7 +9,7 @@
#include <xgboost/data.h>
#include <cstdint>
#include <utility>
#include "./host_device_vector.h"
#include "xgboost/host_device_vector.h"
namespace xgboost {

View File

@@ -2,13 +2,16 @@
* Copyright 2017 XGBoost contributors
*/
#include "./host_device_vector.h"
#include <thrust/fill.h>
#include <xgboost/data.h>
#include <thrust/device_ptr.h>
#include <algorithm>
#include <cstdint>
#include <mutex>
#include "./device_helpers.cuh"
#include "xgboost/data.h"
#include "xgboost/host_device_vector.h"
#include "device_helpers.cuh"
namespace xgboost {
@@ -75,22 +78,6 @@ class HostDeviceVectorImpl {
return {data_d_.data().get(), static_cast<SpanInd>(Size())};
}
// Mutable thrust iterator to the first element, built from DevicePointer().
thrust::device_ptr<T> tbegin() {  // NOLINT
  T* raw = DevicePointer();
  return thrust::device_ptr<T>(raw);
}
// Read-only thrust iterator to the first element, built from
// ConstDevicePointer().
thrust::device_ptr<const T> tcbegin() {  // NOLINT
  const T* raw = ConstDevicePointer();
  return thrust::device_ptr<const T>(raw);
}
// Mutable one-past-the-end iterator: tbegin() advanced by Size().
thrust::device_ptr<T> tend() {  // NOLINT
  thrust::device_ptr<T> first = tbegin();
  return first + Size();
}
// Read-only one-past-the-end iterator: tcbegin() advanced by Size().
thrust::device_ptr<const T> tcend() {  // NOLINT
  thrust::device_ptr<const T> first = tcbegin();
  return first + Size();
}
void Fill(T v) { // NOLINT
if (HostCanWrite()) {
std::fill(data_h_.begin(), data_h_.end(), v);
@@ -304,26 +291,6 @@ common::Span<const T> HostDeviceVector<T>::ConstDeviceSpan() const {
return impl_->ConstDeviceSpan();
}
// Forwards to the implementation object's tbegin().
template <typename T>
thrust::device_ptr<T> HostDeviceVector<T>::tbegin() {  // NOLINT
  thrust::device_ptr<T> first = impl_->tbegin();
  return first;
}
// Forwards to the implementation object's tcbegin().
template <typename T>
thrust::device_ptr<const T> HostDeviceVector<T>::tcbegin() const {  // NOLINT
  thrust::device_ptr<const T> first = impl_->tcbegin();
  return first;
}
// Forwards to the implementation object's tend().
template <typename T>
thrust::device_ptr<T> HostDeviceVector<T>::tend() {  // NOLINT
  thrust::device_ptr<T> last = impl_->tend();
  return last;
}
// Forwards to the implementation object's tcend().
template <typename T>
thrust::device_ptr<const T> HostDeviceVector<T>::tcend() const {  // NOLINT
  thrust::device_ptr<const T> last = impl_->tcend();
  return last;
}
template <typename T>
void HostDeviceVector<T>::Fill(T v) {
impl_->Fill(v);

View File

@@ -1,158 +0,0 @@
/*!
* Copyright 2017-2019 XGBoost contributors
*/
/**
* @file host_device_vector.h
* @brief A device-and-host vector abstraction layer.
*
* Why HostDeviceVector?<br/>
* With CUDA, one has to explicitly manage memory through 'cudaMemcpy' calls.
* This wrapper class hides this management from the users, thereby making it
* easy to integrate GPU/CPU usage under a single interface.
*
* Initialization/Allocation:<br/>
* One can choose to initialize the vector on CPU or GPU during constructor.
* (use the 'devices' argument) Or, can choose to use the 'Resize' method to
* allocate/resize memory explicitly, and use the 'SetDevice' method
* to specify the device.
*
* Accessing underlying data:<br/>
* Use 'HostVector' method to explicitly query for the underlying std::vector.
* If you need the raw device pointer, use the 'DevicePointer' method. For perf
* implications of these calls, see below.
*
* Accessing underlying data and its perf implications:<br/>
* There are 4 scenarios to be considered here:
* HostVector and data on CPU --> no problems, std::vector returned immediately
* HostVector but data on GPU --> this causes a cudaMemcpy to be issued internally.
* subsequent calls to HostVector, will NOT incur this penalty.
* (assuming 'DevicePointer' is not called in between)
* DevicePointer but data on CPU --> this causes a cudaMemcpy to be issued internally.
* subsequent calls to DevicePointer, will NOT incur this penalty.
* (assuming 'HostVector' is not called in between)
* DevicePointer and data on GPU --> no problems, the device ptr
* will be returned immediately.
*
* What if xgboost is compiled without CUDA?<br/>
* In that case, there's a special implementation which always falls-back to
* working with std::vector. This logic can be found in host_device_vector.cc
*
* Why not consider CUDA unified memory?<br/>
* We did consider. However, it poses complications if we need to support both
* compiling with and without CUDA toolkit. It was easier to have
* 'HostDeviceVector' with a special-case implementation in host_device_vector.cc
*
* @note: Size and Devices methods are thread-safe.
* DevicePointer, DeviceStart, DeviceSize, tbegin and tend methods are thread-safe
* if different threads call these methods with different values of the device argument.
* All other methods are not thread safe.
*/
#ifndef XGBOOST_COMMON_HOST_DEVICE_VECTOR_H_
#define XGBOOST_COMMON_HOST_DEVICE_VECTOR_H_
#include <dmlc/logging.h>
#include <algorithm>
#include <cstdlib>
#include <initializer_list>
#include <utility>
#include <vector>
#include "common.h"
#include "span.h"
// only include thrust-related files if host_device_vector.h
// is included from a .cu file
#ifdef __CUDACC__
#include <thrust/device_ptr.h>
#endif // __CUDACC__
namespace xgboost {
#ifdef __CUDACC__
// Sets a function to call instead of cudaSetDevice();
// only added for testing
void SetCudaSetDeviceHandler(void (*handler)(int));
#endif // __CUDACC__
template <typename T> struct HostDeviceVectorImpl;
/*!
* \brief Controls data access from the GPU.
*
* Since a `HostDeviceVector` can have data on both the host and device, access control needs to be
* maintained to keep the data consistent.
*
* There are 3 scenarios supported:
* - Data is being manipulated on device. GPU has write access, host doesn't have access.
* - Data is read-only on both the host and device.
* - Data is being manipulated on the host. Host has write access, device doesn't have access.
*/
// Access level granted to the GPU. Enumerators are listed in increasing
// order of privilege; their numeric values are 0, 1 and 2.
enum GPUAccess {
  kNone,
  kRead,
  // write implies read
  kWrite
};
/*!
 * \brief Public handle for a vector whose data may live on the host, the
 *        device, or both. All state is held by a HostDeviceVectorImpl<T>
 *        reached through `impl_`; only the trivial forwarding overloads are
 *        defined inline here.
 */
template <typename T>
class HostDeviceVector {
public:
// Construct with `size` copies of `v`.
// NOTE(review): device = -1 presumably means "no device / host only" - confirm
// against the implementation.
explicit HostDeviceVector(size_t size = 0, T v = T(), int device = -1);
// Construct from an initializer list.
HostDeviceVector(std::initializer_list<T> init, int device = -1);
// Construct from the contents of a std::vector.
explicit HostDeviceVector(const std::vector<T>& init, int device = -1);
~HostDeviceVector();
HostDeviceVector(const HostDeviceVector<T>&);
HostDeviceVector<T>& operator=(const HostDeviceVector<T>&);
// Number of elements.
size_t Size() const;
// Index of the device associated with this vector.
int DeviceIdx() const;
// Span views over the device copy of the data.
common::Span<T> DeviceSpan();
common::Span<const T> ConstDeviceSpan() const;
// Const overload: forwards to ConstDeviceSpan().
common::Span<const T> DeviceSpan() const { return ConstDeviceSpan(); }
// Raw pointers to the device copy of the data.
T* DevicePointer();
const T* ConstDevicePointer() const;
// Const overload: forwards to ConstDevicePointer().
const T* DevicePointer() const { return ConstDevicePointer(); }
// Raw pointers to the host copy; these are the data() of the host vector.
T* HostPointer() { return HostVector().data(); }
const T* ConstHostPointer() const { return ConstHostVector().data(); }
// Const overload: forwards to ConstHostPointer().
const T* HostPointer() const { return ConstHostPointer(); }
// only define functions returning device_ptr
// if HostDeviceVector.h is included from a .cu file
#ifdef __CUDACC__
thrust::device_ptr<T> tbegin(); // NOLINT
thrust::device_ptr<T> tend(); // NOLINT
thrust::device_ptr<const T> tcbegin() const; // NOLINT
thrust::device_ptr<const T> tcend() const; // NOLINT
// Const overloads of tbegin()/tend(): forward to tcbegin()/tcend().
thrust::device_ptr<const T> tbegin() const { // NOLINT
return tcbegin();
}
thrust::device_ptr<const T> tend() const { return tcend(); } // NOLINT
#endif // __CUDACC__
// Set every element to `v`.
void Fill(T v);
// Overwrite the contents with those of `other`.
void Copy(const HostDeviceVector<T>& other);
void Copy(const std::vector<T>& other);
void Copy(std::initializer_list<T> other);
// Access to the host-side std::vector.
std::vector<T>& HostVector();
const std::vector<T>& ConstHostVector() const;
// Const overload: forwards to ConstHostVector().
const std::vector<T>& HostVector() const {return ConstHostVector(); }
// Queries of the current host/device access permissions.
bool HostCanRead() const;
bool HostCanWrite() const;
bool DeviceCanRead() const;
bool DeviceCanWrite() const;
// Select the device to use. Declared const, so it can be called on a const
// vector.
void SetDevice(int device) const;
// Change the number of elements; `v` is the value for new elements.
void Resize(size_t new_size, T v = T());
private:
// Pointer to the implementation object that owns the actual storage.
HostDeviceVectorImpl<T>* impl_;
};
} // namespace xgboost
#endif // XGBOOST_COMMON_HOST_DEVICE_VECTOR_H_

View File

@@ -17,8 +17,8 @@
#include <numeric>
#include <random>
#include "xgboost/host_device_vector.h"
#include "io.h"
#include "host_device_vector.h"
namespace xgboost {
namespace common {
@@ -113,7 +113,7 @@ class ColumnSampler {
}
public:
/**
/**
* \brief Column sampler constructor.
* \note This constructor manually sets the rng seed
*/
@@ -169,7 +169,7 @@ class ColumnSampler {
/**
* \brief Samples a feature set.
*
*
* \param depth The tree depth of the node at which to sample.
* \return The sampled feature set.
* \note If colsample_bynode_ < 1.0, this method creates a new feature set each time it

View File

@@ -1,640 +0,0 @@
/*!
* Copyright 2018 XGBoost contributors
* \brief span class based on ISO++20 span
*
* About NOLINTs in this file:
*
* If we want Span to work with the std interface, e.g. range-based for loops,
* the naming must be consistent with std, not XGBoost. The interface also
* conflicts with XGBoost coding style, specifically the use of the `explicit'
* keyword.
*
*
* Some of the code is copied from Guidelines Support Library, here is the
* license:
*
* Copyright (c) 2015 Microsoft Corporation. All rights reserved.
*
* This code is licensed under the MIT License (MIT).
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef XGBOOST_COMMON_SPAN_H_
#define XGBOOST_COMMON_SPAN_H_
#include <xgboost/logging.h> // CHECK
#include <cinttypes> // int64_t
#include <type_traits>
/*!
* The version number 1910 is picked up from GSL.
*
* We might want to use MOODYCAMEL_NOEXCEPT from dmlc/concurrentqueue.h. But
* there are a lot more definitions in that file would cause warnings/troubles
* in MSVC 2013. Currently we try to keep the closure of Span as minimal as
* possible.
*
* There are other workarounds for MSVC, like _Unwrapped, _Verify_range ...
* Some of these are hidden MSVC magic and I tried to avoid them. Should any
* of them become needed, please consult the source code of GSL, and possibly
* the explanations in this thread:
*
* https://github.com/Microsoft/GSL/pull/664
*
* TODO(trivialfis): Group these MSVC workarounds into a manageable place.
*/
#if defined(_MSC_VER) && _MSC_VER < 1910
#define __span_noexcept
#pragma push_macro("constexpr")
#define constexpr /*constexpr*/
#else
#define __span_noexcept noexcept
#endif // defined(_MSC_VER) && _MSC_VER < 1910
namespace xgboost {
namespace common {
// Usual logging facility is not available inside device code.
// TODO(trivialfis): Make dmlc check more generic.
// assert is not supported in mac as of CUDA 10.0
/*!
 * Device-side check: when `cond` is false, print the file, line, enclosing
 * function, the failed expression and the block/thread indices, then execute
 * a trap instruction.
 *
 * The macro expands to a single `do { ... } while (0)` statement WITHOUT a
 * trailing semicolon, so that `KERNEL_CHECK(x);` is exactly one statement and
 * composes correctly with un-braced if/else.
 */
#define KERNEL_CHECK(cond)                                                  \
  do {                                                                      \
    if (!(cond)) {                                                          \
      printf("\nKernel error:\n"                                            \
             "In: %s: %d\n"                                                 \
             "\t%s\n\tExpecting: %s\n"                                      \
             "\tBlock: [%d, %d, %d], Thread: [%d, %d, %d]\n\n",             \
             __FILE__, __LINE__, __PRETTY_FUNCTION__, #cond,                \
             blockIdx.x, blockIdx.y, blockIdx.z,                            \
             threadIdx.x, threadIdx.y, threadIdx.z);                        \
      asm("trap;");                                                         \
    }                                                                       \
  } while (0)
// SPAN_CHECK resolves to the device-side check when compiling device code,
// and to dmlc's CHECK otherwise.
#ifdef __CUDA_ARCH__
#define SPAN_CHECK KERNEL_CHECK
#else
#define SPAN_CHECK CHECK  // check from dmlc
#endif  // __CUDA_ARCH__
namespace detail {
/*!
* By default, XGBoost uses uint32_t for indexing data. int64_t covers all
* values uint32_t can represent. Also, on x86-64 Linux, GCC uses long int to
* represent ptrdiff_t, which is just int64_t. So we make it deterministic
* here.
*/
using ptrdiff_t = int64_t; // NOLINT
} // namespace detail
#if defined(_MSC_VER) && _MSC_VER < 1910
constexpr const detail::ptrdiff_t dynamic_extent = -1; // NOLINT
#else
constexpr detail::ptrdiff_t dynamic_extent = -1; // NOLINT
#endif // defined(_MSC_VER) && _MSC_VER < 1910
enum class byte : unsigned char {}; // NOLINT
template <class ElementType, detail::ptrdiff_t Extent>
class Span;
namespace detail {
/*!
 * \brief Bounds-checked random access iterator over a Span.
 *
 * The iterator stores a pointer to the Span it was created from together with
 * an index into it; every dereference or move is validated with SPAN_CHECK
 * against the span's size().
 *
 * \tparam SpanType The Span instantiation being iterated.
 * \tparam IsConst  When true, `reference` refers to a const element.
 */
template <typename SpanType, bool IsConst>
class SpanIterator {
  using ElementType = typename SpanType::element_type;

 public:
  using iterator_category = std::random_access_iterator_tag;      // NOLINT
  using value_type = typename std::remove_cv<ElementType>::type;  // NOLINT
  using difference_type = typename SpanType::index_type;          // NOLINT

  using reference = typename std::conditional<                    // NOLINT
      IsConst, const ElementType, ElementType>::type&;
  using pointer = typename std::add_pointer<reference>::type;     // NOLINT

  /*! \brief Default iterator: refers to no span, index 0. */
  XGBOOST_DEVICE constexpr SpanIterator() : span_{nullptr}, index_{0} {}

  /*! \brief Iterator into `*_span` positioned at `_idx`. */
  XGBOOST_DEVICE constexpr SpanIterator(
      const SpanType* _span,
      typename SpanType::index_type _idx) __span_noexcept :
      span_(_span), index_(_idx) {}

  // The const iterator needs access to the members of the mutable one for
  // the converting constructor below.
  friend SpanIterator<SpanType, true>;
  /*! \brief Conversion from the mutable iterator to the const iterator. */
  template <bool B, typename std::enable_if<!B && IsConst>::type* = nullptr>
  XGBOOST_DEVICE constexpr SpanIterator(                         // NOLINT
      const SpanIterator<SpanType, B>& other_) __span_noexcept
      : SpanIterator(other_.span_, other_.index_) {}

  /*! \brief Checked dereference of the current element. */
  XGBOOST_DEVICE reference operator*() const {
    SPAN_CHECK(index_ < span_->size());
    return span_->data()[index_];
  }
  /*! \brief Element `n` positions away from the current one. */
  XGBOOST_DEVICE reference operator[](difference_type n) const {
    const SpanIterator shifted = *this + n;
    return *shifted;
  }

  /*! \brief Checked pointer to the current element. */
  XGBOOST_DEVICE pointer operator->() const {
    SPAN_CHECK(index_ != span_->size());
    return span_->data() + index_;
  }

  /*! \brief Pre-increment; must not already be at the end. */
  XGBOOST_DEVICE SpanIterator& operator++() {
    SPAN_CHECK(0 <= index_ && index_ != span_->size());
    ++index_;
    return *this;
  }

  /*! \brief Post-increment; returns the iterator value before the move. */
  XGBOOST_DEVICE SpanIterator operator++(int) {
    SpanIterator previous(*this);
    this->operator++();
    return previous;
  }

  /*! \brief Pre-decrement; must not already be at the beginning. */
  XGBOOST_DEVICE SpanIterator& operator--() {
    SPAN_CHECK(index_ != 0 && index_ <= span_->size());
    --index_;
    return *this;
  }

  /*! \brief Post-decrement; returns the iterator value before the move. */
  XGBOOST_DEVICE SpanIterator operator--(int) {
    SpanIterator previous(*this);
    this->operator--();
    return previous;
  }

  /*! \brief Copy of this iterator advanced by `n`. */
  XGBOOST_DEVICE SpanIterator operator+(difference_type n) const {
    SpanIterator moved(*this);
    moved += n;
    return moved;
  }

  /*! \brief Advance by `n`; the result must stay within [0, size()]. */
  XGBOOST_DEVICE SpanIterator& operator+=(difference_type n) {
    SPAN_CHECK((index_ + n) >= 0 && (index_ + n) <= span_->size());
    index_ += n;
    return *this;
  }

  /*! \brief Distance between two iterators of the same span. */
  XGBOOST_DEVICE difference_type operator-(SpanIterator rhs) const {
    SPAN_CHECK(span_ == rhs.span_);
    return index_ - rhs.index_;
  }

  /*! \brief Copy of this iterator moved back by `n`. */
  XGBOOST_DEVICE SpanIterator operator-(difference_type n) const {
    SpanIterator moved(*this);
    moved -= n;
    return moved;
  }

  /*! \brief Move back by `n`, implemented as `+= -n`. */
  XGBOOST_DEVICE SpanIterator& operator-=(difference_type n) {
    return this->operator+=(-n);
  }

  // friends
  // Equality requires both the same span and the same index; the ordering
  // operators compare indices only.
  XGBOOST_DEVICE constexpr friend bool operator==(
      SpanIterator _lhs, SpanIterator _rhs) __span_noexcept {
    return _lhs.span_ == _rhs.span_ && _lhs.index_ == _rhs.index_;
  }

  XGBOOST_DEVICE constexpr friend bool operator!=(
      SpanIterator _lhs, SpanIterator _rhs) __span_noexcept {
    return !(_lhs == _rhs);
  }

  XGBOOST_DEVICE constexpr friend bool operator<(
      SpanIterator _lhs, SpanIterator _rhs) __span_noexcept {
    return _lhs.index_ < _rhs.index_;
  }

  XGBOOST_DEVICE constexpr friend bool operator<=(
      SpanIterator _lhs, SpanIterator _rhs) __span_noexcept {
    return !(_rhs < _lhs);
  }

  XGBOOST_DEVICE constexpr friend bool operator>(
      SpanIterator _lhs, SpanIterator _rhs) __span_noexcept {
    return _rhs < _lhs;
  }

  XGBOOST_DEVICE constexpr friend bool operator>=(
      SpanIterator _lhs, SpanIterator _rhs) __span_noexcept {
    return !(_lhs < _rhs);
  }

 protected:
  const SpanType *span_;     // span this iterator walks over (not owned)
  detail::ptrdiff_t index_;  // current position within *span_
};
// It's tempting to use constexpr instead of structs to do the following meta
// programming. But remember that we are supporting MSVC 2013 here.
/*!
* The extent E of the span returned by subspan is determined as follows:
*
* - If Count is not dynamic_extent, Count;
* - Otherwise, if Extent is not dynamic_extent, Extent - Offset;
* - Otherwise, dynamic_extent.
*/
// Compile-time extent of the span produced by subspan<Offset, Count>() on a
// span with extent Extent.
template <detail::ptrdiff_t Extent,
          detail::ptrdiff_t Offset,
          detail::ptrdiff_t Count>
struct ExtentValue : public std::integral_constant<
  detail::ptrdiff_t,
  Count == dynamic_extent
      ? (Extent == dynamic_extent ? dynamic_extent : Extent - Offset)
      : Count> {};
/*!
* If N is dynamic_extent, the extent of the returned span E is also
* dynamic_extent; otherwise it is detail::ptrdiff_t(sizeof(T)) * N.
*/
// Compile-time extent of the byte view of a Span<T, Extent>.
template <typename T, detail::ptrdiff_t Extent>
struct ExtentAsBytesValue : public std::integral_constant<
  detail::ptrdiff_t,
  Extent != dynamic_extent
      ? static_cast<detail::ptrdiff_t>(sizeof(T) * Extent)
      : dynamic_extent> {};
// True when a span of extent From may be converted to a span of extent To:
// either side is dynamic, or both extents are equal.
template <detail::ptrdiff_t From, detail::ptrdiff_t To>
struct IsAllowedExtentConversion : public std::integral_constant<
  bool, From == dynamic_extent || To == dynamic_extent || From == To> {};
// True when a pointer to an array of From converts to a pointer to an array
// of To.
template <class From, class To>
struct IsAllowedElementTypeConversion
    : public std::is_convertible<From(*)[], To(*)[]>::type {};
// Primary template: an arbitrary type is not a Span.
template <class T>
struct IsSpanOracle : std::integral_constant<bool, false> {};

// Specialization: any instantiation of Span is a Span.
template <class T, detail::ptrdiff_t Extent>
struct IsSpanOracle<Span<T, Extent>> : std::integral_constant<bool, true> {};
// Span detection that ignores top-level cv-qualifiers on T.
template <class T>
struct IsSpan
    : public IsSpanOracle<typename std::remove_cv<T>::type> {};
// Re-implement std algorithms here to adapt them to CUDA.
// Function object returning `lhs < rhs`; usable from device code.
template <typename T>
struct Less {
  XGBOOST_DEVICE constexpr bool operator()(const T& lhs, const T& rhs) const {
    return lhs < rhs;
  }
};
// Function object returning `lhs > rhs`; usable from device code.
template <typename T>
struct Greater {
  XGBOOST_DEVICE constexpr bool operator()(const T& lhs, const T& rhs) const {
    return lhs > rhs;
  }
};
/*!
 * \brief Lexicographical comparison of [first1, last1) against
 *        [first2, last2) using a default-constructed Compare.
 *
 * Walks both ranges in lock step. The first position where the elements are
 * ordered by Compare decides the result. If no such position exists, the
 * result is true only when the first range ran out while the second did not.
 */
template <class InputIt1, class InputIt2,
          class Compare =
              detail::Less<decltype(std::declval<InputIt1>().operator*())>>
XGBOOST_DEVICE bool LexicographicalCompare(InputIt1 first1, InputIt1 last1,
                                           InputIt2 first2, InputIt2 last2) {
  Compare comp;
  while (first1 != last1 && first2 != last2) {
    if (comp(*first1, *first2)) {
      return true;
    }
    if (comp(*first2, *first1)) {
      return false;
    }
    ++first1;
    ++first2;
  }
  // No deciding element: the first range must be a proper prefix of the second.
  return first1 == last1 && first2 != last2;
}
} // namespace detail
/*!
* \brief span class implementation, based on ISO++20 span<T>. The interface
* should be the same.
*
* What's different from span<T> in Guidelines Support Library (GSL)
*
* Interface might be slightly different, we stick with ISO.
*
* GSL uses C++14/17 features, which are not available here.
* GSL uses constexpr extensively, which is not possible with the limitations
* of C++11.
* GSL doesn't concern about CUDA.
*
* GSL is more thoroughly implemented and tested.
* GSL is more optimized, especially for static extent.
*
* GSL uses __builtin_unreachable() on error, Span<T> uses dmlc LOG and
* customized CUDA logging.
*
*
* What's different from span<T> in ISO++20 (ISO)
*
* ISO uses functions/structs from std library, which might be not available
* in CUDA.
* Initializing from std::array is not supported.
*
* ISO uses constexpr extensively, which is not possible with the limitations
* of C++11.
* ISO uses C++14/17 features, which is not available here.
* ISO doesn't concern about CUDA.
*
* ISO uses std::terminate(), Span<T> uses dmlc LOG and customized CUDA
* logging.
*
*
* Limitations:
* With thrust:
* It's not advised to initialize Span with host_vector directly, since
* host_vector::data() is a host function.
* It's not possible to initialize Span with device_vector directly, since
* device_vector::data() returns a wrapped pointer.
* It's unclear that what kind of thrust algorithm can be used without
* memory error. See the test case "GPUSpan.WithTrust"
*
* Pass iterator to kernel:
* Not possible. Use subspan instead.
*
* The underlying Span in SpanIterator is a pointer, but CUDA pass kernel
* parameter by value. If we were to hold a Span value instead of a
* pointer, the following snippet will crash, violating the safety
* purpose of Span:
*
* \code{.cpp}
* Span<float> span {arr_a};
* auto beg = span.begin();
*
* Span<float> span_b = arr_b;
* span = span_b;
*
* delete arr_a;
* beg++; // crash
* \endcode
*
* While holding a pointer or reference should avoid the problem, it's a
* compromise. Since we have subspan, it's acceptable not to support
* passing iterator.
*/
/*!
 * \brief Non-owning view over `size_` contiguous elements starting at
 *        `data_`, with SPAN_CHECK-validated access.
 *
 * \tparam T      Element type (may be const-qualified).
 * \tparam Extent Compile-time extent, or dynamic_extent.
 */
template <typename T,
          detail::ptrdiff_t Extent = dynamic_extent>
class Span {
 public:
  using element_type = T;                               // NOLINT
  using value_type = typename std::remove_cv<T>::type;  // NOLINT
  using index_type = detail::ptrdiff_t;                 // NOLINT
  using difference_type = detail::ptrdiff_t;            // NOLINT
  using pointer = T*;                                   // NOLINT
  using reference = T&;                                 // NOLINT

  using iterator = detail::SpanIterator<Span<T, Extent>, false>;             // NOLINT
  using const_iterator = const detail::SpanIterator<Span<T, Extent>, true>;  // NOLINT
  using reverse_iterator = detail::SpanIterator<Span<T, Extent>, false>;     // NOLINT
  using const_reverse_iterator = const detail::SpanIterator<Span<T, Extent>, true>;  // NOLINT

  // constructors

  /*! \brief Empty span: size 0, null data. */
  XGBOOST_DEVICE constexpr Span() __span_noexcept : size_(0), data_(nullptr) {}

  /*! \brief View over `_count` elements starting at `_ptr`. */
  XGBOOST_DEVICE Span(pointer _ptr, index_type _count) :
      size_(_count), data_(_ptr) {
    SPAN_CHECK(_count >= 0);
    SPAN_CHECK(_ptr || _count == 0);
  }

  /*! \brief View over the pointer range [_first, _last). */
  XGBOOST_DEVICE Span(pointer _first, pointer _last) :
      size_(_last - _first), data_(_first) {
    SPAN_CHECK(size_ >= 0);
    SPAN_CHECK(data_ || size_ == 0);
  }

  /*! \brief View over a built-in array of N elements. */
  template <std::size_t N>
  XGBOOST_DEVICE constexpr Span(element_type (&arr)[N])  // NOLINT
      __span_noexcept : size_(N), data_(&arr[0]) {}

  /*! \brief View over a mutable container exposing size() and data(). */
  template <class Container,
            class = typename std::enable_if<
              !std::is_const<element_type>::value && !detail::IsSpan<Container>::value &&
              std::is_convertible<typename Container::pointer,
                                  pointer>::value &&
              std::is_convertible<
                typename Container::pointer,
                decltype(std::declval<Container>().data())>::value>>
  XGBOOST_DEVICE Span(Container& _cont) :  // NOLINT
      size_(_cont.size()), data_(_cont.data()) {}

  /*! \brief View over a const container exposing size() and data(). */
  template <class Container,
            class = typename std::enable_if<
              std::is_const<element_type>::value && !detail::IsSpan<Container>::value &&
              std::is_convertible<typename Container::pointer, pointer>::value &&
              std::is_convertible<
                typename Container::pointer,
                decltype(std::declval<Container>().data())>::value>>
  XGBOOST_DEVICE Span(const Container& _cont) : size_(_cont.size()),  // NOLINT
                                                data_(_cont.data()) {}

  /*! \brief Conversion from a span with another element type or extent. */
  template <class U, detail::ptrdiff_t OtherExtent,
            class = typename std::enable_if<
              detail::IsAllowedElementTypeConversion<U, T>::value &&
              detail::IsAllowedExtentConversion<OtherExtent, Extent>::value>>
  XGBOOST_DEVICE constexpr Span(const Span<U, OtherExtent>& _other)   // NOLINT
      __span_noexcept : size_(_other.size()), data_(_other.data()) {}

  XGBOOST_DEVICE constexpr Span(const Span& _other)
      __span_noexcept : size_(_other.size()), data_(_other.data()) {}

  XGBOOST_DEVICE Span& operator=(const Span& _other) __span_noexcept {
    size_ = _other.size();
    data_ = _other.data();
    return *this;
  }

  XGBOOST_DEVICE ~Span() __span_noexcept {};  // NOLINT

  // Iterators. They store a pointer to *this, so they must not outlive the
  // Span object they were obtained from.
  XGBOOST_DEVICE constexpr iterator begin() const __span_noexcept {  // NOLINT
    return {this, 0};
  }

  XGBOOST_DEVICE constexpr iterator end() const __span_noexcept {  // NOLINT
    return {this, size()};
  }

  XGBOOST_DEVICE constexpr const_iterator cbegin() const __span_noexcept {  // NOLINT
    return {this, 0};
  }

  XGBOOST_DEVICE constexpr const_iterator cend() const __span_noexcept {  // NOLINT
    return {this, size()};
  }

  XGBOOST_DEVICE constexpr reverse_iterator rbegin() const __span_noexcept {  // NOLINT
    return reverse_iterator{end()};
  }

  XGBOOST_DEVICE constexpr reverse_iterator rend() const __span_noexcept {  // NOLINT
    return reverse_iterator{begin()};
  }

  XGBOOST_DEVICE constexpr const_reverse_iterator crbegin() const __span_noexcept {  // NOLINT
    return const_reverse_iterator{cend()};
  }

  XGBOOST_DEVICE constexpr const_reverse_iterator crend() const __span_noexcept {  // NOLINT
    return const_reverse_iterator{cbegin()};
  }

  /*! \brief Checked element access; `_idx` must lie in [0, size()). */
  XGBOOST_DEVICE reference operator[](index_type _idx) const {
    SPAN_CHECK(_idx >= 0 && _idx < size());
    return data()[_idx];
  }

  /*! \brief Same as operator[]. */
  XGBOOST_DEVICE reference operator()(index_type _idx) const {
    return this->operator[](_idx);
  }

  XGBOOST_DEVICE constexpr pointer data() const __span_noexcept {   // NOLINT
    return data_;
  }

  // Observers
  XGBOOST_DEVICE constexpr index_type size() const __span_noexcept {  // NOLINT
    return size_;
  }
  XGBOOST_DEVICE constexpr index_type size_bytes() const __span_noexcept {  // NOLINT
    return size() * sizeof(T);
  }

  XGBOOST_DEVICE constexpr bool empty() const __span_noexcept {  // NOLINT
    return size() == 0;
  }

  // Subviews

  /*! \brief The first Count elements (static count). */
  template <detail::ptrdiff_t Count >
  XGBOOST_DEVICE Span<element_type, Count> first() const {  // NOLINT
    SPAN_CHECK(Count >= 0 && Count <= size());
    return {data(), Count};
  }

  /*! \brief The first `_count` elements (runtime count). */
  XGBOOST_DEVICE Span<element_type, dynamic_extent> first(  // NOLINT
      detail::ptrdiff_t _count) const {
    SPAN_CHECK(_count >= 0 && _count <= size());
    return {data(), _count};
  }

  /*! \brief The last Count elements (static count). */
  template <detail::ptrdiff_t Count >
  XGBOOST_DEVICE Span<element_type, Count> last() const {  // NOLINT
    SPAN_CHECK(Count >=0 && size() - Count >= 0);
    return {data() + size() - Count, Count};
  }

  /*! \brief The last `_count` elements (runtime count). */
  XGBOOST_DEVICE Span<element_type, dynamic_extent> last(  // NOLINT
      detail::ptrdiff_t _count) const {
    SPAN_CHECK(_count >= 0 && _count <= size());
    return subspan(size() - _count, _count);
  }

  /*!
   * If Count is std::dynamic_extent, r.size() == this->size() - Offset;
   * Otherwise r.size() == Count.
   *
   * Offset may equal size(), which yields an empty span positioned at the
   * end; any larger Offset is rejected.
   */
  template <detail::ptrdiff_t Offset,
            detail::ptrdiff_t Count = dynamic_extent>
  XGBOOST_DEVICE auto subspan() const ->                   // NOLINT
      Span<element_type,
           detail::ExtentValue<Extent, Offset, Count>::value> {
    SPAN_CHECK(Offset >= 0 && Offset <= size());
    SPAN_CHECK(Count == dynamic_extent ||
               (Count >= 0 && Offset + Count <= size()));

    return {data() + Offset, Count == dynamic_extent ? size() - Offset : Count};
  }

  /*!
   * Runtime counterpart of subspan<Offset, Count>(). `_offset` must lie in
   * [0, size()]; `_offset == size()` yields an empty span, which is what
   * last(0) relies on.
   */
  XGBOOST_DEVICE Span<element_type, dynamic_extent> subspan(  // NOLINT
      detail::ptrdiff_t _offset,
      detail::ptrdiff_t _count = dynamic_extent) const {
    SPAN_CHECK(_offset >= 0 && _offset <= size());
    SPAN_CHECK((_count == dynamic_extent) ||
               (_count >= 0 && _offset + _count <= size()));

    return {data() + _offset, _count ==
            dynamic_extent ? size() - _offset : _count};
  }

 private:
  index_type size_;  // number of elements in the view
  pointer data_;     // first element of the view (not owned)
};
/*!
 * \brief Element-wise equality of two spans.
 *
 * Spans of different sizes are never equal. The two iterators are declared
 * separately because `l` and `r` may be different Span instantiations, and a
 * single `auto a = ..., b = ...;` declaration requires both initializers to
 * deduce the same type.
 */
template <class T, detail::ptrdiff_t X, class U, detail::ptrdiff_t Y>
XGBOOST_DEVICE bool operator==(Span<T, X> l, Span<U, Y> r) {
  if (l.size() != r.size()) {
    return false;
  }
  auto l_beg = l.cbegin();
  auto r_beg = r.cbegin();
  for (; l_beg != l.cend(); ++l_beg, ++r_beg) {
    if (*l_beg != *r_beg) {
      return false;
    }
  }
  return true;
}
// Negation of the element-wise operator== on spans.
template <class T, detail::ptrdiff_t X, class U, detail::ptrdiff_t Y>
XGBOOST_DEVICE constexpr bool operator!=(Span<T, X> l, Span<U, Y> r) {
  return (l == r) ? false : true;
}
// Lexicographical "less than" on spans, using the default comparator of
// detail::LexicographicalCompare.
template <class T, detail::ptrdiff_t X, class U, detail::ptrdiff_t Y>
XGBOOST_DEVICE constexpr bool operator<(Span<T, X> l, Span<U, Y> r) {
  return detail::LexicographicalCompare<
      typename Span<T, X>::iterator, typename Span<U, Y>::iterator>(
          l.begin(), l.end(), r.begin(), r.end());
}
// "Less than or equal" on spans, defined as the negation of operator>.
template <class T, detail::ptrdiff_t X, class U, detail::ptrdiff_t Y>
XGBOOST_DEVICE constexpr bool operator<=(Span<T, X> l, Span<U, Y> r) {
  return (l > r) ? false : true;
}
/*!
 * \brief Lexicographical "greater than" on spans.
 *
 * Computed as `r < l`: the operands are swapped and compared with
 * detail::Less. Running the comparison in the original order with
 * detail::Greater is not equivalent, because LexicographicalCompare reports
 * true when its first range is a proper prefix of the second, which would
 * make a shorter prefix compare greater than the longer span.
 */
template <class T, detail::ptrdiff_t X, class U, detail::ptrdiff_t Y>
XGBOOST_DEVICE constexpr bool operator>(Span<T, X> l, Span<U, Y> r) {
  return detail::LexicographicalCompare<
      typename Span<U, Y>::iterator, typename Span<T, X>::iterator,
      detail::Less<typename Span<U, Y>::element_type>>(r.begin(), r.end(),
                                                       l.begin(), l.end());
}
// "Greater than or equal" on spans, defined as the negation of operator<.
template <class T, detail::ptrdiff_t X, class U, detail::ptrdiff_t Y>
XGBOOST_DEVICE constexpr bool operator>=(Span<T, X> l, Span<U, Y> r) {
  return (l < r) ? false : true;
}
/*!
 * \brief Read-only byte view of a span: same starting address, size_bytes()
 *        elements of type `const byte`.
 */
template <class T, detail::ptrdiff_t E>
XGBOOST_DEVICE auto as_bytes(Span<T, E> s) __span_noexcept ->           // NOLINT
    Span<const byte, detail::ExtentAsBytesValue<T, E>::value> {
  const byte* raw = reinterpret_cast<const byte*>(s.data());
  return {raw, s.size_bytes()};
}
/*!
 * \brief Writable byte view of a span: same starting address, size_bytes()
 *        elements of type `byte`.
 */
template <class T, detail::ptrdiff_t E>
XGBOOST_DEVICE auto as_writable_bytes(Span<T, E> s) __span_noexcept ->  // NOLINT
    Span<byte, detail::ExtentAsBytesValue<T, E>::value> {
  byte* raw = reinterpret_cast<byte*>(s.data());
  return {raw, s.size_bytes()};
}
} // namespace common
} // namespace xgboost
#if defined(_MSC_VER) &&_MSC_VER < 1910
#undef constexpr
#pragma pop_macro("constexpr")
#undef __span_noexcept
#endif // _MSC_VER < 1910
#endif // XGBOOST_COMMON_SPAN_H_

View File

@@ -10,9 +10,10 @@
#include <vector>
#include <type_traits> // enable_if
#include "host_device_vector.h"
#include "xgboost/host_device_vector.h"
#include "xgboost/span.h"
#include "common.h"
#include "span.h"
#if defined (__CUDACC__)
#include "device_helpers.cuh"

View File

@@ -13,7 +13,8 @@
#include "xgboost/data.h"
#include "xgboost/json.h"
#include "xgboost/logging.h"
#include "../common/span.h"
#include "xgboost/span.h"
#include "../common/bitfield.h"
namespace xgboost {

View File

@@ -11,6 +11,7 @@
#include "./simple_dmatrix.h"
#include "./simple_csr_source.h"
#include "../common/io.h"
#include "../common/group_data.h"
#if DMLC_ENABLE_STD_THREAD
#include "./sparse_page_source.h"
@@ -322,7 +323,35 @@ data::SparsePageFormat::DecideFormat(const std::string& cache_prefix) {
return std::make_pair(raw, raw);
}
}
/*!
 * \brief Build the transpose of this page.
 *
 * Uses a ParallelGroupBuilder keyed by the feature index of each entry, in
 * two passes over the rows: the first registers one budget unit per entry
 * under its feature index, the second pushes an Entry carrying the global row
 * id (base_rowid + local row) and the original value.
 *
 * \param num_columns Number of keys handed to the builder's InitBudget.
 * \return The transposed page.
 */
SparsePage SparsePage::GetTranspose(int num_columns) const {
  SparsePage transposed;
  common::ParallelGroupBuilder<Entry> builder(&transposed.offset.HostVector(),
                                              &transposed.data.HostVector());
  builder.InitBudget(num_columns, omp_get_max_threads());
  long n_rows = static_cast<long>(this->Size());  // NOLINT(*)

  // Pass 1: count how many entries each feature index will receive.
#pragma omp parallel for default(none) shared(n_rows, builder) schedule(static)
  for (long ridx = 0; ridx < n_rows; ++ridx) {  // NOLINT(*)
    const int thread_id = omp_get_thread_num();
    auto row = (*this)[ridx];
    for (const auto& e : row) {
      builder.AddBudget(e.index, thread_id);
    }
  }

  builder.InitStorage();

  // Pass 2: write the entries, now indexed by row id, under their feature.
#pragma omp parallel for default(none) shared(n_rows, builder) schedule(static)
  for (long ridx = 0; ridx < n_rows; ++ridx) {  // NOLINT(*)
    const int thread_id = omp_get_thread_num();
    auto row = (*this)[ridx];
    for (const auto& e : row) {
      const Entry moved(static_cast<bst_uint>(this->base_rowid + ridx),
                        e.fvalue);
      builder.Push(e.index, moved, thread_id);
    }
  }
  return transposed;
}
void SparsePage::Push(const SparsePage &batch) {
auto& data_vec = data.HostVector();
auto& offset_vec = offset.HostVector();

View File

@@ -21,6 +21,7 @@
#include <vector>
#include "sparse_page_writer.h"
#include "../common/common.h"
namespace {

View File

@@ -10,10 +10,13 @@
#include <xgboost/gbm.h>
#include <xgboost/logging.h>
#include <xgboost/linear_updater.h>
#include <vector>
#include <string>
#include <sstream>
#include <algorithm>
#include "gblinear_model.h"
#include "../common/timer.h"
namespace xgboost {

View File

@@ -6,11 +6,6 @@
*/
#include <dmlc/omp.h>
#include <dmlc/parameter.h>
#include <dmlc/timer.h>
#include <xgboost/logging.h>
#include <xgboost/gbm.h>
#include <xgboost/predictor.h>
#include <xgboost/tree_updater.h>
#include <vector>
#include <memory>
@@ -19,11 +14,16 @@
#include <limits>
#include <algorithm>
#include "../common/common.h"
#include "../common/host_device_vector.h"
#include "../common/random.h"
#include "xgboost/logging.h"
#include "xgboost/gbm.h"
#include "xgboost/predictor.h"
#include "xgboost/tree_updater.h"
#include "xgboost/host_device_vector.h"
#include "gbtree.h"
#include "gbtree_model.h"
#include "../common/common.h"
#include "../common/random.h"
#include "../common/timer.h"

View File

@@ -23,8 +23,9 @@
#include <string>
#include "gbtree_model.h"
#include "xgboost/host_device_vector.h"
#include "../common/common.h"
#include "../common/host_device_vector.h"
#include "../common/timer.h"
namespace xgboost {

View File

@@ -19,11 +19,12 @@
#include <ios>
#include <utility>
#include <vector>
#include "./common/common.h"
#include "./common/host_device_vector.h"
#include "./common/io.h"
#include "./common/random.h"
#include "./common/timer.h"
#include "xgboost/host_device_vector.h"
#include "common/common.h"
#include "common/io.h"
#include "common/random.h"
#include "common/timer.h"
namespace {

View File

@@ -7,12 +7,13 @@
#include <thrust/inner_product.h>
#include <xgboost/data.h>
#include <xgboost/linear_updater.h>
#include "xgboost/span.h"
#include "coordinate_common.h"
#include "../common/common.h"
#include "../common/span.h"
#include "../common/device_helpers.cuh"
#include "../common/timer.h"
#include "./param.h"
#include "coordinate_common.h"
namespace xgboost {
namespace linear {

View File

@@ -11,7 +11,7 @@
#include <vector>
#include "../common/host_device_vector.h"
#include "xgboost/host_device_vector.h"
#include "../common/math.h"
namespace {

View File

@@ -4,12 +4,13 @@
* \brief Provides an implementation of the hinge loss function
* \author Henry Gouk
*/
#include <xgboost/objective.h>
#include "xgboost/objective.h"
#include "xgboost/span.h"
#include "xgboost/host_device_vector.h"
#include "../common/math.h"
#include "../common/transform.h"
#include "../common/common.h"
#include "../common/span.h"
#include "../common/host_device_vector.h"
namespace xgboost {
namespace obj {

View File

@@ -6,7 +6,7 @@
#include <xgboost/objective.h>
#include <dmlc/registry.h>
#include "../common/host_device_vector.h"
#include "xgboost/host_device_vector.h"
namespace dmlc {
DMLC_REGISTRY_ENABLE(::xgboost::ObjFunctionReg);

View File

@@ -12,10 +12,11 @@
#include <memory>
#include <vector>
#include "../common/span.h"
#include "xgboost/span.h"
#include "xgboost/host_device_vector.h"
#include "../common/transform.h"
#include "../common/common.h"
#include "../common/host_device_vector.h"
#include "./regression_loss.h"

View File

@@ -1,11 +1,13 @@
/*!
* Copyright by Contributors 2017
*/
#include <xgboost/predictor.h>
#include <xgboost/tree_model.h>
#include <xgboost/tree_updater.h>
#include "dmlc/logging.h"
#include "../common/host_device_vector.h"
#include "xgboost/predictor.h"
#include "xgboost/tree_model.h"
#include "xgboost/tree_updater.h"
#include "xgboost/logging.h"
#include "xgboost/host_device_vector.h"
#include "../gbm/gbtree_model.h"
namespace xgboost {
namespace predictor {

View File

@@ -6,14 +6,17 @@
#include <thrust/device_ptr.h>
#include <thrust/device_vector.h>
#include <thrust/fill.h>
#include <xgboost/data.h>
#include <xgboost/predictor.h>
#include <xgboost/tree_model.h>
#include <xgboost/tree_updater.h>
#include <memory>
#include "xgboost/data.h"
#include "xgboost/predictor.h"
#include "xgboost/tree_model.h"
#include "xgboost/tree_updater.h"
#include "xgboost/host_device_vector.h"
#include "../gbm/gbtree_model.h"
#include "../common/common.h"
#include "../common/device_helpers.cuh"
#include "../common/host_device_vector.h"
namespace xgboost {
namespace predictor {

View File

@@ -6,17 +6,16 @@
#include <thrust/execution_policy.h>
#include <thrust/iterator/counting_iterator.h>
#include <xgboost/logging.h>
#include <algorithm>
#include <bitset>
#include <string>
#include <sstream>
#include <set>
#include "xgboost/logging.h"
#include "xgboost/span.h"
#include "constraints.cuh"
#include "param.h"
#include "../common/span.h"
#include "../common/device_helpers.cuh"

View File

@@ -12,7 +12,7 @@
#include <vector>
#include "param.h"
#include "../common/span.h"
#include "xgboost/span.h"
#include "../common/bitfield.h"
#include "../common/device_helpers.cuh"

View File

@@ -3,10 +3,8 @@
* \file split_evaluator.cc
* \brief Contains implementations of different split evaluators.
*/
#include "split_evaluator.h"
#include <dmlc/json.h>
#include <dmlc/registry.h>
#include <xgboost/logging.h>
#include <algorithm>
#include <unordered_set>
#include <vector>
@@ -15,9 +13,12 @@
#include <string>
#include <sstream>
#include <utility>
#include "xgboost/logging.h"
#include "xgboost/host_device_vector.h"
#include "param.h"
#include "split_evaluator.h"
#include "../common/common.h"
#include "../common/host_device_vector.h"
namespace dmlc {
DMLC_REGISTRY_ENABLE(::xgboost::tree::SplitEvaluatorReg);

View File

@@ -3,10 +3,10 @@
* \file tree_updater.cc
* \brief Registry of tree updaters.
*/
#include <xgboost/tree_updater.h>
#include <dmlc/registry.h>
#include "../common/host_device_vector.h"
#include "xgboost/tree_updater.h"
#include "xgboost/host_device_vector.h"
namespace dmlc {
DMLC_REGISTRY_ENABLE(::xgboost::TreeUpdaterReg);

View File

@@ -14,13 +14,15 @@
#include <queue>
#include <utility>
#include <vector>
#include "xgboost/host_device_vector.h"
#include "xgboost/span.h"
#include "../common/common.h"
#include "../common/compressed_iterator.h"
#include "../common/device_helpers.cuh"
#include "../common/hist_util.h"
#include "../common/host_device_vector.h"
#include "../common/timer.h"
#include "../common/span.h"
#include "../data/ellpack_page.cuh"
#include "param.h"
#include "updater_gpu_common.cuh"