Support F order for the tensor type. (#8872)

- Add F order support for tensor and view.
- Use a parameter pack for automatic type casting (avoids excessive static_cast for shape).
This commit is contained in:
Jiaming Yuan
2023-03-08 03:27:49 +08:00
committed by GitHub
parent f53055f75e
commit f236640427
9 changed files with 194 additions and 94 deletions

View File

@@ -15,11 +15,11 @@
#include <algorithm>
#include <cassert>
#include <cinttypes> // std::int32_t
#include <cstddef> // std::size_t
#include <cinttypes> // for int32_t
#include <cstddef> // for size_t
#include <limits>
#include <string>
#include <tuple>
#include <tuple> // for make_tuple
#include <type_traits>
#include <utility>
#include <vector>
@@ -37,8 +37,7 @@
#endif // defined (__CUDA__) || defined(__NVCC__)
#endif // LINALG_HD
namespace xgboost {
namespace linalg {
namespace xgboost::linalg {
namespace detail {
struct ArrayInterfaceHandler {
@@ -86,7 +85,7 @@ template <typename I>
struct RangeTag {
I beg;
I end;
constexpr size_t Size() const { return end - beg; }
[[nodiscard]] constexpr size_t Size() const { return end - beg; }
};
/**
@@ -158,14 +157,34 @@ inline LINALG_HD int Popc(uint64_t v) {
#endif // compiler
}
/**
 * \brief Terminal overload of IndexToArr: writes the one remaining index into the
 *        last slot of the array.
 *
 * \param arr  Destination array.
 * \param last The final index value; must be of an integral type.
 */
template <std::size_t D, typename Last>
LINALG_HD void IndexToArr(std::size_t (&arr)[D], Last last) {
  using IndexT = std::remove_reference_t<Last>;
  static_assert(std::is_integral<IndexT>::value, "Invalid index type.");
  constexpr std::size_t kSlot = D - 1;
  arr[kSlot] = last;
}

/**
 * \brief Convert index from parameter pack to C-style array.
 *
 * The leading value is stored at position D - sizeof...(Rest) - 1, then the remaining
 * values are handled recursively, so the last value of the pack always ends up in the
 * last slot of the array.
 *
 * \param arr   Destination array.
 * \param head  The leading index value; must be of an integral type.
 * \param index The remaining index values.
 */
template <std::size_t D, typename Head, typename... Rest>
LINALG_HD void IndexToArr(std::size_t (&arr)[D], Head head, Rest &&...index) {
  constexpr std::size_t kRemaining = sizeof...(Rest);
  static_assert(kRemaining < D, "Index overflow.");
  using IndexT = std::remove_reference_t<Head>;
  static_assert(std::is_integral<IndexT>::value, "Invalid index type.");
  constexpr std::size_t kSlot = D - kRemaining - 1;
  arr[kSlot] = head;
  IndexToArr(arr, std::forward<Rest>(index)...);
}
template <class T, std::size_t N, std::size_t... Idx>
constexpr auto Arr2Tup(T (&arr)[N], std::index_sequence<Idx...>) {
constexpr auto ArrToTuple(T (&arr)[N], std::index_sequence<Idx...>) {
return std::make_tuple(arr[Idx]...);
}
/**
* \brief Convert C-style array to std::tuple.
*/
template <class T, std::size_t N>
constexpr auto Arr2Tup(T (&arr)[N]) {
return Arr2Tup(arr, std::make_index_sequence<N>{});
constexpr auto ArrToTuple(T (&arr)[N]) {
return ArrToTuple(arr, std::make_index_sequence<N>{});
}
// uint division optimization inspired by the CIndexer in cupy. Division operation is
@@ -188,7 +207,7 @@ LINALG_HD auto UnravelImpl(I idx, common::Span<size_t const, D> shape) {
}
}
index[0] = idx;
return Arr2Tup(index);
return ArrToTuple(index);
}
template <size_t dim, typename I, int32_t D>
@@ -252,6 +271,11 @@ constexpr detail::RangeTag<I> Range(I beg, I end) {
return {beg, end};
}
/**
 * \brief Memory layout order of a tensor.
 */
enum Order : std::uint8_t {
  kC = 0,  // C order: row major
  kF = 1,  // F order: column major
};
/**
* \brief A tensor view with static type and dimension. It implements indexing and slicing.
*
@@ -377,7 +401,11 @@ class TensorView {
* \param device Device ordinal
*/
template <typename I, int32_t D>
LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], int32_t device)
LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], std::int32_t device)
: TensorView{data, shape, device, Order::kC} {}
template <typename I, int32_t D>
LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], std::int32_t device, Order order)
: data_{data}, ptr_{data_.data()}, device_{device} {
static_assert(D > 0 && D <= kDim, "Invalid shape.");
// shape
@@ -386,7 +414,19 @@ class TensorView {
shape_[i] = 1;
}
// stride
detail::CalcStride(shape_, stride_);
switch (order) {
case Order::kC: {
detail::CalcStride(shape_, stride_);
break;
}
case Order::kF: {
detail::CalcStride<kDim, true>(shape_, stride_);
break;
}
default: {
SPAN_CHECK(false);
}
}
// size
this->CalcSize();
}
@@ -490,17 +530,17 @@ class TensorView {
/**
* \brief Number of items in the tensor.
*/
LINALG_HD size_t Size() const { return size_; }
LINALG_HD [[nodiscard]] std::size_t Size() const { return size_; }
/**
* \brief Whether this is a contiguous array; returns true for both C-contiguous and F-contiguous arrays.
*/
LINALG_HD bool Contiguous() const {
LINALG_HD [[nodiscard]] bool Contiguous() const {
return data_.size() == this->Size() || this->CContiguous() || this->FContiguous();
}
/**
* \brief Whether it's a c-contiguous array.
*/
LINALG_HD bool CContiguous() const {
LINALG_HD [[nodiscard]] bool CContiguous() const {
StrideT stride;
static_assert(std::is_same<decltype(stride), decltype(stride_)>::value);
// It's contiguous if the stride can be calculated from shape.
@@ -510,7 +550,7 @@ class TensorView {
/**
* \brief Whether it's a f-contiguous array.
*/
LINALG_HD bool FContiguous() const {
LINALG_HD [[nodiscard]] bool FContiguous() const {
StrideT stride;
static_assert(std::is_same<decltype(stride), decltype(stride_)>::value);
// It's contiguous if the stride can be calculated from shape.
@@ -530,16 +570,38 @@ class TensorView {
/**
* \brief Constructor for automatic type deduction.
*/
template <typename Container, typename I, int32_t D,
std::enable_if_t<!common::detail::IsSpan<Container>::value> * = nullptr>
auto MakeTensorView(Container &data, I const (&shape)[D], int32_t device) { // NOLINT
template <typename Container, typename... S,
std::enable_if_t<!common::detail::IsSpan<Container>::value &&
!std::is_pointer_v<Container>> * = nullptr>
auto MakeTensorView(Context const *ctx, Container &data, S &&...shape) { // NOLINT
using T = typename Container::value_type;
return TensorView<T, D>{data, shape, device};
std::size_t in_shape[sizeof...(S)];
detail::IndexToArr(in_shape, std::forward<S>(shape)...);
return TensorView<T, sizeof...(S)>{data, in_shape, ctx->gpu_id};
}
template <typename T, typename I, int32_t D>
LINALG_HD auto MakeTensorView(common::Span<T> data, I const (&shape)[D], int32_t device) {
return TensorView<T, D>{data, shape, device};
template <typename T, typename... S>
LINALG_HD auto MakeTensorView(std::int32_t device, common::Span<T> data, S &&...shape) {
std::size_t in_shape[sizeof...(S)];
detail::IndexToArr(in_shape, std::forward<S>(shape)...);
return TensorView<T, sizeof...(S)>{data, in_shape, device};
}
/**
 * \brief Create a tensor view over a span, with the device ordinal read from the context.
 *
 * \param ctx   Context that supplies the device ordinal (gpu_id).
 * \param data  The underlying data.
 * \param shape Extent of each dimension, passed as individual arguments.
 */
template <typename T, typename... S>
auto MakeTensorView(Context const *ctx, common::Span<T> data, S &&...shape) {
  auto const device = ctx->gpu_id;
  return MakeTensorView(device, data, std::forward<S>(shape)...);
}
/**
 * \brief Create a tensor view over a HostDeviceVector.
 *
 * The host span is used when the context reports CPU, otherwise the device span is used.
 *
 * \param ctx   Context that selects host or device and supplies the device ordinal.
 * \param data  The vector holding the underlying data.
 * \param shape Extent of each dimension, passed as individual arguments.
 */
template <typename T, typename... S>
auto MakeTensorView(Context const *ctx, HostDeviceVector<T> *data, S &&...shape) {
  bool const on_host = ctx->IsCPU();
  auto storage = on_host ? data->HostSpan() : data->DeviceSpan();
  return MakeTensorView(ctx->gpu_id, storage, std::forward<S>(shape)...);
}
/**
 * \brief Create a read-only tensor view over a constant HostDeviceVector.
 *
 * The const host span is used when the context reports CPU, otherwise the const device
 * span is used.
 *
 * \param ctx   Context that selects host or device and supplies the device ordinal.
 * \param data  The vector holding the underlying data.
 * \param shape Extent of each dimension, passed as individual arguments.
 */
template <typename T, typename... S>
auto MakeTensorView(Context const *ctx, HostDeviceVector<T> const *data, S &&...shape) {
  bool const on_host = ctx->IsCPU();
  auto storage = on_host ? data->ConstHostSpan() : data->ConstDeviceSpan();
  return MakeTensorView(ctx->gpu_id, storage, std::forward<S>(shape)...);
}
/**
@@ -559,6 +621,13 @@ LINALG_HD auto UnravelIndex(size_t idx, std::size_t const (&shape)[D]) {
return UnravelIndex(idx, common::Span<std::size_t const, D>(shape));
}
/**
 * \brief Overload of UnravelIndex that accepts the shape as individual arguments.
 *
 * The extents are packed into a local array and forwarded to the Span-based overload.
 *
 * \param idx   The linear index.
 * \param shape Extent of each dimension.
 */
template <typename... S>
LINALG_HD auto UnravelIndex(std::size_t idx, S... shape) {
  constexpr std::size_t kNumDims = sizeof...(S);
  std::size_t extents[kNumDims];
  detail::IndexToArr(extents, shape...);
  using ShapeSpan = common::Span<std::size_t const, kNumDims>;
  return UnravelIndex(idx, ShapeSpan(extents));
}
/**
* \brief A view over a vector, specialization of Tensor
*
@@ -676,6 +745,7 @@ class Tensor {
private:
HostDeviceVector<T> data_;
ShapeT shape_{0};
Order order_{Order::kC};
template <typename I, std::int32_t D>
void Initialize(I const (&shape)[D], std::int32_t device) {
@@ -701,11 +771,12 @@ class Tensor {
* See \ref TensorView for parameters of this constructor.
*/
template <typename I, int32_t D>
explicit Tensor(I const (&shape)[D], int32_t device)
: Tensor{common::Span<I const, D>{shape}, device} {}
explicit Tensor(I const (&shape)[D], std::int32_t device, Order order = kC)
: Tensor{common::Span<I const, D>{shape}, device, order} {}
template <typename I, size_t D>
explicit Tensor(common::Span<I const, D> shape, int32_t device) {
explicit Tensor(common::Span<I const, D> shape, std::int32_t device, Order order = kC)
: order_{order} {
// No device unroll as this is a host only function.
std::copy(shape.data(), shape.data() + D, shape_);
for (auto i = D; i < kDim; ++i) {
@@ -724,7 +795,8 @@ class Tensor {
* Initialize from 2 host iterators.
*/
template <typename It, typename I, int32_t D>
explicit Tensor(It begin, It end, I const (&shape)[D], int32_t device) {
explicit Tensor(It begin, It end, I const (&shape)[D], std::int32_t device, Order order = kC)
: order_{order} {
auto &h_vec = data_.HostVector();
h_vec.insert(h_vec.begin(), begin, end);
// shape
@@ -732,8 +804,9 @@ class Tensor {
}
template <typename I, int32_t D>
explicit Tensor(std::initializer_list<T> data, I const (&shape)[D],
int32_t device = Context::kCpuId) {
explicit Tensor(std::initializer_list<T> data, I const (&shape)[D], std::int32_t device,
Order order = kC)
: order_{order} {
auto &h_vec = data_.HostVector();
h_vec = data;
// shape
@@ -763,27 +836,27 @@ class Tensor {
if (device >= 0) {
data_.SetDevice(device);
auto span = data_.DeviceSpan();
return {span, shape_, device};
return {span, shape_, device, order_};
} else {
auto span = data_.HostSpan();
return {span, shape_, device};
return {span, shape_, device, order_};
}
}
TensorView<T const, kDim> View(int32_t device) const {
if (device >= 0) {
data_.SetDevice(device);
auto span = data_.ConstDeviceSpan();
return {span, shape_, device};
return {span, shape_, device, order_};
} else {
auto span = data_.ConstHostSpan();
return {span, shape_, device};
return {span, shape_, device, order_};
}
}
auto HostView() const { return this->View(-1); }
auto HostView() { return this->View(-1); }
size_t Size() const { return data_.Size(); }
[[nodiscard]] size_t Size() const { return data_.Size(); }
auto Shape() const { return common::Span<size_t const, kDim>{shape_}; }
auto Shape(size_t i) const { return shape_[i]; }
@@ -837,12 +910,26 @@ class Tensor {
void Reshape(size_t (&shape)[D]) {
this->Reshape(common::Span<size_t const, D>{shape});
}
/**
 * \brief Slice the tensor through its host view (const overload).
 *
 * \param slices Slice arguments, forwarded unchanged to the Slice method of the host view.
 */
template <typename... S>
auto Slice(S &&...slices) const {
  auto host_view = this->HostView();
  return host_view.Slice(std::forward<S>(slices)...);
}
/**
 * \brief Slice the tensor through its host view (non-const overload).
 *
 * \param slices Slice arguments, forwarded unchanged to the Slice method of the host view.
 */
template <typename... S>
auto Slice(S &&...slices) {
  auto host_view = this->HostView();
  return host_view.Slice(std::forward<S>(slices)...);
}
/**
* \brief Set device ordinal for this tensor.
*/
void SetDevice(int32_t device) const { data_.SetDevice(device); }
int32_t DeviceIdx() const { return data_.DeviceIdx(); }
[[nodiscard]] int32_t DeviceIdx() const { return data_.DeviceIdx(); }
};
template <typename T>
@@ -900,8 +987,7 @@ void Stack(Tensor<T, D> *l, Tensor<T, D> const &r) {
shape[0] = l->Shape(0) + r.Shape(0);
});
}
} // namespace linalg
} // namespace xgboost
} // namespace xgboost::linalg
#if defined(LINALG_HD)
#undef LINALG_HD