Extend array interface to handle ndarray. (#7434)
* Extend array interface to handle ndarray. The `ArrayInterface` class is extended to support multi-dimensional array inputs. Previously, this class handled only 2-dim input (a vector was also treated as a matrix). This PR specifies the expected dimension at compile time, so the array interface can automatically perform various checks on the input data. Also, adapters like CSR are now more rigorous about their input. Lastly, row vectors and column vectors are handled without intervention from the caller.
This commit is contained in:
@@ -41,9 +41,10 @@ TEST(Adapter, CSRArrayAdapter) {
|
||||
HostDeviceVector<bst_feature_t> indices;
|
||||
size_t n_features = 100, n_samples = 10;
|
||||
RandomDataGenerator{n_samples, n_features, 0.5}.GenerateCSR(&values, &indptr, &indices);
|
||||
auto indptr_arr = MakeArrayInterface(indptr.HostPointer(), indptr.Size());
|
||||
auto values_arr = MakeArrayInterface(values.HostPointer(), values.Size());
|
||||
auto indices_arr = MakeArrayInterface(indices.HostPointer(), indices.Size());
|
||||
using linalg::MakeVec;
|
||||
auto indptr_arr = MakeVec(indptr.HostPointer(), indptr.Size()).ArrayInterfaceStr();
|
||||
auto values_arr = MakeVec(values.HostPointer(), values.Size()).ArrayInterfaceStr();
|
||||
auto indices_arr = MakeVec(indices.HostPointer(), indices.Size()).ArrayInterfaceStr();
|
||||
auto adapter = data::CSRArrayAdapter(
|
||||
StringView{indptr_arr.c_str(), indptr_arr.size()},
|
||||
StringView{values_arr.c_str(), values_arr.size()},
|
||||
|
||||
@@ -11,21 +11,22 @@ TEST(ArrayInterface, Initialize) {
|
||||
size_t constexpr kRows = 10, kCols = 10;
|
||||
HostDeviceVector<float> storage;
|
||||
auto array = RandomDataGenerator{kRows, kCols, 0}.GenerateArrayInterface(&storage);
|
||||
auto arr_interface = ArrayInterface(array);
|
||||
ASSERT_EQ(arr_interface.num_rows, kRows);
|
||||
ASSERT_EQ(arr_interface.num_cols, kCols);
|
||||
auto arr_interface = ArrayInterface<2>(StringView{array});
|
||||
ASSERT_EQ(arr_interface.Shape(0), kRows);
|
||||
ASSERT_EQ(arr_interface.Shape(1), kCols);
|
||||
ASSERT_EQ(arr_interface.data, storage.ConstHostPointer());
|
||||
ASSERT_EQ(arr_interface.ElementSize(), 4);
|
||||
ASSERT_EQ(arr_interface.type, ArrayInterface::kF4);
|
||||
ASSERT_EQ(arr_interface.type, ArrayInterfaceHandler::kF4);
|
||||
|
||||
HostDeviceVector<size_t> u64_storage(storage.Size());
|
||||
std::string u64_arr_str;
|
||||
Json::Dump(GetArrayInterface(&u64_storage, kRows, kCols), &u64_arr_str);
|
||||
std::string u64_arr_str{linalg::TensorView<size_t const, 2>{
|
||||
u64_storage.ConstHostSpan(), {kRows, kCols}, GenericParameter::kCpuId}
|
||||
.ArrayInterfaceStr()};
|
||||
std::copy(storage.ConstHostVector().cbegin(), storage.ConstHostVector().cend(),
|
||||
u64_storage.HostSpan().begin());
|
||||
auto u64_arr = ArrayInterface{u64_arr_str};
|
||||
auto u64_arr = ArrayInterface<2>{u64_arr_str};
|
||||
ASSERT_EQ(u64_arr.ElementSize(), 8);
|
||||
ASSERT_EQ(u64_arr.type, ArrayInterface::kU8);
|
||||
ASSERT_EQ(u64_arr.type, ArrayInterfaceHandler::kU8);
|
||||
}
|
||||
|
||||
TEST(ArrayInterface, Error) {
|
||||
@@ -38,23 +39,22 @@ TEST(ArrayInterface, Error) {
|
||||
Json(Boolean(false))};
|
||||
|
||||
auto const& column_obj = get<Object>(column);
|
||||
std::pair<size_t, size_t> shape{kRows, kCols};
|
||||
std::string typestr{"<f4"};
|
||||
size_t n = kRows * kCols;
|
||||
|
||||
// missing version
|
||||
EXPECT_THROW(ArrayInterfaceHandler::ExtractData(column_obj, shape),
|
||||
dmlc::Error);
|
||||
column["version"] = Integer(static_cast<Integer::Int>(1));
|
||||
EXPECT_THROW(ArrayInterfaceHandler::ExtractData(column_obj, n), dmlc::Error);
|
||||
column["version"] = 3;
|
||||
// missing data
|
||||
EXPECT_THROW(ArrayInterfaceHandler::ExtractData(column_obj, shape),
|
||||
EXPECT_THROW(ArrayInterfaceHandler::ExtractData(column_obj, n),
|
||||
dmlc::Error);
|
||||
column["data"] = j_data;
|
||||
// missing typestr
|
||||
EXPECT_THROW(ArrayInterfaceHandler::ExtractData(column_obj, shape),
|
||||
EXPECT_THROW(ArrayInterfaceHandler::ExtractData(column_obj, n),
|
||||
dmlc::Error);
|
||||
column["typestr"] = String("<f4");
|
||||
// nullptr is not valid
|
||||
EXPECT_THROW(ArrayInterfaceHandler::ExtractData(column_obj, shape),
|
||||
EXPECT_THROW(ArrayInterfaceHandler::ExtractData(column_obj, n),
|
||||
dmlc::Error);
|
||||
|
||||
HostDeviceVector<float> storage;
|
||||
@@ -63,22 +63,52 @@ TEST(ArrayInterface, Error) {
|
||||
Json(Integer(reinterpret_cast<Integer::Int>(storage.ConstHostPointer()))),
|
||||
Json(Boolean(false))};
|
||||
column["data"] = j_data;
|
||||
EXPECT_NO_THROW(ArrayInterfaceHandler::ExtractData(column_obj, shape));
|
||||
EXPECT_NO_THROW(ArrayInterfaceHandler::ExtractData(column_obj, n));
|
||||
}
|
||||
|
||||
TEST(ArrayInterface, GetElement) {
|
||||
size_t kRows = 4, kCols = 2;
|
||||
HostDeviceVector<float> storage;
|
||||
auto intefrace_str = RandomDataGenerator{kRows, kCols, 0}.GenerateArrayInterface(&storage);
|
||||
ArrayInterface array_interface{intefrace_str};
|
||||
ArrayInterface<2> array_interface{intefrace_str};
|
||||
|
||||
auto const& h_storage = storage.ConstHostVector();
|
||||
for (size_t i = 0; i < kRows; ++i) {
|
||||
for (size_t j = 0; j < kCols; ++j) {
|
||||
float v0 = array_interface.GetElement(i, j);
|
||||
float v0 = array_interface(i, j);
|
||||
float v1 = h_storage.at(i * kCols + j);
|
||||
ASSERT_EQ(v0, v1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Checks that an array interface string generated with one extent equal to 1
// can be loaded as a 1-dim ArrayInterface, and that both `n` and `Shape(0)`
// report the non-trivial extent (1000).
// NOTE(review): assumes GenerateArrayInterface describes a kRows x kCols
// array -- confirm against RandomDataGenerator.
TEST(ArrayInterface, TrivialDim) {
|
||||
size_t kRows{1000}, kCols = 1;
|
||||
HostDeviceVector<float> storage;
|
||||
auto interface_str = RandomDataGenerator{kRows, kCols, 0}.GenerateArrayInterface(&storage);
|
||||
{
|
||||
// First case: kRows == 1000, kCols == 1.
ArrayInterface<1> arr_i{interface_str};
|
||||
ASSERT_EQ(arr_i.n, kRows);
|
||||
ASSERT_EQ(arr_i.Shape(0), kRows);
|
||||
}
|
||||
|
||||
// After the swap kRows == 1 and kCols == 1000, so the second case exercises
// the transposed layout.
std::swap(kRows, kCols);
|
||||
interface_str = RandomDataGenerator{kRows, kCols, 0}.GenerateArrayInterface(&storage);
|
||||
{
|
||||
ArrayInterface<1> arr_i{interface_str};
|
||||
ASSERT_EQ(arr_i.n, kCols);
|
||||
ASSERT_EQ(arr_i.Shape(0), kCols);
|
||||
}
|
||||
}
|
||||
|
||||
// Compile-time checks (static_assert only, nothing runs at test time) that
// ToDType<T>::kType equals the matching ArrayInterfaceHandler type code for
// float/double, uint32_t/uint64_t and int32_t/int64_t.
TEST(ArrayInterface, ToDType) {
|
||||
static_assert(ToDType<float>::kType == ArrayInterfaceHandler::kF4, "");
|
||||
static_assert(ToDType<double>::kType == ArrayInterfaceHandler::kF8, "");
|
||||
|
||||
static_assert(ToDType<uint32_t>::kType == ArrayInterfaceHandler::kU4, "");
|
||||
static_assert(ToDType<uint64_t>::kType == ArrayInterfaceHandler::kU8, "");
|
||||
|
||||
static_assert(ToDType<int32_t>::kType == ArrayInterfaceHandler::kI4, "");
|
||||
static_assert(ToDType<int64_t>::kType == ArrayInterfaceHandler::kI8, "");
|
||||
}
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -32,11 +32,24 @@ TEST(ArrayInterface, Stream) {
|
||||
dh::caching_device_vector<uint64_t> out(1, 0);
|
||||
uint64_t dur = 1e9;
|
||||
dh::LaunchKernel{1, 1, 0, stream}(SleepForTest, out.data().get(), dur);
|
||||
ArrayInterface arr(arr_str);
|
||||
ArrayInterface<2> arr(arr_str);
|
||||
|
||||
auto t = out[0];
|
||||
CHECK_GE(t, dur);
|
||||
|
||||
cudaStreamDestroy(stream);
|
||||
}
|
||||
|
||||
// Checks ArrayInterfaceHandler::IsCudaPtr on three inputs: a std::vector
// buffer (expected false), a dh::device_vector buffer (expected true) and
// nullptr (expected false).
// NOTE(review): each check is followed by dh::safe_cuda(cudaGetLastError());
// presumably this verifies that the pointer query left no pending CUDA error
// behind -- confirm dh::safe_cuda semantics.
TEST(ArrayInterface, Ptr) {
|
||||
std::vector<float> h_data(10);
|
||||
ASSERT_FALSE(ArrayInterfaceHandler::IsCudaPtr(h_data.data()));
|
||||
dh::safe_cuda(cudaGetLastError());
|
||||
|
||||
dh::device_vector<float> d_data(10);
|
||||
ASSERT_TRUE(ArrayInterfaceHandler::IsCudaPtr(d_data.data().get()));
|
||||
dh::safe_cuda(cudaGetLastError());
|
||||
|
||||
ASSERT_FALSE(ArrayInterfaceHandler::IsCudaPtr(nullptr));
|
||||
dh::safe_cuda(cudaGetLastError());
|
||||
}
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -19,6 +19,7 @@ Json GenerateDenseColumn(std::string const& typestr, size_t kRows,
|
||||
std::vector<Json> j_shape {Json(Integer(static_cast<Integer::Int>(kRows)))};
|
||||
column["shape"] = Array(j_shape);
|
||||
column["strides"] = Array(std::vector<Json>{Json(Integer(static_cast<Integer::Int>(sizeof(T))))});
|
||||
column["stream"] = nullptr;
|
||||
|
||||
d_data.resize(kRows);
|
||||
thrust::sequence(thrust::device, d_data.begin(), d_data.end(), 0.0f, 2.0f);
|
||||
@@ -30,7 +31,7 @@ Json GenerateDenseColumn(std::string const& typestr, size_t kRows,
|
||||
Json(Boolean(false))};
|
||||
column["data"] = j_data;
|
||||
|
||||
column["version"] = Integer(static_cast<Integer::Int>(1));
|
||||
column["version"] = 3;
|
||||
column["typestr"] = String(typestr);
|
||||
return column;
|
||||
}
|
||||
@@ -43,6 +44,7 @@ Json GenerateSparseColumn(std::string const& typestr, size_t kRows,
|
||||
std::vector<Json> j_shape {Json(Integer(static_cast<Integer::Int>(kRows)))};
|
||||
column["shape"] = Array(j_shape);
|
||||
column["strides"] = Array(std::vector<Json>{Json(Integer(static_cast<Integer::Int>(sizeof(T))))});
|
||||
column["stream"] = nullptr;
|
||||
|
||||
d_data.resize(kRows);
|
||||
for (size_t i = 0; i < d_data.size(); ++i) {
|
||||
@@ -56,7 +58,7 @@ Json GenerateSparseColumn(std::string const& typestr, size_t kRows,
|
||||
Json(Boolean(false))};
|
||||
column["data"] = j_data;
|
||||
|
||||
column["version"] = Integer(static_cast<Integer::Int>(1));
|
||||
column["version"] = 3;
|
||||
column["typestr"] = String(typestr);
|
||||
return column;
|
||||
}
|
||||
@@ -75,9 +77,9 @@ Json Generate2dArrayInterface(int rows, int cols, std::string typestr,
|
||||
Json(Integer(reinterpret_cast<Integer::Int>(data.data().get()))),
|
||||
Json(Boolean(false))};
|
||||
array_interface["data"] = j_data;
|
||||
array_interface["version"] = Integer(static_cast<Integer::Int>(1));
|
||||
array_interface["version"] = 3;
|
||||
array_interface["typestr"] = String(typestr);
|
||||
array_interface["stream"] = nullptr;
|
||||
return array_interface;
|
||||
}
|
||||
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -103,9 +103,9 @@ TEST(IterativeDeviceDMatrix, RowMajor) {
|
||||
|
||||
auto j_interface =
|
||||
Json::Load({interface_str.c_str(), interface_str.size()});
|
||||
ArrayInterface loaded {get<Object const>(j_interface)};
|
||||
ArrayInterface<2> loaded {get<Object const>(j_interface)};
|
||||
std::vector<float> h_data(cols * rows);
|
||||
common::Span<float> s_data{static_cast<float*>(loaded.data), cols * rows};
|
||||
common::Span<float const> s_data{static_cast<float const*>(loaded.data), cols * rows};
|
||||
dh::CopyDeviceSpanToVector(&h_data, s_data);
|
||||
|
||||
for(auto i = 0ull; i < rows * cols; i++) {
|
||||
@@ -128,9 +128,9 @@ TEST(IterativeDeviceDMatrix, RowMajorMissing) {
|
||||
std::string interface_str = iter.AsArray();
|
||||
auto j_interface =
|
||||
Json::Load({interface_str.c_str(), interface_str.size()});
|
||||
ArrayInterface loaded {get<Object const>(j_interface)};
|
||||
ArrayInterface<2> loaded {get<Object const>(j_interface)};
|
||||
std::vector<float> h_data(cols * rows);
|
||||
common::Span<float> s_data{static_cast<float*>(loaded.data), cols * rows};
|
||||
common::Span<float const> s_data{static_cast<float const*>(loaded.data), cols * rows};
|
||||
dh::CopyDeviceSpanToVector(&h_data, s_data);
|
||||
h_data[1] = kMissing;
|
||||
h_data[5] = kMissing;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
/*! Copyright 2019 by Contributors */
|
||||
/*! Copyright 2019-2021 by XGBoost Contributors */
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/data.h>
|
||||
|
||||
Reference in New Issue
Block a user