Support dmatrix construction from cupy array (#5206)

This commit is contained in:
Rory Mitchell
2020-01-22 13:15:27 +13:00
committed by GitHub
parent 2a071cebc5
commit 9c56480c61
19 changed files with 522 additions and 158 deletions

View File

@@ -8,7 +8,6 @@
#include "../../../src/common/bitfield.h"
#include "../../../src/common/device_helpers.cuh"
#include "../../../src/data/simple_csr_source.h"
#include "../../../src/data/columnar.h"
namespace xgboost {
@@ -62,4 +61,24 @@ Json GenerateSparseColumn(std::string const& typestr, size_t kRows,
column["typestr"] = String(typestr);
return column;
}
/*!
 * \brief Test helper: fill a device buffer with 0, 1, 2, ... and describe it
 *        as a two-dimensional array-interface JSON object.
 *
 * The returned object carries the keys "shape", "data", "version" and
 * "typestr".  "data" is a two-element array holding the raw device address of
 * the buffer (as an integer) followed by the boolean `false`
 * (presumably the read-only flag of the array interface -- TODO confirm).
 *
 * \param rows    First entry written to "shape".
 * \param cols    Second entry written to "shape".
 * \param typestr Type string stored verbatim under "typestr" (e.g. "<f4").
 * \param p_data  Device buffer to overwrite and expose.  It is neither resized
 *                nor checked against rows * cols here, and it must outlive any
 *                consumer of the returned JSON because only its address is
 *                stored.
 *
 * \return The JSON object describing the buffer.
 */
template <typename T>
Json Generate2dArrayInterface(int rows, int cols, std::string typestr,
                              thrust::device_vector<T>* p_data) {
  // Overwrite the entire buffer with an ascending sequence starting at zero.
  thrust::sequence(p_data->begin(), p_data->end());

  // The interface transports the device pointer as a plain integer.
  auto const address = reinterpret_cast<Integer::Int>(p_data->data().get());

  std::vector<Json> dims = {Json(static_cast<Integer::Int>(rows)),
                            Json(static_cast<Integer::Int>(cols))};
  std::vector<Json> pointer_and_flag{Json(Integer(address)),
                                     Json(Boolean(false))};

  Json description{Object()};
  description["typestr"] = String(typestr);
  description["version"] = Integer(static_cast<Integer::Int>(1));
  description["shape"] = Array(dims);
  description["data"] = pointer_and_flag;
  return description;
}
} // namespace xgboost

View File

@@ -7,7 +7,7 @@
#include "../helpers.h"
#include <thrust/device_vector.h>
#include "../../../src/data/device_adapter.cuh"
#include "test_columnar.h"
#include "test_array_interface.h"
using namespace xgboost; // NOLINT
void TestCudfAdapter()

View File

@@ -9,8 +9,7 @@
namespace xgboost {
template <typename T>
std::string PrepareData(std::string typestr, thrust::device_vector<T>* out) {
constexpr size_t kRows = 16;
std::string PrepareData(std::string typestr, thrust::device_vector<T>* out, const size_t kRows=16) {
out->resize(kRows);
auto& d_data = *out;
@@ -66,7 +65,15 @@ TEST(MetaInfo, FromInterface) {
ASSERT_EQ(h_base_margin[i], d_data[i]);
}
EXPECT_ANY_THROW({info.SetInfo("group", str.c_str());});
thrust::device_vector<int> d_group_data;
std::string group_str = PrepareData<int>("<i4", &d_group_data, 4);
d_group_data[0] = 4;
d_group_data[1] = 3;
d_group_data[2] = 2;
d_group_data[3] = 1;
info.SetInfo("group", group_str.c_str());
std::vector<bst_group_t> expected_group_ptr = {0, 4, 7, 9, 10};
EXPECT_EQ(info.group_ptr_, expected_group_ptr);
}
TEST(MetaInfo, Group) {
@@ -83,4 +90,4 @@ TEST(MetaInfo, Group) {
ASSERT_EQ(h_group[i], d_data[i-1] + h_group[i-1]) << "i: " << i;
}
}
} // namespace xgboost
} // namespace xgboost

View File

@@ -6,7 +6,8 @@
#include <thrust/sequence.h>
#include "../../../src/data/device_adapter.cuh"
#include "../helpers.h"
#include "test_columnar.h"
#include "test_array_interface.h"
#include "../../../src/data/array_interface.h"
using namespace xgboost; // NOLINT
@@ -316,3 +317,55 @@ TEST(SimpleDMatrix, FromColumnarSparseBasic) {
}
}
}
// Dense case: a 50 x 10 float buffer on the device is filled with 0, 1, 2, ...
// by Generate2dArrayInterface, handed to CupyAdapter as a JSON string, and the
// resulting SimpleDMatrix must reproduce every value at its row-major position.
TEST(SimpleDMatrix, FromCupy) {
  const int kNumRows = 50;
  const int kNumCols = 10;
  thrust::device_vector<float> d_values(kNumRows * kNumCols);

  // Serialise the array interface; the adapter takes it as a string.
  auto interface_json =
      Generate2dArrayInterface(kNumRows, kNumCols, "<f4", &d_values);
  std::stringstream serialised;
  Json::Dump(interface_json, &serialised);
  std::string interface_str = serialised.str();

  data::CupyAdapter adapter(interface_str);
  // NOTE(review): -1 and 1 are presumably the missing-value marker and the
  // thread count -- confirm against the SimpleDMatrix constructor.
  data::SimpleDMatrix dmat(&adapter, -1, 1);

  // Nothing is missing, so every cell must have been kept.
  EXPECT_EQ(dmat.Info().num_col_, kNumCols);
  EXPECT_EQ(dmat.Info().num_row_, kNumRows);
  EXPECT_EQ(dmat.Info().num_nonzero_, kNumRows * kNumCols);

  for (auto& page : dmat.GetBatches<SparsePage>()) {
    for (unsigned long long row = 0; row < page.Size(); ++row) {
      auto entries = page[row];
      for (unsigned long long col = 0; col < entries.size(); ++col) {
        // Row-major sequence: the value equals the flat offset of the cell.
        EXPECT_EQ(entries[col].fvalue, row * kNumCols + col);
        EXPECT_EQ(entries[col].index, col);
      }
    }
  }
}
// Sparse case: a 2 x 2 float buffer is filled with {0, 1, 2, 3} by
// Generate2dArrayInterface, then the two off-diagonal cells are replaced by
// NaN.  The DMatrix built through CupyAdapter is expected to drop the NaN
// cells, leaving exactly one entry per row.
TEST(SimpleDMatrix, FromCupySparse){
  int rows = 2;
  int cols = 2;
  thrust::device_vector< float> data(rows*cols);
  auto json_array_interface = Generate2dArrayInterface(rows, cols, "<f4", &data);
  // The interface only stores the buffer address, so edits made after it was
  // generated are still seen by the adapter.
  data[1] = std::numeric_limits<float>::quiet_NaN();
  data[2] = std::numeric_limits<float>::quiet_NaN();
  std::stringstream ss;
  Json::Dump(json_array_interface, &ss);
  std::string str = ss.str();
  data::CupyAdapter adapter(str);
  // NOTE(review): -1 and 1 are presumably the missing-value marker and the
  // thread count -- confirm against the SimpleDMatrix constructor.
  data::SimpleDMatrix dmat(&adapter, -1, 1);
  EXPECT_EQ(dmat.Info().num_col_, cols);
  EXPECT_EQ(dmat.Info().num_row_, rows);
  EXPECT_EQ(dmat.Info().num_nonzero_, rows * cols - 2);
  auto& batch = *dmat.GetBatches<SparsePage>().begin();
  auto inst0 = batch[0];
  auto inst1 = batch[1];
  // Fatal assertions: the element checks below index entry 0 of each row and
  // must not run if a row turned out to be empty.
  ASSERT_EQ(inst0.size(), 1);
  ASSERT_EQ(inst1.size(), 1);
  // Row 0 keeps column 0 (value 0), row 1 keeps column 1 (value 3).
  EXPECT_EQ(inst0[0].fvalue, 0.0f);
  EXPECT_EQ(inst0[0].index, 0);
  EXPECT_EQ(inst1[0].fvalue, 3.0f);
  EXPECT_EQ(inst1[0].index, 1);
}