Support dmatrix construction from cupy array (#5206)
This commit is contained in:
@@ -8,7 +8,6 @@
|
||||
#include "../../../src/common/bitfield.h"
|
||||
#include "../../../src/common/device_helpers.cuh"
|
||||
#include "../../../src/data/simple_csr_source.h"
|
||||
#include "../../../src/data/columnar.h"
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
@@ -62,4 +61,24 @@ Json GenerateSparseColumn(std::string const& typestr, size_t kRows,
|
||||
column["typestr"] = String(typestr);
|
||||
return column;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
Json Generate2dArrayInterface(int rows, int cols, std::string typestr,
|
||||
thrust::device_vector<T>* p_data) {
|
||||
auto& data = *p_data;
|
||||
thrust::sequence(data.begin(), data.end());
|
||||
|
||||
Json array_interface{Object()};
|
||||
std::vector<Json> shape = {Json(static_cast<Integer::Int>(rows)),
|
||||
Json(static_cast<Integer::Int>(cols))};
|
||||
array_interface["shape"] = Array(shape);
|
||||
std::vector<Json> j_data{
|
||||
Json(Integer(reinterpret_cast<Integer::Int>(data.data().get()))),
|
||||
Json(Boolean(false))};
|
||||
array_interface["data"] = j_data;
|
||||
array_interface["version"] = Integer(static_cast<Integer::Int>(1));
|
||||
array_interface["typestr"] = String(typestr);
|
||||
return array_interface;
|
||||
}
|
||||
|
||||
} // namespace xgboost
|
||||
@@ -7,7 +7,7 @@
|
||||
#include "../helpers.h"
|
||||
#include <thrust/device_vector.h>
|
||||
#include "../../../src/data/device_adapter.cuh"
|
||||
#include "test_columnar.h"
|
||||
#include "test_array_interface.h"
|
||||
using namespace xgboost; // NOLINT
|
||||
|
||||
void TestCudfAdapter()
|
||||
|
||||
@@ -9,8 +9,7 @@
|
||||
namespace xgboost {
|
||||
|
||||
template <typename T>
|
||||
std::string PrepareData(std::string typestr, thrust::device_vector<T>* out) {
|
||||
constexpr size_t kRows = 16;
|
||||
std::string PrepareData(std::string typestr, thrust::device_vector<T>* out, const size_t kRows=16) {
|
||||
out->resize(kRows);
|
||||
auto& d_data = *out;
|
||||
|
||||
@@ -66,7 +65,15 @@ TEST(MetaInfo, FromInterface) {
|
||||
ASSERT_EQ(h_base_margin[i], d_data[i]);
|
||||
}
|
||||
|
||||
EXPECT_ANY_THROW({info.SetInfo("group", str.c_str());});
|
||||
thrust::device_vector<int> d_group_data;
|
||||
std::string group_str = PrepareData<int>("<i4", &d_group_data, 4);
|
||||
d_group_data[0] = 4;
|
||||
d_group_data[1] = 3;
|
||||
d_group_data[2] = 2;
|
||||
d_group_data[3] = 1;
|
||||
info.SetInfo("group", group_str.c_str());
|
||||
std::vector<bst_group_t> expected_group_ptr = {0, 4, 7, 9, 10};
|
||||
EXPECT_EQ(info.group_ptr_, expected_group_ptr);
|
||||
}
|
||||
|
||||
TEST(MetaInfo, Group) {
|
||||
@@ -83,4 +90,4 @@ TEST(MetaInfo, Group) {
|
||||
ASSERT_EQ(h_group[i], d_data[i-1] + h_group[i-1]) << "i: " << i;
|
||||
}
|
||||
}
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -6,7 +6,8 @@
|
||||
#include <thrust/sequence.h>
|
||||
#include "../../../src/data/device_adapter.cuh"
|
||||
#include "../helpers.h"
|
||||
#include "test_columnar.h"
|
||||
#include "test_array_interface.h"
|
||||
#include "../../../src/data/array_interface.h"
|
||||
|
||||
using namespace xgboost; // NOLINT
|
||||
|
||||
@@ -316,3 +317,55 @@ TEST(SimpleDMatrix, FromColumnarSparseBasic) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Builds a SimpleDMatrix from a dense 50x10 float device buffer described by a
// JSON array interface and checks that every cell is present, with the value
// and column index expected from the sequentially filled buffer.
TEST(SimpleDMatrix, FromCupy) {
  int n_rows = 50;
  int n_cols = 10;

  // Generate2dArrayInterface fills the buffer with 0, 1, 2, ... and returns
  // the matching interface description.
  thrust::device_vector<float> d_values(n_rows * n_cols);
  auto j_interface = Generate2dArrayInterface(n_rows, n_cols, "<f4", &d_values);

  // Serialise the description; the adapter is constructed from a string.
  std::stringstream sstream;
  Json::Dump(j_interface, &sstream);
  std::string interface_str = sstream.str();

  data::CupyAdapter adapter(interface_str);
  data::SimpleDMatrix dmat(&adapter, -1, 1);

  // Shape and entry count must match the dense input.
  EXPECT_EQ(dmat.Info().num_col_, n_cols);
  EXPECT_EQ(dmat.Info().num_row_, n_rows);
  EXPECT_EQ(dmat.Info().num_nonzero_, n_rows * n_cols);

  for (auto& page : dmat.GetBatches<SparsePage>()) {
    for (auto ridx = 0ull; ridx < page.Size(); ridx++) {
      auto row = page[ridx];
      for (auto cidx = 0ull; cidx < row.size(); cidx++) {
        // The test expects the cell at (ridx, cidx) to hold its flat offset.
        auto expected_value = ridx * n_cols + cidx;
        EXPECT_EQ(row[cidx].fvalue, expected_value);
        EXPECT_EQ(row[cidx].index, cidx);
      }
    }
  }
}
|
||||
|
||||
// Builds a SimpleDMatrix from a 2x2 float device buffer in which two cells are
// NaN, and checks that only the two remaining cells end up in the matrix.
TEST(SimpleDMatrix, FromCupySparse) {
  int rows = 2;
  int cols = 2;
  thrust::device_vector<float> data(rows * cols);
  auto json_array_interface = Generate2dArrayInterface(rows, cols, "<f4", &data);

  // Generate2dArrayInterface filled the buffer with 0, 1, 2, 3. Overwrite the
  // two middle cells afterwards so the buffer reads {0, NaN, NaN, 3}.
  data[1] = std::numeric_limits<float>::quiet_NaN();
  data[2] = std::numeric_limits<float>::quiet_NaN();

  std::stringstream ss;
  Json::Dump(json_array_interface, &ss);
  std::string str = ss.str();
  data::CupyAdapter adapter(str);
  data::SimpleDMatrix dmat(&adapter, -1, 1);

  EXPECT_EQ(dmat.Info().num_col_, cols);
  // Fatal: rows 0 and 1 are indexed below, so stop here if they do not exist.
  ASSERT_EQ(dmat.Info().num_row_, rows);
  // The two NaN cells are expected to be absent from the matrix.
  EXPECT_EQ(dmat.Info().num_nonzero_, rows * cols - 2);

  auto& batch = *dmat.GetBatches<SparsePage>().begin();
  auto inst0 = batch[0];
  auto inst1 = batch[1];

  // Fatal: element 0 of each row is read below; a non-fatal check would let a
  // regression turn into an out-of-bounds access instead of a test failure.
  ASSERT_EQ(inst0.size(), 1);
  ASSERT_EQ(inst1.size(), 1);

  // Row 0 keeps only column 0 (value 0), row 1 keeps only column 1 (value 3).
  EXPECT_EQ(inst0[0].fvalue, 0.0f);
  EXPECT_EQ(inst0[0].index, 0);
  EXPECT_EQ(inst1[0].fvalue, 3.0f);
  EXPECT_EQ(inst1[0].index, 1);
}
|
||||
|
||||
Reference in New Issue
Block a user