// Copyright (c) 2019 by Contributors #include #include #include #include #include #include "../../../src/common/bitfield.h" #include "../../../src/common/device_helpers.cuh" #include "../../../src/data/simple_csr_source.h" namespace xgboost { TEST(SimpleCSRSource, FromColumnarDense) { constexpr size_t kRows = 16; Json column { Object() }; std::vector j_shape {Json(Integer(static_cast(kRows)))}; column["shape"] = Array(j_shape); column["strides"] = Array(std::vector{Json(Integer(static_cast(4)))}); thrust::device_vector d_data(kRows); for (size_t i = 0; i < d_data.size(); ++i) { d_data[i] = i * 2.0; } auto p_d_data = dh::Raw(d_data); std::vector j_data { Json(Integer(reinterpret_cast(p_d_data))), Json(Boolean(false))}; column["data"] = j_data; column["version"] = Integer(static_cast(1)); column["typestr"] = String("{column}}}; std::stringstream ss; Json::Dump(column_arr, &ss); std::string str = ss.str(); std::unique_ptr source (new data::SimpleCSRSource()); source->CopyFrom(str.c_str()); auto const& data = source->page_.data.HostVector(); auto const& offset = source->page_.offset.HostVector(); for (size_t i = 0; i < kRows; ++i) { auto e = data[i]; ASSERT_NEAR(e.fvalue, i * 2.0, kRtEps); ASSERT_EQ(e.index, 0); // feature 0 } ASSERT_EQ(offset.back(), 16); for (size_t i = 0; i < kRows + 1; ++i) { ASSERT_EQ(offset[i], i); } } TEST(SimpleCSRSource, FromColumnarWithEmptyRows) { // In this test we construct a data storage similar to cudf constexpr size_t kRows = 102; constexpr size_t kCols = 24; constexpr size_t kMissingRows = 3; std::vector v_columns (kCols); std::vector> columns_data(kCols); std::vector> column_bitfields(kCols); unsigned char constexpr kUCOne = 1; for (size_t i = 0; i < kCols; ++i) { auto& col = v_columns[i]; col = Object(); auto& data = columns_data[i]; data.resize(kRows); thrust::sequence(data.begin(), data.end(), 0); dh::safe_cuda(cudaDeviceSynchronize()); dh::safe_cuda(cudaGetLastError()); ASSERT_EQ(data.size(), kRows); auto p_d_data = raw_pointer_cast(data.data()); std::vector j_data { Json(Integer(reinterpret_cast(p_d_data))), Json(Boolean(false))}; col["data"] = j_data; std::vector j_shape {Json(Integer(static_cast(kRows)))}; col["shape"] = Array(j_shape); col["version"] = Integer(static_cast(1)); col["typestr"] = String(" missing_row_index {0, 1, last_ind}; for (size_t i = 0; i < mask_storage.size(); ++i) { if (missing_row_index.find(i) == missing_row_index.cend()) { // all other rows are valid mask_storage[i] = ~0; } } j_mask["data"] = std::vector{ Json(Integer(reinterpret_cast(mask_storage.data().get()))), Json(Boolean(false))}; j_mask["shape"] = Array(std::vector{Json(Integer(static_cast(16)))}); j_mask["typestr"] = String("|i1"); j_mask["null_count"] = Json(Integer(static_cast(kMissingRows))); } Json column_arr {Array(v_columns)}; std::stringstream ss; Json::Dump(column_arr, &ss); std::string str = ss.str(); std::unique_ptr source (new data::SimpleCSRSource()); source->CopyFrom(str.c_str()); auto const& data = source->page_.data.HostVector(); auto const& offset = source->page_.offset.HostVector(); ASSERT_EQ(offset.size(), kRows + 1); for (size_t i = 1; i < offset.size(); ++i) { for (size_t j = offset[i-1]; j < offset[i]; ++j) { ASSERT_EQ(data[j].index, j % kCols); ASSERT_NEAR(data[j].fvalue, i - 1, kRtEps); } } } TEST(SimpleCSRSource, FromColumnarSparse) { constexpr size_t kRows = 32; constexpr size_t kCols = 2; unsigned char constexpr kUCOne = 1; std::vector> columns_data(kCols); std::vector> column_bitfields(kCols); { // column 0 auto& mask = column_bitfields[0]; mask.resize(8); for (size_t j = 0; j < mask.size(); ++j) { mask[j] = ~0; } mask[0] = ~(kUCOne << 2); } { // column 1 auto& mask = column_bitfields[1]; mask.resize(8); for (size_t j = 0; j < mask.size(); ++j) { mask[j] = ~0; } mask[2] = ~(kUCOne << 3); } for (size_t c = 0; c < kCols; ++c) { columns_data[c].resize(kRows); thrust::sequence(columns_data[c].begin(), columns_data[c].end(), 0); } std::vector j_columns(kCols); for (size_t c = 0; c < kCols; ++c) { auto& column = j_columns[c]; column = Object(); column["version"] = Integer(static_cast(1)); column["typestr"] = String(" j_data { Json(Integer(reinterpret_cast(p_d_data))), Json(Boolean(false))}; column["data"] = j_data; std::vector j_shape {Json(Integer(static_cast(kRows)))}; column["shape"] = Array(j_shape); column["version"] = Integer(static_cast(1)); column["typestr"] = String("{ Json(Integer(reinterpret_cast(column_bitfields[c].data().get()))), Json(Boolean(false))}; j_mask["shape"] = Array(std::vector{Json(Integer(static_cast(8)))}); j_mask["typestr"] = String("|i1"); j_mask["null_count"] = Json(Integer(static_cast(1))); } Json column_arr {Array(j_columns)}; std::stringstream ss; Json::Dump(column_arr, &ss); std::string str = ss.str(); std::unique_ptr source (new data::SimpleCSRSource()); source->CopyFrom(str.c_str()); auto const& data = source->page_.data.HostVector(); auto const& offset = source->page_.offset.HostVector(); ASSERT_EQ(offset.size(), kRows + 1); ASSERT_EQ(data[4].index, 1); ASSERT_EQ(data[4].fvalue, 2); ASSERT_EQ(data[37].index, 0); ASSERT_EQ(data[37].fvalue, 19); } } // namespace xgboost