Implement ubjson. (#7549)

* Implement ubjson.

This is a partial implementation of UBJSON with support for typed arrays.  Some missing
features are `f64`, typed object, and the no-op.
This commit is contained in:
Jiaming Yuan
2022-01-10 23:24:23 +08:00
committed by GitHub
parent 001503186c
commit c635d4c46a
6 changed files with 792 additions and 242 deletions

View File

@@ -1,5 +1,5 @@
/*!
* Copyright (c) by Contributors 2019-2021
* Copyright (c) by Contributors 2019-2022
*/
#include <gtest/gtest.h>
#include <dmlc/filesystem.h>
@@ -255,6 +255,10 @@ TEST(Json, Null) {
json = Json::Load({null_input.c_str(), null_input.size()});
ASSERT_TRUE(IsA<Null>(json["key"]));
std::string dumped;
Json::Dump(json, &dumped, std::ios::binary);
ASSERT_TRUE(IsA<Null>(Json::Load(StringView{dumped}, std::ios::binary)["key"]));
}
TEST(Json, EmptyObject) {
@@ -297,6 +301,10 @@ TEST(Json, Boolean) {
Json j {Json::Load(StringView{str.c_str(), str.size()})};
ASSERT_EQ(get<JsonBoolean>(j["left_child"]), true);
ASSERT_EQ(get<JsonBoolean>(j["right_child"]), false);
std::string dumped;
Json::Dump(j, &dumped, std::ios::binary);
ASSERT_TRUE(get<Boolean const>(Json::Load(StringView{dumped}, std::ios::binary)["left_child"]));
}
TEST(Json, Indexing) {
@@ -532,7 +540,8 @@ TEST(Json, IntVSFloat) {
}
}
TEST(Json, RoundTrip) {
namespace {
void TestRroundTrip(std::ios::openmode mode) {
uint32_t i = 0;
SimpleLCG rng;
SimpleRealUniformDistribution<float> dist(1.0f, 4096.0f);
@@ -541,10 +550,10 @@ TEST(Json, RoundTrip) {
float f;
std::memcpy(&f, &i, sizeof(f));
Json jf { f };
Json jf{f};
std::string str;
Json::Dump(jf, &str);
auto loaded = Json::Load({str.c_str(), str.size()});
Json::Dump(jf, &str, mode);
auto loaded = Json::Load(StringView{str}, mode);
if (XGBOOST_EXPECT(std::isnan(f), false)) {
ASSERT_TRUE(std::isnan(get<Number const>(loaded)));
} else {
@@ -558,6 +567,12 @@ TEST(Json, RoundTrip) {
}
}
}
} // namespace
TEST(Json, RoundTrip) {
TestRroundTrip(std::ios::out);
TestRroundTrip(std::ios::binary);
}
TEST(Json, DISABLED_RoundTripExhaustive) {
auto test = [](uint32_t i) {
@@ -580,4 +595,87 @@ TEST(Json, DISABLED_RoundTripExhaustive) {
test(static_cast<uint32_t>(i));
}
}
TEST(Json, TypedArray) {
size_t n = 16;
F32Array f32{n};
std::iota(f32.GetArray().begin(), f32.GetArray().end(), -8);
U8Array u8{n};
std::iota(u8.GetArray().begin(), u8.GetArray().end(), 0);
I32Array i32{n};
std::iota(i32.GetArray().begin(), i32.GetArray().end(), -8);
I64Array i64{n};
std::iota(i64.GetArray().begin(), i64.GetArray().end(), -8);
Json json{Object{}};
json["u8"] = std::move(u8);
ASSERT_TRUE(IsA<U8Array>(json["u8"]));
json["f32"] = std::move(f32);
ASSERT_TRUE(IsA<F32Array>(json["f32"]));
json["i32"] = std::move(i32);
ASSERT_TRUE(IsA<I32Array>(json["i32"]));
json["i64"] = std::move(i64);
ASSERT_TRUE(IsA<I64Array>(json["i64"]));
std::string str;
Json::Dump(json, &str);
{
auto loaded = Json::Load(StringView{str});
// for text output there's no typed array.
ASSERT_TRUE(IsA<Array>(loaded["u8"]));
auto const& arr = loaded["f32"];
for (int32_t i = -8; i < 8; ++i) {
ASSERT_EQ(get<Number>(arr[i + 8]), i);
}
}
std::string binary;
Json::Dump(json, &binary, std::ios::binary);
{
auto loaded = Json::Load(StringView{binary}, std::ios::binary);
ASSERT_TRUE(IsA<U8Array>(loaded["u8"]));
auto const& arr = get<F32Array>(loaded["f32"]);
for (int32_t i = -8; i < 8; ++i) {
ASSERT_EQ(arr[i + 8], i);
}
}
}
TEST(UBJson, Basic) {
auto run_test = [](StringView str) {
auto json = Json::Load(str);
std::vector<char> stream;
UBJWriter writer{&stream};
Json::Dump(json, &writer);
{
std::ofstream fout{"test.ubj", std::ios::binary | std::ios::out};
fout.write(stream.data(), stream.size());
}
auto data = common::LoadSequentialFile("test.ubj");
UBJReader reader{StringView{data}};
json = reader.Load();
return json;
};
{
// empty
auto ret = run_test(R"({})");
std::stringstream ss;
ss << ret;
ASSERT_EQ(ss.str(), "{}");
}
{
auto ret = run_test(R"({"":[]})");
std::stringstream ss;
ss << ret;
ASSERT_EQ(ss.str(), R"({"":[]})");
}
{
// basic
auto ret = run_test(R"({"test": [2.71, 3.14, Infinity]})");
ASSERT_TRUE(std::isinf(get<Number>(get<Array>(ret["test"])[2])));
ASSERT_FLOAT_EQ(3.14, get<Number>(get<Array>(ret["test"])[1]));
ASSERT_FLOAT_EQ(2.71, get<Number>(get<Array>(ret["test"])[0]));
}
}
} // namespace xgboost