Implement fast number serialization routines. (#5772)

* Implement ryu algorithm.
* Implement integer printing.
* Full coverage roundtrip test.
This commit is contained in:
Jiaming Yuan
2020-06-17 12:39:23 +08:00
committed by GitHub
parent 7c3a168ffd
commit 38ee514787
17 changed files with 1601 additions and 253 deletions

View File

@@ -9,7 +9,9 @@
#include "xgboost/json.h"
#include "xgboost/logging.h"
#include "xgboost/json_io.h"
#include "../helpers.h"
#include "../../../src/common/io.h"
#include "../../../src/common/charconv.h"
namespace xgboost {
@@ -146,22 +148,46 @@ TEST(Json, ParseNumber) {
{
std::string str = "31.8892";
auto json = Json::Load(StringView{str.c_str(), str.size()});
ASSERT_NEAR(get<JsonNumber>(json), 31.8892f, kRtEps);
ASSERT_EQ(get<JsonNumber>(json), 31.8892f);
}
{
std::string str = "-31.8892";
auto json = Json::Load(StringView{str.c_str(), str.size()});
ASSERT_NEAR(get<JsonNumber>(json), -31.8892f, kRtEps);
ASSERT_EQ(get<JsonNumber>(json), -31.8892f);
}
{
std::string str = "2e4";
auto json = Json::Load(StringView{str.c_str(), str.size()});
ASSERT_NEAR(get<JsonNumber>(json), 2e4f, kRtEps);
ASSERT_EQ(get<JsonNumber>(json), 2e4f);
}
{
std::string str = "2e-4";
auto json = Json::Load(StringView{str.c_str(), str.size()});
ASSERT_NEAR(get<JsonNumber>(json), 2e-4f, kRtEps);
ASSERT_EQ(get<JsonNumber>(json), 2e-4f);
}
{
std::string str = "-2e-4";
auto json = Json::Load(StringView{str.c_str(), str.size()});
ASSERT_EQ(get<JsonNumber>(json), -2e-4f);
}
{
std::string str = "-0.0";
auto json = Json::Load(StringView{str.c_str(), str.size()});
ASSERT_TRUE(std::signbit(get<JsonNumber>(json)));
ASSERT_EQ(get<JsonNumber>(json), -0);
}
{
std::string str = "-5.37645816802978516e-01";
auto json = Json::Load(StringView{str.c_str(), str.size()});
ASSERT_TRUE(std::signbit(get<JsonNumber>(json)));
// Larger than fast path limit.
ASSERT_EQ(get<JsonNumber>(json), -5.37645816802978516e-01);
}
{
std::string str = "9.86623668670654297e+00";
auto json = Json::Load(StringView{str.c_str(), str.size()});
ASSERT_FALSE(std::signbit(get<JsonNumber>(json)));
ASSERT_EQ(get<JsonNumber>(json), 9.86623668670654297e+00);
}
}
@@ -200,13 +226,30 @@ TEST(Json, ParseArray) {
Json v0 = arr[0];
ASSERT_EQ(get<Integer>(v0["depth"]), 3);
ASSERT_NEAR(get<Number>(v0["gain"]), 10.4866, kRtEps);
{
std::string str =
"[5.04713470458984375e+02,9.86623668670654297e+00,4.94847229003906250e+"
"02,2.13924217224121094e+00,7.72699451446533203e+00,2."
"30380615234375000e+02,2.64466613769531250e+02]";
auto json = Json::Load(StringView{str.c_str(), str.size()});
auto const& vec = get<Array const>(json);
ASSERT_EQ(get<Number const>(vec[0]), 5.04713470458984375e+02);
ASSERT_EQ(get<Number const>(vec[1]), 9.86623668670654297e+00);
ASSERT_EQ(get<Number const>(vec[2]), 4.94847229003906250e+02);
ASSERT_EQ(get<Number const>(vec[3]), 2.13924217224121094e+00);
ASSERT_EQ(get<Number const>(vec[4]), 7.72699451446533203e+00);
ASSERT_EQ(get<Number const>(vec[5]), 2.30380615234375000e+02);
ASSERT_EQ(get<Number const>(vec[6]), 2.64466613769531250e+02);
}
}
TEST(Json, Null) {
Json json {JsonNull()};
std::stringstream ss;
std::string ss;
Json::Dump(json, &ss);
ASSERT_EQ(ss.str(), "null");
ASSERT_EQ(ss, "null");
std::string null_input {R"null({"key": null })null"};
@@ -288,7 +331,7 @@ TEST(Json, AssigningObjects) {
Json json_object { JsonObject() };
auto str = JsonString("1");
auto& k = json_object["1"];
k = str;
k = std::move(str);
auto& m = json_object["1"];
std::string value = get<JsonString>(m);
ASSERT_EQ(value, "1");
@@ -365,15 +408,56 @@ TEST(Json, LoadDump) {
dmlc::TemporaryDirectory tempdir;
auto const& path = tempdir.path + "test_model_dump";
std::ofstream fout (path);
Json::Dump(origin, &fout);
fout.close();
std::string out;
Json::Dump(origin, &out);
std::ofstream fout(path);
ASSERT_TRUE(fout);
fout << out << std::flush;
std::string new_buffer = common::LoadSequentialFile(path);
Json load_back {Json::Load(StringView(new_buffer.c_str(), new_buffer.size()))};
ASSERT_EQ(load_back, origin) << ori_buffer << "\n\n---------------\n\n"
<< new_buffer;
Json load_back {Json::Load(StringView(new_buffer.c_str(), new_buffer.size()))};
ASSERT_EQ(load_back, origin);
}
// Each malformed document below must make Json::Load throw a dmlc::Error
// whose message contains the paired substring.
TEST(Json, Invalid) {
  struct Case {
    char const* input;     // malformed JSON text handed to the parser
    char const* expected;  // substring that must appear in the error message
  };
  Case const cases[] = {
      {"}", "Unknown"},
      {R"json({foo)json", "position: 1"},
      {R"json({"foo")json", "EOF"},
  };

  // The assertions stay inside the test body (not a helper) so that a failing
  // ASSERT_* aborts the whole test, exactly as with the unrolled version.
  for (auto const& c : cases) {
    std::string str = c.input;
    bool has_thrown = false;
    try {
      Json load{Json::Load(StringView(str.c_str(), str.size()))};
    } catch (dmlc::Error const& e) {
      std::string msg = e.what();
      ASSERT_NE(msg.find(c.expected), std::string::npos);
      has_thrown = true;
    }
    ASSERT_TRUE(has_thrown);
  }
}
// For now Json is quite ignorant about unicode.
@@ -383,10 +467,9 @@ TEST(Json, CopyUnicode) {
)json";
Json loaded {Json::Load(StringView{json_str.c_str(), json_str.size()})};
std::stringstream ss_1;
Json::Dump(loaded, &ss_1);
std::string dumped_string;
Json::Dump(loaded, &dumped_string);
std::string dumped_string = ss_1.str();
ASSERT_NE(dumped_string.find("\\u20ac"), std::string::npos);
}
@@ -406,6 +489,15 @@ TEST(Json, WrongCasts) {
}
}
// Dumping an Integer must yield the same text as std::to_string for the
// powers of ten 1, 10, 100 and 1000.
TEST(Json, Integer) {
  int64_t value = 1;
  while (value < 10000) {
    auto json = Json{Integer{value}};
    std::string dumped;
    Json::Dump(json, &dumped);
    ASSERT_EQ(dumped, std::to_string(value));
    value *= 10;
  }
}
TEST(Json, IntVSFloat) {
// If integer is parsed as float, calling `get<Integer>()' will throw.
{
@@ -432,4 +524,31 @@ TEST(Json, IntVSFloat) {
ASSERT_EQ(ptr, 2503595760);
}
}
// Walks the 32-bit pattern space in random strides, reinterprets each pattern
// as a float, and checks that dumping then loading reproduces the same value
// (or a NaN when the input is NaN).
TEST(Json, RoundTrip) {
  SimpleLCG rng;
  SimpleRealUniformDistribution<float> dist(1.0f, 4096.0f);
  uint32_t bits = 0;

  // The only exit is the wrap-around check at the bottom of the loop.
  for (;;) {
    // Reinterpret the current bit pattern as a float.
    float value;
    std::memcpy(&value, &bits, sizeof(value));

    Json encoded{value};
    std::string text;
    Json::Dump(encoded, &text);
    auto decoded = Json::Load({text.c_str(), text.size()});

    if (XGBOOST_EXPECT(std::isnan(value), false)) {
      // NaN never compares equal to itself, so only NaN-ness is checked.
      ASSERT_TRUE(std::isnan(get<Number const>(decoded)));
    } else {
      ASSERT_EQ(get<Number const>(decoded), value);
    }

    // Advance by a random stride; stop once the unsigned counter wraps.
    uint32_t const previous = bits;
    bits += static_cast<uint32_t>(dist(&rng));
    if (bits < previous) {
      break;
    }
  }
}
} // namespace xgboost