Implement fast number serialization routines. (#5772)
* Implement ryu algorithm. * Implement integer printing. * Full coverage roundtrip test.
This commit is contained in:
@@ -9,7 +9,9 @@
|
||||
#include "xgboost/json.h"
|
||||
#include "xgboost/logging.h"
|
||||
#include "xgboost/json_io.h"
|
||||
#include "../helpers.h"
|
||||
#include "../../../src/common/io.h"
|
||||
#include "../../../src/common/charconv.h"
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
@@ -146,22 +148,46 @@ TEST(Json, ParseNumber) {
|
||||
{
|
||||
std::string str = "31.8892";
|
||||
auto json = Json::Load(StringView{str.c_str(), str.size()});
|
||||
ASSERT_NEAR(get<JsonNumber>(json), 31.8892f, kRtEps);
|
||||
ASSERT_EQ(get<JsonNumber>(json), 31.8892f);
|
||||
}
|
||||
{
|
||||
std::string str = "-31.8892";
|
||||
auto json = Json::Load(StringView{str.c_str(), str.size()});
|
||||
ASSERT_NEAR(get<JsonNumber>(json), -31.8892f, kRtEps);
|
||||
ASSERT_EQ(get<JsonNumber>(json), -31.8892f);
|
||||
}
|
||||
{
|
||||
std::string str = "2e4";
|
||||
auto json = Json::Load(StringView{str.c_str(), str.size()});
|
||||
ASSERT_NEAR(get<JsonNumber>(json), 2e4f, kRtEps);
|
||||
ASSERT_EQ(get<JsonNumber>(json), 2e4f);
|
||||
}
|
||||
{
|
||||
std::string str = "2e-4";
|
||||
auto json = Json::Load(StringView{str.c_str(), str.size()});
|
||||
ASSERT_NEAR(get<JsonNumber>(json), 2e-4f, kRtEps);
|
||||
ASSERT_EQ(get<JsonNumber>(json), 2e-4f);
|
||||
}
|
||||
{
|
||||
std::string str = "-2e-4";
|
||||
auto json = Json::Load(StringView{str.c_str(), str.size()});
|
||||
ASSERT_EQ(get<JsonNumber>(json), -2e-4f);
|
||||
}
|
||||
{
|
||||
std::string str = "-0.0";
|
||||
auto json = Json::Load(StringView{str.c_str(), str.size()});
|
||||
ASSERT_TRUE(std::signbit(get<JsonNumber>(json)));
|
||||
ASSERT_EQ(get<JsonNumber>(json), -0);
|
||||
}
|
||||
{
|
||||
std::string str = "-5.37645816802978516e-01";
|
||||
auto json = Json::Load(StringView{str.c_str(), str.size()});
|
||||
ASSERT_TRUE(std::signbit(get<JsonNumber>(json)));
|
||||
// Larger than fast path limit.
|
||||
ASSERT_EQ(get<JsonNumber>(json), -5.37645816802978516e-01);
|
||||
}
|
||||
{
|
||||
std::string str = "9.86623668670654297e+00";
|
||||
auto json = Json::Load(StringView{str.c_str(), str.size()});
|
||||
ASSERT_FALSE(std::signbit(get<JsonNumber>(json)));
|
||||
ASSERT_EQ(get<JsonNumber>(json), 9.86623668670654297e+00);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -200,13 +226,30 @@ TEST(Json, ParseArray) {
|
||||
Json v0 = arr[0];
|
||||
ASSERT_EQ(get<Integer>(v0["depth"]), 3);
|
||||
ASSERT_NEAR(get<Number>(v0["gain"]), 10.4866, kRtEps);
|
||||
|
||||
{
|
||||
std::string str =
|
||||
"[5.04713470458984375e+02,9.86623668670654297e+00,4.94847229003906250e+"
|
||||
"02,2.13924217224121094e+00,7.72699451446533203e+00,2."
|
||||
"30380615234375000e+02,2.64466613769531250e+02]";
|
||||
auto json = Json::Load(StringView{str.c_str(), str.size()});
|
||||
|
||||
auto const& vec = get<Array const>(json);
|
||||
ASSERT_EQ(get<Number const>(vec[0]), 5.04713470458984375e+02);
|
||||
ASSERT_EQ(get<Number const>(vec[1]), 9.86623668670654297e+00);
|
||||
ASSERT_EQ(get<Number const>(vec[2]), 4.94847229003906250e+02);
|
||||
ASSERT_EQ(get<Number const>(vec[3]), 2.13924217224121094e+00);
|
||||
ASSERT_EQ(get<Number const>(vec[4]), 7.72699451446533203e+00);
|
||||
ASSERT_EQ(get<Number const>(vec[5]), 2.30380615234375000e+02);
|
||||
ASSERT_EQ(get<Number const>(vec[6]), 2.64466613769531250e+02);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Json, Null) {
|
||||
Json json {JsonNull()};
|
||||
std::stringstream ss;
|
||||
std::string ss;
|
||||
Json::Dump(json, &ss);
|
||||
ASSERT_EQ(ss.str(), "null");
|
||||
ASSERT_EQ(ss, "null");
|
||||
|
||||
std::string null_input {R"null({"key": null })null"};
|
||||
|
||||
@@ -288,7 +331,7 @@ TEST(Json, AssigningObjects) {
|
||||
Json json_object { JsonObject() };
|
||||
auto str = JsonString("1");
|
||||
auto& k = json_object["1"];
|
||||
k = str;
|
||||
k = std::move(str);
|
||||
auto& m = json_object["1"];
|
||||
std::string value = get<JsonString>(m);
|
||||
ASSERT_EQ(value, "1");
|
||||
@@ -365,15 +408,56 @@ TEST(Json, LoadDump) {
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
auto const& path = tempdir.path + "test_model_dump";
|
||||
|
||||
std::ofstream fout (path);
|
||||
Json::Dump(origin, &fout);
|
||||
fout.close();
|
||||
std::string out;
|
||||
Json::Dump(origin, &out);
|
||||
|
||||
std::ofstream fout(path);
|
||||
ASSERT_TRUE(fout);
|
||||
fout << out << std::flush;
|
||||
|
||||
std::string new_buffer = common::LoadSequentialFile(path);
|
||||
Json load_back {Json::Load(StringView(new_buffer.c_str(), new_buffer.size()))};
|
||||
|
||||
ASSERT_EQ(load_back, origin) << ori_buffer << "\n\n---------------\n\n"
|
||||
<< new_buffer;
|
||||
Json load_back {Json::Load(StringView(new_buffer.c_str(), new_buffer.size()))};
|
||||
ASSERT_EQ(load_back, origin);
|
||||
}
|
||||
|
||||
TEST(Json, Invalid) {
|
||||
{
|
||||
std::string str = "}";
|
||||
bool has_thrown = false;
|
||||
try {
|
||||
Json load{Json::Load(StringView(str.c_str(), str.size()))};
|
||||
} catch (dmlc::Error const &e) {
|
||||
std::string msg = e.what();
|
||||
ASSERT_NE(msg.find("Unknown"), std::string::npos);
|
||||
has_thrown = true;
|
||||
};
|
||||
ASSERT_TRUE(has_thrown);
|
||||
}
|
||||
{
|
||||
std::string str = R"json({foo)json";
|
||||
bool has_thrown = false;
|
||||
try {
|
||||
Json load{Json::Load(StringView(str.c_str(), str.size()))};
|
||||
} catch (dmlc::Error const &e) {
|
||||
std::string msg = e.what();
|
||||
ASSERT_NE(msg.find("position: 1"), std::string::npos);
|
||||
has_thrown = true;
|
||||
};
|
||||
ASSERT_TRUE(has_thrown);
|
||||
}
|
||||
{
|
||||
std::string str = R"json({"foo")json";
|
||||
bool has_thrown = false;
|
||||
try {
|
||||
Json load{Json::Load(StringView(str.c_str(), str.size()))};
|
||||
} catch (dmlc::Error const &e) {
|
||||
std::string msg = e.what();
|
||||
ASSERT_NE(msg.find("EOF"), std::string::npos);
|
||||
has_thrown = true;
|
||||
};
|
||||
ASSERT_TRUE(has_thrown);
|
||||
}
|
||||
}
|
||||
|
||||
// For now Json is quite ignorance about unicode.
|
||||
@@ -383,10 +467,9 @@ TEST(Json, CopyUnicode) {
|
||||
)json";
|
||||
Json loaded {Json::Load(StringView{json_str.c_str(), json_str.size()})};
|
||||
|
||||
std::stringstream ss_1;
|
||||
Json::Dump(loaded, &ss_1);
|
||||
std::string dumped_string;
|
||||
Json::Dump(loaded, &dumped_string);
|
||||
|
||||
std::string dumped_string = ss_1.str();
|
||||
ASSERT_NE(dumped_string.find("\\u20ac"), std::string::npos);
|
||||
}
|
||||
|
||||
@@ -406,6 +489,15 @@ TEST(Json, WrongCasts) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Json, Integer) {
|
||||
for (int64_t i = 1; i < 10000; i *= 10) {
|
||||
auto ten = Json{Integer{i}};
|
||||
std::string str;
|
||||
Json::Dump(ten, &str);
|
||||
ASSERT_EQ(str, std::to_string(i));
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Json, IntVSFloat) {
|
||||
// If integer is parsed as float, calling `get<Integer>()' will throw.
|
||||
{
|
||||
@@ -432,4 +524,31 @@ TEST(Json, IntVSFloat) {
|
||||
ASSERT_EQ(ptr, 2503595760);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Json, RoundTrip) {
|
||||
uint32_t i = 0;
|
||||
SimpleLCG rng;
|
||||
SimpleRealUniformDistribution<float> dist(1.0f, 4096.0f);
|
||||
|
||||
while (i <= std::numeric_limits<uint32_t>::max()) {
|
||||
float f;
|
||||
std::memcpy(&f, &i, sizeof(f));
|
||||
|
||||
Json jf { f };
|
||||
std::string str;
|
||||
Json::Dump(jf, &str);
|
||||
auto loaded = Json::Load({str.c_str(), str.size()});
|
||||
if (XGBOOST_EXPECT(std::isnan(f), false)) {
|
||||
ASSERT_TRUE(std::isnan(get<Number const>(loaded)));
|
||||
} else {
|
||||
ASSERT_EQ(get<Number const>(loaded), f);
|
||||
}
|
||||
|
||||
auto t = i;
|
||||
i+= static_cast<uint32_t>(dist(&rng));
|
||||
if (i < t) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace xgboost
|
||||
|
||||
Reference in New Issue
Block a user