Add Json integer, remove specialization. (#4739)

This commit is contained in:
Jiaming Yuan 2019-08-06 03:10:49 -04:00 committed by GitHub
parent 9c469b3844
commit 2a4df8e29f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 314 additions and 221 deletions

View File

@ -69,6 +69,12 @@
#define XGBOOST_PARALLEL_STABLE_SORT(X, Y, Z) std::stable_sort((X), (Y), (Z)) #define XGBOOST_PARALLEL_STABLE_SORT(X, Y, Z) std::stable_sort((X), (Y), (Z))
#endif // GLIBC VERSION #endif // GLIBC VERSION
/*!
 * \brief Branch-prediction hint.  Evaluates `cond`; `ret` is the value the caller
 *        expects it to have.  When `__GNUC__` is not defined the hint is dropped
 *        and the macro expands to plain `(cond)`.
 */
#if !defined(__GNUC__)
#define XGBOOST_EXPECT(cond, ret) (cond)
#else
#define XGBOOST_EXPECT(cond, ret) __builtin_expect((cond), (ret))
#endif  // !defined(__GNUC__)
/*! /*!
* \brief Tag function as usable by device * \brief Tag function as usable by device
*/ */

View File

@ -4,8 +4,9 @@
#ifndef XGBOOST_JSON_H_ #ifndef XGBOOST_JSON_H_
#define XGBOOST_JSON_H_ #define XGBOOST_JSON_H_
#include <xgboost/logging.h> #include <dmlc/io.h>
#include <xgboost/logging.h>
#include <string> #include <string>
#include <map> #include <map>
@ -29,7 +30,6 @@ class Value {
Integer, Integer,
Object, // std::map Object, // std::map
Array, // std::vector Array, // std::vector
Raw,
Boolean, Boolean,
Null Null
}; };
@ -63,9 +63,9 @@ T* Cast(U* value) {
if (IsA<T>(value)) { if (IsA<T>(value)) {
return dynamic_cast<T*>(value); return dynamic_cast<T*>(value);
} else { } else {
throw std::runtime_error( LOG(FATAL) << "Invalid cast, from " + value->TypeStr() + " to " + T().TypeStr();
"Invalid cast, from " + value->TypeStr() + " to " + T().TypeStr());
} }
return dynamic_cast<T*>(value); // supress compiler warning.
} }
class JsonString : public Value { class JsonString : public Value {
@ -123,32 +123,6 @@ class JsonArray : public Value {
} }
}; };
class JsonRaw : public Value {
std::string str_;
public:
explicit JsonRaw(std::string&& str) :
Value(ValueKind::Raw),
str_{std::move(str)}{} // NOLINT
JsonRaw() : Value(ValueKind::Raw) {}
std::string const& getRaw() && { return str_; }
std::string const& getRaw() const & { return str_; }
std::string& getRaw() & { return str_; }
void Save(JsonWriter* writer) override;
Json& operator[](std::string const & key) override;
Json& operator[](int ind) override;
bool operator==(Value const& rhs) const override;
Value& operator=(Value const& rhs) override;
static bool isClassOf(Value const* value) {
return value->Type() == ValueKind::Raw;
}
};
class JsonObject : public Value { class JsonObject : public Value {
std::map<std::string, Json> object_; std::map<std::string, Json> object_;
@ -185,7 +159,9 @@ class JsonNumber : public Value {
public: public:
JsonNumber() : Value(ValueKind::Number) {} JsonNumber() : Value(ValueKind::Number) {}
JsonNumber(double value) : Value(ValueKind::Number) { // NOLINT template <typename FloatT,
typename std::enable_if<std::is_same<FloatT, Float>::value>::type* = nullptr>
JsonNumber(FloatT value) : Value(ValueKind::Number) { // NOLINT
number_ = value; number_ = value;
} }
@ -198,6 +174,7 @@ class JsonNumber : public Value {
Float const& getNumber() const & { return number_; } Float const& getNumber() const & { return number_; }
Float& getNumber() & { return number_; } Float& getNumber() & { return number_; }
bool operator==(Value const& rhs) const override; bool operator==(Value const& rhs) const override;
Value& operator=(Value const& rhs) override; Value& operator=(Value const& rhs) override;
@ -206,6 +183,35 @@ class JsonNumber : public Value {
} }
}; };
/*!
 * \brief Json value node holding a signed 64-bit integer (ValueKind::Integer).
 */
class JsonInteger : public Value {
public:
// Storage type of the integer payload.
using Int = int64_t;
private:
Int integer_;
public:
// Default-constructed node holds 0.
JsonInteger() : Value(ValueKind::Integer), integer_{0} {} // NOLINT
// Enabled only when the argument type is exactly `Int`; any other argument type
// is rejected by the enable_if instead of being implicitly converted.
template <typename IntT,
typename std::enable_if<std::is_same<IntT, Int>::value>::type* = nullptr>
JsonInteger(IntT value) : Value(ValueKind::Integer), integer_{value} {} // NOLINT
// Indexing and comparison/assignment overrides; declared here, defined out of line.
Json& operator[](std::string const & key) override;
Json& operator[](int ind) override;
bool operator==(Value const& rhs) const override;
Value& operator=(Value const& rhs) override;
// Ref-qualified accessors to the stored integer.
// NOTE(review): the `&&` overload returns a reference into an expiring object;
// the reference must not be kept beyond the temporary's lifetime.
Int const& getInteger() && { return integer_; }
Int const& getInteger() const & { return integer_; }
Int& getInteger() & { return integer_; }
// Serialisation hook; declared here, defined out of line.
void Save(JsonWriter* writer) override;
// True when `value` reports ValueKind::Integer as its type.
// Presumably consumed by IsA<T>() / Cast<T>() -- confirm against their definitions.
static bool isClassOf(Value const* value) {
return value->Type() == ValueKind::Integer;
}
};
class JsonNull : public Value { class JsonNull : public Value {
public: public:
JsonNull() : Value(ValueKind::Null) {} JsonNull() : Value(ValueKind::Null) {}
@ -256,15 +262,16 @@ class JsonBoolean : public Value {
}; };
struct StringView { struct StringView {
char const* str_; using CharT = char; // unsigned char
CharT const* str_;
size_t size_; size_t size_;
public: public:
StringView() = default; StringView() = default;
StringView(char const* str, size_t size) : str_{str}, size_{size} {} StringView(CharT const* str, size_t size) : str_{str}, size_{size} {}
char const& operator[](size_t p) const { return str_[p]; } CharT const& operator[](size_t p) const { return str_[p]; }
char const& at(size_t p) const { // NOLINT CharT const& at(size_t p) const { // NOLINT
CHECK_LT(p, size_); CHECK_LT(p, size_);
return str_[p]; return str_[p];
} }
@ -302,7 +309,7 @@ class Json {
public: public:
/*! \brief Load a Json object from string. */ /*! \brief Load a Json object from string. */
static Json Load(StringView str, bool ignore_specialization = false); static Json Load(StringView str);
/*! \brief Pass your own JsonReader. */ /*! \brief Pass your own JsonReader. */
static Json Load(JsonReader* reader); static Json Load(JsonReader* reader);
/*! \brief Dump json into stream. */ /*! \brief Dump json into stream. */
@ -319,6 +326,13 @@ class Json {
return *this; return *this;
} }
// integer
/*! \brief Construct a Json owning a new JsonInteger node.  The by-value argument
 *         is moved into the node, matching the assignment operator below. */
explicit Json(JsonInteger integer) :
    ptr_{new JsonInteger(std::move(integer))} {}
/*! \brief Replace the currently held value with a new JsonInteger node. */
Json& operator=(JsonInteger integer) {
  ptr_.reset(new JsonInteger(std::move(integer)));
  return *this;
}
// array // array
explicit Json(JsonArray list) : explicit Json(JsonArray list) :
ptr_ {new JsonArray(std::move(list))} {} ptr_ {new JsonArray(std::move(list))} {}
@ -327,14 +341,6 @@ class Json {
return *this; return *this;
} }
// raw
explicit Json(JsonRaw str) :
ptr_{new JsonRaw(std::move(str))} {}
Json& operator=(JsonRaw str) {
ptr_.reset(new JsonRaw(std::move(str)));
return *this;
}
// object // object
explicit Json(JsonObject object) : explicit Json(JsonObject object) :
ptr_{new JsonObject(std::move(object))} {} ptr_{new JsonObject(std::move(object))} {}
@ -410,10 +416,24 @@ JsonNumber::Float& GetImpl(T& val) { // NOLINT
template <typename T, template <typename T,
typename std::enable_if< typename std::enable_if<
std::is_same<T, JsonNumber const>::value>::type* = nullptr> std::is_same<T, JsonNumber const>::value>::type* = nullptr>
double const& GetImpl(T& val) { // NOLINT JsonNumber::Float const& GetImpl(T& val) { // NOLINT
return val.getNumber(); return val.getNumber();
} }
// Integer
// Accessor overload selected only when T is exactly (non-const) JsonInteger:
// returns a mutable reference to the stored integer.
template <typename T,
typename std::enable_if<
std::is_same<T, JsonInteger>::value>::type* = nullptr>
JsonInteger::Int& GetImpl(T& val) { // NOLINT
return val.getInteger();
}
// Accessor overload selected only when T is exactly `JsonInteger const`:
// returns a read-only reference to the stored integer.
template <typename T,
typename std::enable_if<
std::is_same<T, JsonInteger const>::value>::type* = nullptr>
JsonInteger::Int const& GetImpl(T& val) { // NOLINT
return val.getInteger();
}
// String // String
template <typename T, template <typename T,
typename std::enable_if< typename std::enable_if<
@ -442,19 +462,6 @@ bool const& GetImpl(T& val) { // NOLINT
return val.getBoolean(); return val.getBoolean();
} }
template <typename T,
typename std::enable_if<
std::is_same<T, JsonRaw>::value>::type* = nullptr>
std::string& GetImpl(T& val) { // NOLINT
return val.getRaw();
}
template <typename T,
typename std::enable_if<
std::is_same<T, JsonRaw const>::value>::type* = nullptr>
std::string const& GetImpl(T& val) { // NOLINT
return val.getRaw();
}
// Array // Array
template <typename T, template <typename T,
typename std::enable_if< typename std::enable_if<
@ -502,10 +509,10 @@ auto get(U& json) -> decltype(detail::GetImpl(*Cast<T>(&json.GetValue())))& { //
using Object = JsonObject; using Object = JsonObject;
using Array = JsonArray; using Array = JsonArray;
using Number = JsonNumber; using Number = JsonNumber;
using Integer = JsonInteger;
using Boolean = JsonBoolean; using Boolean = JsonBoolean;
using String = JsonString; using String = JsonString;
using Null = JsonNull; using Null = JsonNull;
using Raw = JsonRaw;
// Utils tailored for XGBoost. // Utils tailored for XGBoost.
@ -518,13 +525,14 @@ Object toJson(dmlc::Parameter<Type> const& param) {
return obj; return obj;
} }
inline std::map<std::string, std::string> fromJson(std::map<std::string, Json> const& param) { template <typename Type>
std::map<std::string, std::string> res; void fromJson(Json const& obj, dmlc::Parameter<Type>* param) {
for (auto const& kv : param) { auto const& j_param = get<Object const>(obj);
res[kv.first] = get<String const>(kv.second); std::map<std::string, std::string> m;
for (auto const& kv : j_param) {
m[kv.first] = get<String const>(kv.second);
} }
return res; param->InitAllowUnknown(m);
} }
} // namespace xgboost } // namespace xgboost
#endif // XGBOOST_JSON_H_ #endif // XGBOOST_JSON_H_

View File

@ -22,50 +22,15 @@ class FixedPrecisionStreamContainer : public std::basic_stringstream<
public: public:
FixedPrecisionStreamContainer() { FixedPrecisionStreamContainer() {
this->precision(std::numeric_limits<Number::Float>::max_digits10); this->precision(std::numeric_limits<Number::Float>::max_digits10);
this->imbue(std::locale("C"));
this->setf(std::ios::scientific);
} }
}; };
using FixedPrecisionStream = FixedPrecisionStreamContainer<std::allocator<char>>; using FixedPrecisionStream = FixedPrecisionStreamContainer<std::allocator<char>>;
/* /*
* \brief An reader that can be specialised. * \brief A json reader, currently error checking and utf-8 is not fully supported.
*
* Why specialization?
*
* First of all, we don't like specialization. This is purely for performance concern.
* Distributed environment freqently serializes model so at some point this could be a
* bottle neck for training performance. There are many other techniques for obtaining
* better performance, but all of them requires implementing thier own allocaltor(s),
* using simd instructions. And few of them can provide a easy to modify structure
* since they assumes a fixed memory layout.
*
* In XGBoost we provide specialized logic for parsing/writing tree models and linear
* models, where dense numeric values is presented, including weights, node ids etc.
*
* Plan for removing the specialization:
*
* We plan to upstream this implementaion into DMLC as it matures. For XGBoost, most of
* the time spent in load/dump is actually `sprintf`.
*
* To enable specialization, register a keyword that corresponds to
* key in Json object. For example in:
*
* \code
* { "key": {...} }
* \endcode
*
* To add special logic for parsing {...}, one can call:
*
* \code
* JsonReader::registry("key", [](StringView str, size_t* pos){ ... return JsonRaw(...); });
* \endcode
*
* Where str is a view of entire input string, while pos is a pointer to current position.
* The function must return a raw object. Later after obtaining a parsed object, say
* `Json obj`, you can obtain * the raw object by calling `obj["key"]' then perform the
* specialized parsing on it.
*
* See `LinearSelectRaw` and `LinearReader` in combination as an example.
*/ */
class JsonReader { class JsonReader {
protected: protected:
@ -77,17 +42,19 @@ class JsonReader {
public: public:
SourceLocation() : pos_(0) {} SourceLocation() : pos_(0) {}
explicit SourceLocation(size_t pos) : pos_{pos} {}
size_t Pos() const { return pos_; } size_t Pos() const { return pos_; }
SourceLocation& Forward(char c = 0) { SourceLocation& Forward() {
pos_++; pos_++;
return *this; return *this;
} }
SourceLocation& Forward(uint32_t n) {
pos_ += n;
return *this;
}
} cursor_; } cursor_;
StringView raw_str_; StringView raw_str_;
bool ignore_specialization_;
protected: protected:
void SkipSpaces(); void SkipSpaces();
@ -140,32 +107,13 @@ class JsonReader {
Json Parse(); Json Parse();
private:
using Fn = std::function<Json (StringView, size_t*)>;
public: public:
explicit JsonReader(StringView str, bool ignore = false) : explicit JsonReader(StringView str) :
raw_str_{str}, raw_str_{str} {}
ignore_specialization_{ignore} {}
explicit JsonReader(StringView str, size_t pos, bool ignore = false) :
cursor_{pos},
raw_str_{str},
ignore_specialization_{ignore} {}
virtual ~JsonReader() = default; virtual ~JsonReader() = default;
Json Load(); Json Load();
static std::map<std::string, Fn>& getRegistry() {
static std::map<std::string, Fn> set;
return set;
}
static std::map<std::string, Fn> const& registry(
std::string const& key, Fn fn) {
getRegistry()[key] = fn;
return getRegistry();
}
}; };
class JsonWriter { class JsonWriter {
@ -207,7 +155,7 @@ class JsonWriter {
virtual void Visit(JsonArray const* arr); virtual void Visit(JsonArray const* arr);
virtual void Visit(JsonObject const* obj); virtual void Visit(JsonObject const* obj);
virtual void Visit(JsonNumber const* num); virtual void Visit(JsonNumber const* num);
virtual void Visit(JsonRaw const* raw); virtual void Visit(JsonInteger const* num);
virtual void Visit(JsonNull const* null); virtual void Visit(JsonNull const* null);
virtual void Visit(JsonString const* str); virtual void Visit(JsonString const* str);
virtual void Visit(JsonBoolean const* boolean); virtual void Visit(JsonBoolean const* boolean);

View File

@ -2,11 +2,13 @@
* Copyright (c) by Contributors 2019 * Copyright (c) by Contributors 2019
*/ */
#include <sstream> #include <sstream>
#include <limits>
#include <cmath>
#include "xgboost/base.h"
#include "xgboost/logging.h" #include "xgboost/logging.h"
#include "xgboost/json.h" #include "xgboost/json.h"
#include "xgboost/json_io.h" #include "xgboost/json_io.h"
#include "../common/timer.h"
namespace xgboost { namespace xgboost {
@ -56,9 +58,11 @@ void JsonWriter::Visit(JsonNumber const* num) {
convertor_.str(""); convertor_.str("");
} }
void JsonWriter::Visit(JsonRaw const* raw) { void JsonWriter::Visit(JsonInteger const* num) {
auto const& str = raw->getRaw(); convertor_ << num->getInteger();
this->Write(str); auto const& str = convertor_.str();
this->Write(StringView{str.c_str(), str.size()});
convertor_.str("");
} }
void JsonWriter::Visit(JsonNull const* null) { void JsonWriter::Visit(JsonNull const* null) {
@ -120,7 +124,6 @@ std::string Value::TypeStr() const {
case ValueKind::Array: return "Array"; break; case ValueKind::Array: return "Array"; break;
case ValueKind::Boolean: return "Boolean"; break; case ValueKind::Boolean: return "Boolean"; break;
case ValueKind::Null: return "Null"; break; case ValueKind::Null: return "Null"; break;
case ValueKind::Raw: return "Raw"; break;
case ValueKind::Integer: return "Integer"; break; case ValueKind::Integer: return "Integer"; break;
} }
return ""; return "";
@ -225,35 +228,6 @@ void JsonArray::Save(JsonWriter* writer) {
writer->Visit(this); writer->Visit(this);
} }
// Json raw
Json& JsonRaw::operator[](std::string const & key) {
LOG(FATAL) << "Object of type "
<< Value::TypeStr() << " can not be indexed by string.";
return DummyJsonObject();
}
Json& JsonRaw::operator[](int ind) {
LOG(FATAL) << "Object of type "
<< Value::TypeStr() << " can not be indexed by Integer.";
return DummyJsonObject();
}
bool JsonRaw::operator==(Value const& rhs) const {
if (!IsA<JsonRaw>(&rhs)) { return false; }
auto& arr = Cast<JsonRaw const>(&rhs)->getRaw();
return std::equal(arr.cbegin(), arr.cend(), str_.cbegin());
}
Value & JsonRaw::operator=(Value const &rhs) {
auto const* casted = Cast<JsonRaw const>(&rhs);
str_ = casted->getRaw();
return *this;
}
void JsonRaw::Save(JsonWriter* writer) {
writer->Visit(this);
}
// Json Number // Json Number
Json& JsonNumber::operator[](std::string const & key) { Json& JsonNumber::operator[](std::string const & key) {
LOG(FATAL) << "Object of type " LOG(FATAL) << "Object of type "
@ -282,6 +256,34 @@ void JsonNumber::Save(JsonWriter* writer) {
writer->Visit(this); writer->Visit(this);
} }
// Json Integer
// Indexing an integer node by string key is never valid: this always reports a
// fatal error and `key` is not used.
// NOTE(review): the trailing return presumably exists only to satisfy the
// signature -- confirm that LOG(FATAL) does not return.
Json& JsonInteger::operator[](std::string const& key) {
LOG(FATAL) << "Object of type "
<< Value::TypeStr() << " can not be indexed by string.";
return DummyJsonObject();
}
// Indexing an integer node by position is never valid: this always reports a
// fatal error and `ind` is not used.
// NOTE(review): the trailing return presumably exists only to satisfy the
// signature -- confirm that LOG(FATAL) does not return.
Json& JsonInteger::operator[](int ind) {
LOG(FATAL) << "Object of type "
<< Value::TypeStr() << " can not be indexed by Integer.";
return DummyJsonObject();
}
/*! \brief Equal only when `rhs` is also a JsonInteger holding the same value. */
bool JsonInteger::operator==(Value const& rhs) const {
  // Short-circuit keeps the Cast from running on a node of another kind.
  return IsA<JsonInteger>(&rhs) &&
         Cast<JsonInteger const>(&rhs)->getInteger() == integer_;
}
/*! \brief Copy the integer held by `rhs`, which is first run through
 *         Cast<JsonInteger const>. */
Value & JsonInteger::operator=(Value const &rhs) {
  integer_ = Cast<JsonInteger const>(&rhs)->getInteger();
  return *this;
}
// Hands this node to the writer by calling its Visit() with `this`.
void JsonInteger::Save(JsonWriter* writer) {
writer->Visit(this);
}
// Json Null // Json Null
Json& JsonNull::operator[](std::string const & key) { Json& JsonNull::operator[](std::string const & key) {
LOG(FATAL) << "Object of type " LOG(FATAL) << "Object of type "
@ -377,7 +379,8 @@ void JsonReader::Error(std::string msg) const {
msg += '\n'; msg += '\n';
constexpr size_t kExtend = 8; constexpr size_t kExtend = 8;
auto beg = cursor_.Pos() - kExtend < 0 ? 0 : cursor_.Pos() - kExtend; auto beg = static_cast<int64_t>(cursor_.Pos()) -
static_cast<int64_t>(kExtend) < 0 ? 0 : cursor_.Pos() - kExtend;
auto end = cursor_.Pos() + kExtend >= raw_str_.size() ? auto end = cursor_.Pos() + kExtend >= raw_str_.size() ?
raw_str_.size() : cursor_.Pos() + kExtend; raw_str_.size() : cursor_.Pos() + kExtend;
@ -401,7 +404,7 @@ void JsonReader::SkipSpaces() {
while (cursor_.Pos() < raw_str_.size()) { while (cursor_.Pos() < raw_str_.size()) {
char c = raw_str_[cursor_.Pos()]; char c = raw_str_[cursor_.Pos()];
if (std::isspace(c)) { if (std::isspace(c)) {
cursor_.Forward(c); cursor_.Forward();
} else { } else {
break; break;
} }
@ -493,6 +496,8 @@ Json JsonReader::ParseObject() {
while (true) { while (true) {
SkipSpaces(); SkipSpaces();
ch = PeekNextChar(); ch = PeekNextChar();
CHECK_NE(ch, -1) << "cursor_.Pos(): " << cursor_.Pos() << ", "
<< "raw_str_.size():" << raw_str_.size();
if (ch != '"') { if (ch != '"') {
Expect('"', ch); Expect('"', ch);
} }
@ -504,16 +509,9 @@ Json JsonReader::ParseObject() {
Expect(':', ch); Expect(':', ch);
} }
Json value; Json value { Parse() };
if (!ignore_specialization_ &&
(getRegistry().find(get<String>(key)) != getRegistry().cend())) {
LOG(DEBUG) << "Using specialized parser for: " << get<String>(key);
value = getRegistry().at(get<String>(key))(raw_str_, &(cursor_.pos_));
} else {
value = Parse();
}
data[get<JsonString>(key)] = std::move(value); data[get<String>(key)] = std::move(value);
ch = GetNextNonSpaceChar(); ch = GetNextNonSpaceChar();
@ -527,15 +525,118 @@ Json JsonReader::ParseObject() {
} }
/*!
 * \brief Parse the numeric literal that starts at the current cursor position.
 *
 * Accepts an optional leading '-', an integer part, an optional fraction and an
 * optional exponent.  A literal with neither fraction nor exponent is returned as
 * a JsonInteger; anything else is accumulated in double precision and returned as
 * a Number::Float.  The cursor is advanced past the characters consumed.
 *
 * A missing digit after '-' or '.', or an exponent too large for the exponent
 * type, is reported through CHECK / LOG(FATAL).
 *
 * NOTE(review): scanning stops at the first non-numeric character, so this
 * assumes raw_str_.c_str() is terminated by a non-digit (e.g. NUL) -- confirm.
 */
Json JsonReader::ParseNumber() {
  // Adopted from sajson with some simplifications and small optimizations.
  char const* p = raw_str_.c_str() + cursor_.Pos();
  char const* const beg = p;  // start of the literal, used to advance the cursor

  // std::isdigit is undefined for negative `char` values; go through unsigned char.
  auto const is_digit = [](char c) {
    return std::isdigit(static_cast<unsigned char>(c)) != 0;
  };

  // TODO(trivialfis): Add back all the checks for number
  bool negative = false;
  if ('-' == *p) {
    ++p;
    negative = true;
  }

  bool is_float = false;

  using ExpInt = std::remove_const<
    decltype(std::numeric_limits<Number::Float>::max_exponent)>::type;
  constexpr auto kExpMax = std::numeric_limits<ExpInt>::max();
  constexpr auto kExpMin = std::numeric_limits<ExpInt>::min();

  JsonInteger::Int i = 0;
  double f = 0.0;  // Use double to maintain accuracy

  if (*p == '0') {
    ++p;
  } else {
    // Without this guard a lone '-' would fold a non-digit into `i` and step past
    // the character that terminated the literal.
    CHECK(is_digit(*p)) << "Invalid number, expecting a digit.  cursor_.Pos(): "
                        << cursor_.Pos();
    // NOTE(review): `i` is not guarded against overflow for literals with more
    // digits than JsonInteger::Int can hold.
    do {
      i = 10 * i + (*p - '0');
      ++p;
    } while (is_digit(*p));
  }

  ExpInt exponent = 0;
  const char *const dot_position = p;
  if ('.' == *p) {
    is_float = true;
    f = i;
    ++p;
    // A fraction needs at least one digit; otherwise the next character (an 'e',
    // or the terminator) would be consumed as if it were one.
    CHECK(is_digit(*p)) << "Invalid number, expecting a digit after '.'.  "
                        << "cursor_.Pos(): " << cursor_.Pos();
    do {
      f = f * 10 + (*p - '0');
      ++p;
    } while (is_digit(*p));
  }
  if (is_float) {
    // Minus the number of fraction digits: they were accumulated as an integer.
    exponent = static_cast<ExpInt>(dot_position - p + 1);
  }

  char e = *p;
  if ('e' == e || 'E' == e) {
    if (!is_float) {
      is_float = true;
      f = i;
    }
    ++p;

    bool negative_exponent = false;
    if ('-' == *p) {
      negative_exponent = true;
      ++p;
    } else if ('+' == *p) {
      ++p;
    }

    ExpInt exp = 0;

    char c = *p;
    while (is_digit(c)) {
      unsigned char digit = c - '0';
      // True exactly when `10 * exp + digit` would exceed kExpMax.
      if (XGBOOST_EXPECT(exp > (kExpMax - digit) / 10, false)) {
        LOG(FATAL) << "Overflow";
      }

      exp = 10 * exp + digit;
      ++p;
      c = *p;
    }
    static_assert(-kExpMax >= kExpMin, "exp can be negated without loss or UB");
    exponent += (negative_exponent ? -exp : exp);
  }

  if (exponent) {
    CHECK(is_float);
    // If d is zero but the exponent is huge, don't
    // multiply zero by inf which gives nan.
    if (f != 0.0) {
      // Only use exp10 from libc on gcc+linux
#if !defined(__GNUC__) || defined(_WIN32) || defined(__APPLE__)
#define exp10(val) std::pow(10, (val))
#endif  // !defined(__GNUC__) || defined(_WIN32) || defined(__APPLE__)
      f *= exp10(exponent);
#if !defined(__GNUC__) || defined(_WIN32) || defined(__APPLE__)
#undef exp10
#endif  // !defined(__GNUC__) || defined(_WIN32) || defined(__APPLE__)
    }
  }

  if (negative) {
    f = -f;
    i = -i;
  }

  auto moved = std::distance(beg, p);
  this->cursor_.Forward(moved);

  if (is_float) {
    return Json(static_cast<Number::Float>(f));
  } else {
    return Json(JsonInteger(i));
  }
}
Json JsonReader::ParseBoolean() { Json JsonReader::ParseBoolean() {
@ -566,7 +667,7 @@ Json JsonReader::ParseBoolean() {
} }
// This is an ad-hoc solution for writing numeric value in standard way. We need to add // This is an ad-hoc solution for writing numeric value in standard way. We need to add
// something locale independent way of writing stream. // a locale independent way of writing stream like `std::{from, to}_chars' from C++-17.
// FIXME(trivialfis): Remove this. // FIXME(trivialfis): Remove this.
class GlobalCLocale { class GlobalCLocale {
std::locale ori_; std::locale ori_;
@ -585,39 +686,23 @@ class GlobalCLocale {
} }
}; };
Json Json::Load(StringView str, bool ignore_specialization) { Json Json::Load(StringView str) {
GlobalCLocale guard; GlobalCLocale guard;
LOG(WARNING) << "Json serialization is still experimental." JsonReader reader(str);
" Output schema is subject to change in the future.";
JsonReader reader(str, ignore_specialization);
common::Timer t;
t.Start();
Json json{reader.Load()}; Json json{reader.Load()};
t.Stop();
t.PrintElapsed("Json::load");
return json; return json;
} }
Json Json::Load(JsonReader* reader) { Json Json::Load(JsonReader* reader) {
GlobalCLocale guard; GlobalCLocale guard;
common::Timer t;
t.Start();
Json json{reader->Load()}; Json json{reader->Load()};
t.Stop();
t.PrintElapsed("Json::load");
return json; return json;
} }
void Json::Dump(Json json, std::ostream *stream, bool pretty) { void Json::Dump(Json json, std::ostream *stream, bool pretty) {
GlobalCLocale guard; GlobalCLocale guard;
LOG(WARNING) << "Json serialization is still experimental." JsonWriter writer(stream, pretty);
" Output schema is subject to change in the future.";
JsonWriter writer(stream, true);
common::Timer t;
t.Start();
writer.Save(json); writer.Save(json);
t.Stop();
t.PrintElapsed("Json::dump");
} }
Json& Json::operator=(Json const &other) = default; Json& Json::operator=(Json const &other) = default;

View File

@ -143,9 +143,26 @@ TEST(Json, TestParseObject) {
} }
TEST(Json, ParseNumber) { TEST(Json, ParseNumber) {
{
std::string str = "31.8892"; std::string str = "31.8892";
auto json = Json::Load(StringView{str.c_str(), str.size()}); auto json = Json::Load(StringView{str.c_str(), str.size()});
ASSERT_NEAR(get<JsonNumber>(json), 31.8892f, kRtEps); ASSERT_NEAR(get<JsonNumber>(json), 31.8892f, kRtEps);
}
{
std::string str = "-31.8892";
auto json = Json::Load(StringView{str.c_str(), str.size()});
ASSERT_NEAR(get<JsonNumber>(json), -31.8892f, kRtEps);
}
{
std::string str = "2e4";
auto json = Json::Load(StringView{str.c_str(), str.size()});
ASSERT_NEAR(get<JsonNumber>(json), 2e4f, kRtEps);
}
{
std::string str = "2e-4";
auto json = Json::Load(StringView{str.c_str(), str.size()});
ASSERT_NEAR(get<JsonNumber>(json), 2e-4f, kRtEps);
}
} }
TEST(Json, ParseArray) { TEST(Json, ParseArray) {
@ -176,12 +193,13 @@ TEST(Json, ParseArray) {
] ]
} }
)json"; )json";
auto json = Json::Load(StringView{str.c_str(), str.size()}, true); auto json = Json::Load(StringView{str.c_str(), str.size()});
json = json["nodes"]; json = json["nodes"];
std::vector<Json> arr = get<JsonArray>(json); std::vector<Json> arr = get<JsonArray>(json);
ASSERT_EQ(arr.size(), 3); ASSERT_EQ(arr.size(), 3);
Json v0 = arr[0]; Json v0 = arr[0];
ASSERT_EQ(get<JsonNumber>(v0["depth"]), 3); ASSERT_EQ(get<Integer>(v0["depth"]), 3);
ASSERT_NEAR(get<Number>(v0["gain"]), 10.4866, kRtEps);
} }
TEST(Json, Null) { TEST(Json, Null) {
@ -203,7 +221,7 @@ TEST(Json, EmptyArray) {
} }
)json"; )json";
std::istringstream iss(str); std::istringstream iss(str);
auto json = Json::Load(StringView{str.c_str(), str.size()}, true); auto json = Json::Load(StringView{str.c_str(), str.size()});
auto arr = get<JsonArray>(json["leaf_vector"]); auto arr = get<JsonArray>(json["leaf_vector"]);
ASSERT_EQ(arr.size(), 0); ASSERT_EQ(arr.size(), 0);
} }
@ -215,14 +233,14 @@ TEST(Json, Boolean) {
"right_child": false "right_child": false
} }
)json"; )json";
Json j {Json::Load(StringView{str.c_str(), str.size()}, true)}; Json j {Json::Load(StringView{str.c_str(), str.size()})};
ASSERT_EQ(get<JsonBoolean>(j["left_child"]), true); ASSERT_EQ(get<JsonBoolean>(j["left_child"]), true);
ASSERT_EQ(get<JsonBoolean>(j["right_child"]), false); ASSERT_EQ(get<JsonBoolean>(j["right_child"]), false);
} }
TEST(Json, Indexing) { TEST(Json, Indexing) {
auto str = GetModelStr(); auto str = GetModelStr();
JsonReader reader(StringView{str.c_str(), str.size()}, true); JsonReader reader(StringView{str.c_str(), str.size()});
Json j {Json::Load(&reader)}; Json j {Json::Load(&reader)};
auto& value_1 = j["model_parameter"]; auto& value_1 = j["model_parameter"];
auto& value = value_1["base_score"]; auto& value = value_1["base_score"];
@ -242,7 +260,7 @@ TEST(Json, AssigningObjects) {
{ {
std::map<std::string, Json> objects; std::map<std::string, Json> objects;
Json json_objects { JsonObject() }; Json json_objects { JsonObject() };
std::vector<Json> arr_0 (1, Json(3.3)); std::vector<Json> arr_0 (1, Json(3.3f));
json_objects["tree_parameters"] = JsonArray(arr_0); json_objects["tree_parameters"] = JsonArray(arr_0);
std::vector<Json> json_arr = get<JsonArray>(json_objects["tree_parameters"]); std::vector<Json> json_arr = get<JsonArray>(json_objects["tree_parameters"]);
ASSERT_NEAR(get<JsonNumber>(json_arr[0]), 3.3f, kRtEps); ASSERT_NEAR(get<JsonNumber>(json_arr[0]), 3.3f, kRtEps);
@ -263,9 +281,9 @@ TEST(Json, AssigningObjects) {
TEST(Json, AssigningArray) { TEST(Json, AssigningArray) {
Json json; Json json;
json = JsonArray(); json = JsonArray();
std::vector<Json> tmp_0 {Json(Number(1)), Json(Number(2))}; std::vector<Json> tmp_0 {Json(Number(1.0f)), Json(Number(2.0f))};
json = tmp_0; json = tmp_0;
std::vector<Json> tmp_1 {Json(Number(3))}; std::vector<Json> tmp_1 {Json(Number(3.0f))};
get<Array>(json) = tmp_1; get<Array>(json) = tmp_1;
std::vector<Json> res = get<Array>(json); std::vector<Json> res = get<Array>(json);
ASSERT_EQ(get<Number>(res[0]), 3); ASSERT_EQ(get<Number>(res[0]), 3);
@ -274,14 +292,14 @@ TEST(Json, AssigningArray) {
TEST(Json, AssigningNumber) { TEST(Json, AssigningNumber) {
{ {
// right value // right value
Json json = Json{ Number(4) }; Json json = Json{ Number(4.0f) };
get<Number>(json) = 15; get<Number>(json) = 15;
ASSERT_EQ(get<Number>(json), 15); ASSERT_EQ(get<Number>(json), 15);
} }
{ {
// left value ref // left value ref
Json json = Json{ Number(4) }; Json json = Json{ Number(4.0f) };
Number::Float& ref = get<Number>(json); Number::Float& ref = get<Number>(json);
ref = 15; ref = 15;
ASSERT_EQ(get<Number>(json), 15); ASSERT_EQ(get<Number>(json), 15);
@ -289,7 +307,7 @@ TEST(Json, AssigningNumber) {
{ {
// left value // left value
Json json = Json{ Number(4) }; Json json = Json{ Number(4.0f) };
double value = get<Number>(json); double value = get<Number>(json);
ASSERT_EQ(value, 4); ASSERT_EQ(value, 4);
value = 15; // NOLINT value = 15; // NOLINT
@ -323,8 +341,8 @@ TEST(Json, AssigningString) {
} }
TEST(Json, LoadDump) { TEST(Json, LoadDump) {
std::string buffer = GetModelStr(); std::string ori_buffer = GetModelStr();
Json origin {Json::Load(StringView{buffer.c_str(), buffer.size()}, true)}; Json origin {Json::Load(StringView{ori_buffer.c_str(), ori_buffer.size()})};
dmlc::TemporaryDirectory tempdir; dmlc::TemporaryDirectory tempdir;
auto const& path = tempdir.path + "test_model_dump"; auto const& path = tempdir.path + "test_model_dump";
@ -333,10 +351,11 @@ TEST(Json, LoadDump) {
Json::Dump(origin, &fout); Json::Dump(origin, &fout);
fout.close(); fout.close();
buffer = common::LoadSequentialFile(path); std::string new_buffer = common::LoadSequentialFile(path);
Json load_back {Json::Load(StringView(buffer.c_str(), buffer.size()), true)}; Json load_back {Json::Load(StringView(new_buffer.c_str(), new_buffer.size()))};
ASSERT_EQ(load_back, origin); ASSERT_EQ(load_back, origin) << ori_buffer << "\n\n---------------\n\n"
<< new_buffer;
} }
// For now Json is quite ignorance about unicode. // For now Json is quite ignorance about unicode.
@ -344,7 +363,7 @@ TEST(Json, CopyUnicode) {
std::string json_str = R"json( std::string json_str = R"json(
{"m": ["\ud834\udd1e", "\u20ac", "\u0416", "\u00f6"]} {"m": ["\ud834\udd1e", "\u20ac", "\u0416", "\u00f6"]}
)json"; )json";
Json loaded {Json::Load(StringView{json_str.c_str(), json_str.size()}, true)}; Json loaded {Json::Load(StringView{json_str.c_str(), json_str.size()})};
std::stringstream ss_1; std::stringstream ss_1;
Json::Dump(loaded, &ss_1); Json::Dump(loaded, &ss_1);
@ -359,7 +378,7 @@ TEST(Json, WrongCasts) {
ASSERT_ANY_THROW(get<Number>(json)); ASSERT_ANY_THROW(get<Number>(json));
} }
{ {
Json json = Json{ Array{ std::vector<Json>{ Json{ Number{1} } } } }; Json json = Json{ Array{ std::vector<Json>{ Json{ Number{1.0f} } } } };
ASSERT_ANY_THROW(get<Number>(json)); ASSERT_ANY_THROW(get<Number>(json));
} }
{ {
@ -368,4 +387,31 @@ TEST(Json, WrongCasts) {
ASSERT_ANY_THROW(get<Number>(json)); ASSERT_ANY_THROW(get<Number>(json));
} }
} }
// Checks that literals without fraction/exponent load as Integer while literals
// with a fraction load as Number.
TEST(Json, Int_vs_Float) {
// If an integer were parsed as a float, calling `get<Integer>()` would throw.
{
std::string str = R"json(
{
"number": 123.4,
"integer": 123
})json";
Json obj = Json::Load({str.c_str(), str.size()});
JsonNumber::Float number = get<Number>(obj["number"]);
ASSERT_NEAR(number, 123.4f, kRtEps);
JsonInteger::Int integer = get<Integer>(obj["integer"]);
ASSERT_EQ(integer, 123);
}
{
// 2503595760 is larger than 2147483647, so it does not fit a 32-bit signed int;
// the value must still round-trip exactly through JsonInteger.
std::string str = R"json(
{"data": [2503595760, false], "shape": [10]}
)json";
Json obj = Json::Load({str.c_str(), str.size()});
auto array = get<Array>(obj["data"]);
// Despite its name, `ptr` is a copy of the integer value, not a pointer.
auto ptr = get<Integer>(array[0]);
ASSERT_EQ(ptr, 2503595760);
}
}
} // namespace xgboost } // namespace xgboost