A simple Json implementation for future use. (#4708)

* A simple Json implementation for future use.
This commit is contained in:
Jiaming Yuan 2019-07-29 21:17:27 -04:00 committed by GitHub
parent 9b9e298ff2
commit d2e1e4d5b4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 1816 additions and 1 deletions

View File

@ -1,5 +1,5 @@
/*!
* Copyright 2015 by Contributors.
* Copyright 2015-2019 by Contributors.
* \brief XGBoost Amalgamation.
* This offers an alternative way to compile the entire library from this single file.
*
@ -66,6 +66,8 @@
#include "../src/common/common.cc"
#include "../src/common/host_device_vector.cc"
#include "../src/common/hist_util.cc"
#include "../src/common/json.cc"
#include "../src/common/io.cc"
// c_api
#include "../src/c_api/c_api.cc"

530
include/xgboost/json.h Normal file
View File

@ -0,0 +1,530 @@
/*!
* Copyright (c) by Contributors 2019
*/
#ifndef XGBOOST_JSON_H_
#define XGBOOST_JSON_H_
#include <xgboost/logging.h>
#include <string>
#include <map>
#include <memory>
#include <vector>
#include <functional>
#include <utility>
namespace xgboost {
class Json;
class JsonReader;
class JsonWriter;
class Value {
public:
/*!\brief Simplified implementation of LLVM RTTI. */
enum class ValueKind {
String,
Number,
Integer,
Object, // std::map
Array, // std::vector
Raw,
Boolean,
Null
};
explicit Value(ValueKind _kind) : kind_{_kind} {}
ValueKind Type() const { return kind_; }
virtual ~Value() = default;
virtual void Save(JsonWriter* writer) = 0;
virtual Json& operator[](std::string const & key) = 0;
virtual Json& operator[](int ind) = 0;
virtual bool operator==(Value const& rhs) const = 0;
virtual Value& operator=(Value const& rhs) = 0;
std::string TypeStr() const;
private:
ValueKind kind_;
};
template <typename T>
bool IsA(Value const* value) {
return T::isClassOf(value);
}
template <typename T, typename U>
T* Cast(U* value) {
if (IsA<T>(value)) {
return dynamic_cast<T*>(value);
} else {
throw std::runtime_error(
"Invalid cast, from " + value->TypeStr() + " to " + T().TypeStr());
}
}
class JsonString : public Value {
std::string str_;
public:
JsonString() : Value(ValueKind::String) {}
JsonString(std::string const& str) : // NOLINT
Value(ValueKind::String), str_{str} {}
JsonString(std::string&& str) : // NOLINT
Value(ValueKind::String), str_{std::move(str)} {}
void Save(JsonWriter* writer) override;
Json& operator[](std::string const & key) override;
Json& operator[](int ind) override;
std::string const& getString() && { return str_; }
std::string const& getString() const & { return str_; }
std::string& getString() & { return str_; }
bool operator==(Value const& rhs) const override;
Value& operator=(Value const& rhs) override;
static bool isClassOf(Value const* value) {
return value->Type() == ValueKind::String;
}
};
class JsonArray : public Value {
std::vector<Json> vec_;
public:
JsonArray() : Value(ValueKind::Array) {}
JsonArray(std::vector<Json>&& arr) : // NOLINT
Value(ValueKind::Array), vec_{std::move(arr)} {}
JsonArray(std::vector<Json> const& arr) : // NOLINT
Value(ValueKind::Array), vec_{arr} {}
JsonArray(JsonArray const& that) = delete;
JsonArray(JsonArray && that);
void Save(JsonWriter* writer) override;
Json& operator[](std::string const & key) override;
Json& operator[](int ind) override;
std::vector<Json> const& getArray() && { return vec_; }
std::vector<Json> const& getArray() const & { return vec_; }
std::vector<Json>& getArray() & { return vec_; }
bool operator==(Value const& rhs) const override;
Value& operator=(Value const& rhs) override;
static bool isClassOf(Value const* value) {
return value->Type() == ValueKind::Array;
}
};
class JsonRaw : public Value {
std::string str_;
public:
explicit JsonRaw(std::string&& str) :
Value(ValueKind::Raw),
str_{std::move(str)}{} // NOLINT
JsonRaw() : Value(ValueKind::Raw) {}
std::string const& getRaw() && { return str_; }
std::string const& getRaw() const & { return str_; }
std::string& getRaw() & { return str_; }
void Save(JsonWriter* writer) override;
Json& operator[](std::string const & key) override;
Json& operator[](int ind) override;
bool operator==(Value const& rhs) const override;
Value& operator=(Value const& rhs) override;
static bool isClassOf(Value const* value) {
return value->Type() == ValueKind::Raw;
}
};
class JsonObject : public Value {
std::map<std::string, Json> object_;
public:
JsonObject() : Value(ValueKind::Object) {}
JsonObject(std::map<std::string, Json>&& object); // NOLINT
JsonObject(JsonObject const& that) = delete;
JsonObject(JsonObject && that);
void Save(JsonWriter* writer) override;
Json& operator[](std::string const & key) override;
Json& operator[](int ind) override;
std::map<std::string, Json> const& getObject() && { return object_; }
std::map<std::string, Json> const& getObject() const & { return object_; }
std::map<std::string, Json> & getObject() & { return object_; }
bool operator==(Value const& rhs) const override;
Value& operator=(Value const& rhs) override;
static bool isClassOf(Value const* value) {
return value->Type() == ValueKind::Object;
}
virtual ~JsonObject() = default;
};
class JsonNumber : public Value {
public:
using Float = float;
private:
Float number_;
public:
JsonNumber() : Value(ValueKind::Number) {}
JsonNumber(double value) : Value(ValueKind::Number) { // NOLINT
number_ = value;
}
void Save(JsonWriter* writer) override;
Json& operator[](std::string const & key) override;
Json& operator[](int ind) override;
Float const& getNumber() && { return number_; }
Float const& getNumber() const & { return number_; }
Float& getNumber() & { return number_; }
bool operator==(Value const& rhs) const override;
Value& operator=(Value const& rhs) override;
static bool isClassOf(Value const* value) {
return value->Type() == ValueKind::Number;
}
};
class JsonNull : public Value {
public:
JsonNull() : Value(ValueKind::Null) {}
JsonNull(std::nullptr_t) : Value(ValueKind::Null) {} // NOLINT
void Save(JsonWriter* writer) override;
Json& operator[](std::string const & key) override;
Json& operator[](int ind) override;
bool operator==(Value const& rhs) const override;
Value& operator=(Value const& rhs) override;
static bool isClassOf(Value const* value) {
return value->Type() == ValueKind::Null;
}
};
/*! \brief Describes both true and false. */
class JsonBoolean : public Value {
bool boolean_;
public:
JsonBoolean() : Value(ValueKind::Boolean) {} // NOLINT
// Ambigious with JsonNumber.
template <typename Bool,
typename std::enable_if<
std::is_same<Bool, bool>::value ||
std::is_same<Bool, bool const>::value>::type* = nullptr>
JsonBoolean(Bool value) : // NOLINT
Value(ValueKind::Boolean), boolean_{value} {}
void Save(JsonWriter* writer) override;
Json& operator[](std::string const & key) override;
Json& operator[](int ind) override;
bool const& getBoolean() && { return boolean_; }
bool const& getBoolean() const & { return boolean_; }
bool& getBoolean() & { return boolean_; }
bool operator==(Value const& rhs) const override;
Value& operator=(Value const& rhs) override;
static bool isClassOf(Value const* value) {
return value->Type() == ValueKind::Boolean;
}
};
struct StringView {
char const* str_;
size_t size_;
public:
StringView() = default;
StringView(char const* str, size_t size) : str_{str}, size_{size} {}
char const& operator[](size_t p) const { return str_[p]; }
char const& at(size_t p) const { // NOLINT
CHECK_LT(p, size_);
return str_[p];
}
size_t size() const { return size_; } // NOLINT
// Copies a portion of string. Since we don't have std::from_chars and friends here, so
// copying substring is necessary for appending `\0`. It's not too bad since string by
// default has small vector optimization, which is enabled by most if not all modern
// compilers for numeric values.
std::string substr(size_t beg, size_t n) const { // NOLINT
CHECK_LE(beg, size_);
return std::string {str_ + beg, n < (size_ - beg) ? n : (size_ - beg)};
}
char const* c_str() const { return str_; } // NOLINT
};
/*!
* \brief Data structure representing JSON format.
*
* Limitation: UTF-8 is not properly supported. Code points above ASCII are
* invalid.
*
* Examples:
*
* \code
* // Create a JSON object.
* Json object { Object() };
* // Assign key "key" with a JSON string "Value";
* object["key"] = String("Value");
* // Assign key "arr" with a empty JSON Array;
* object["arr"] = Array();
* \endcode
*/
class Json {
friend JsonWriter;
public:
/*! \brief Load a Json object from string. */
static Json Load(StringView str, bool ignore_specialization = false);
/*! \brief Pass your own JsonReader. */
static Json Load(JsonReader* reader);
/*! \brief Dump json into stream. */
static void Dump(Json json, std::ostream* stream,
bool pretty = ConsoleLogger::ShouldLog(
ConsoleLogger::LogVerbosity::kDebug));
Json() : ptr_{new JsonNull} {}
// number
explicit Json(JsonNumber number) : ptr_{new JsonNumber(number)} {}
Json& operator=(JsonNumber number) {
ptr_.reset(new JsonNumber(std::move(number)));
return *this;
}
// array
explicit Json(JsonArray list) :
ptr_ {new JsonArray(std::move(list))} {}
Json& operator=(JsonArray array) {
ptr_.reset(new JsonArray(std::move(array)));
return *this;
}
// raw
explicit Json(JsonRaw str) :
ptr_{new JsonRaw(std::move(str))} {}
Json& operator=(JsonRaw str) {
ptr_.reset(new JsonRaw(std::move(str)));
return *this;
}
// object
explicit Json(JsonObject object) :
ptr_{new JsonObject(std::move(object))} {}
Json& operator=(JsonObject object) {
ptr_.reset(new JsonObject(std::move(object)));
return *this;
}
// string
explicit Json(JsonString str) :
ptr_{new JsonString(std::move(str))} {}
Json& operator=(JsonString str) {
ptr_.reset(new JsonString(std::move(str)));
return *this;
}
// bool
explicit Json(JsonBoolean boolean) :
ptr_{new JsonBoolean(std::move(boolean))} {}
Json& operator=(JsonBoolean boolean) {
ptr_.reset(new JsonBoolean(std::move(boolean)));
return *this;
}
// null
explicit Json(JsonNull null) :
ptr_{new JsonNull(std::move(null))} {}
Json& operator=(JsonNull null) {
ptr_.reset(new JsonNull(std::move(null)));
return *this;
}
// copy
Json(Json const& other) : ptr_{other.ptr_} {}
Json& operator=(Json const& other);
// move
Json(Json&& other) : ptr_{std::move(other.ptr_)} {}
Json& operator=(Json&& other) {
ptr_ = std::move(other.ptr_);
return *this;
}
/*! \brief Index Json object with a std::string, used for Json Object. */
Json& operator[](std::string const & key) const { return (*ptr_)[key]; }
/*! \brief Index Json object with int, used for Json Array. */
Json& operator[](int ind) const { return (*ptr_)[ind]; }
/*! \Brief Return the reference to stored Json value. */
Value const& GetValue() const & { return *ptr_; }
Value const& GetValue() && { return *ptr_; }
Value& GetValue() & { return *ptr_; }
bool operator==(Json const& rhs) const {
return *ptr_ == *(rhs.ptr_);
}
private:
std::shared_ptr<Value> ptr_;
};
template <typename T>
bool IsA(Json const j) {
auto const& v = j.GetValue();
return IsA<T>(&v);
}
namespace detail {
// Number
template <typename T,
typename std::enable_if<
std::is_same<T, JsonNumber>::value>::type* = nullptr>
JsonNumber::Float& GetImpl(T& val) { // NOLINT
return val.getNumber();
}
template <typename T,
typename std::enable_if<
std::is_same<T, JsonNumber const>::value>::type* = nullptr>
double const& GetImpl(T& val) { // NOLINT
return val.getNumber();
}
// String
template <typename T,
typename std::enable_if<
std::is_same<T, JsonString>::value>::type* = nullptr>
std::string& GetImpl(T& val) { // NOLINT
return val.getString();
}
template <typename T,
typename std::enable_if<
std::is_same<T, JsonString const>::value>::type* = nullptr>
std::string const& GetImpl(T& val) { // NOLINT
return val.getString();
}
// Boolean
template <typename T,
typename std::enable_if<
std::is_same<T, JsonBoolean>::value>::type* = nullptr>
bool& GetImpl(T& val) { // NOLINT
return val.getBoolean();
}
template <typename T,
typename std::enable_if<
std::is_same<T, JsonBoolean const>::value>::type* = nullptr>
bool const& GetImpl(T& val) { // NOLINT
return val.getBoolean();
}
template <typename T,
typename std::enable_if<
std::is_same<T, JsonRaw>::value>::type* = nullptr>
std::string& GetImpl(T& val) { // NOLINT
return val.getRaw();
}
template <typename T,
typename std::enable_if<
std::is_same<T, JsonRaw const>::value>::type* = nullptr>
std::string const& GetImpl(T& val) { // NOLINT
return val.getRaw();
}
// Array
template <typename T,
typename std::enable_if<
std::is_same<T, JsonArray>::value>::type* = nullptr>
std::vector<Json>& GetImpl(T& val) { // NOLINT
return val.getArray();
}
template <typename T,
typename std::enable_if<
std::is_same<T, JsonArray const>::value>::type* = nullptr>
std::vector<Json> const& GetImpl(T& val) { // NOLINT
return val.getArray();
}
// Object
template <typename T,
typename std::enable_if<
std::is_same<T, JsonObject>::value>::type* = nullptr>
std::map<std::string, Json>& GetImpl(T& val) { // NOLINT
return val.getObject();
}
template <typename T,
typename std::enable_if<
std::is_same<T, JsonObject const>::value>::type* = nullptr>
std::map<std::string, Json> const& GetImpl(T& val) { // NOLINT
return val.getObject();
}
} // namespace detail
/*!
* \brief Get Json value.
*
* \tparam T One of the Json value type.
*
* \param json
* \return Value contained in Json object of type T.
*/
template <typename T, typename U>
auto get(U& json) -> decltype(detail::GetImpl(*Cast<T>(&json.GetValue())))& { // NOLINT
auto& value = *Cast<T>(&json.GetValue());
return detail::GetImpl(value);
}
using Object = JsonObject;
using Array = JsonArray;
using Number = JsonNumber;
using Boolean = JsonBoolean;
using String = JsonString;
using Null = JsonNull;
using Raw = JsonRaw;
// Utils tailored for XGBoost.
template <typename Type>
Object toJson(dmlc::Parameter<Type> const& param) {
Object obj;
for (auto const& kv : param.__DICT__()) {
obj[kv.first] = kv.second;
}
return obj;
}
inline std::map<std::string, std::string> fromJson(std::map<std::string, Json> const& param) {
std::map<std::string, std::string> res;
for (auto const& kv : param) {
res[kv.first] = get<String const>(kv.second);
}
return res;
}
} // namespace xgboost
#endif // XGBOOST_JSON_H_

217
include/xgboost/json_io.h Normal file
View File

@ -0,0 +1,217 @@
/*!
* Copyright (c) by Contributors 2019
*/
#ifndef XGBOOST_JSON_IO_H_
#define XGBOOST_JSON_IO_H_
#include <xgboost/json.h>
#include <memory>
#include <string>
#include <cinttypes>
#include <utility>
#include <map>
#include <limits>
#include <sstream>
#include <locale>
namespace xgboost {
template <typename Allocator>
class FixedPrecisionStreamContainer : public std::basic_stringstream<
char, std::char_traits<char>, Allocator> {
public:
FixedPrecisionStreamContainer() {
this->precision(std::numeric_limits<Number::Float>::max_digits10);
}
};
using FixedPrecisionStream = FixedPrecisionStreamContainer<std::allocator<char>>;
/*
* \brief An reader that can be specialised.
*
* Why specialization?
*
* First of all, we don't like specialization. This is purely for performance concern.
* Distributed environment freqently serializes model so at some point this could be a
* bottle neck for training performance. There are many other techniques for obtaining
* better performance, but all of them requires implementing thier own allocaltor(s),
* using simd instructions. And few of them can provide a easy to modify structure
* since they assumes a fixed memory layout.
*
* In XGBoost we provide specialized logic for parsing/writing tree models and linear
* models, where dense numeric values is presented, including weights, node ids etc.
*
* Plan for removing the specialization:
*
* We plan to upstream this implementaion into DMLC as it matures. For XGBoost, most of
* the time spent in load/dump is actually `sprintf`.
*
* To enable specialization, register a keyword that corresponds to
* key in Json object. For example in:
*
* \code
* { "key": {...} }
* \endcode
*
* To add special logic for parsing {...}, one can call:
*
* \code
* JsonReader::registry("key", [](StringView str, size_t* pos){ ... return JsonRaw(...); });
* \endcode
*
* Where str is a view of entire input string, while pos is a pointer to current position.
* The function must return a raw object. Later after obtaining a parsed object, say
* `Json obj`, you can obtain * the raw object by calling `obj["key"]' then perform the
* specialized parsing on it.
*
* See `LinearSelectRaw` and `LinearReader` in combination as an example.
*/
class JsonReader {
protected:
size_t constexpr static kMaxNumLength =
std::numeric_limits<double>::max_digits10 + 1;
struct SourceLocation {
size_t pos_; // current position in raw_str_
public:
SourceLocation() : pos_(0) {}
explicit SourceLocation(size_t pos) : pos_{pos} {}
size_t Pos() const { return pos_; }
SourceLocation& Forward(char c = 0) {
pos_++;
return *this;
}
} cursor_;
StringView raw_str_;
bool ignore_specialization_;
protected:
void SkipSpaces();
char GetNextChar() {
if (cursor_.Pos() == raw_str_.size()) {
return -1;
}
char ch = raw_str_[cursor_.Pos()];
cursor_.Forward();
return ch;
}
char PeekNextChar() {
if (cursor_.Pos() == raw_str_.size()) {
return -1;
}
char ch = raw_str_[cursor_.Pos()];
return ch;
}
char GetNextNonSpaceChar() {
SkipSpaces();
return GetNextChar();
}
char GetChar(char c) {
char result = GetNextNonSpaceChar();
if (result != c) { Expect(c, result); }
return result;
}
void Error(std::string msg) const;
// Report expected character
void Expect(char c, char got) {
std::string msg = "Expecting: \"";
msg += c;
msg += "\", got: \"";
msg += std::string {got} + " \"";
Error(msg);
}
virtual Json ParseString();
virtual Json ParseObject();
virtual Json ParseArray();
virtual Json ParseNumber();
virtual Json ParseBoolean();
virtual Json ParseNull();
Json Parse();
private:
using Fn = std::function<Json (StringView, size_t*)>;
public:
explicit JsonReader(StringView str, bool ignore = false) :
raw_str_{str},
ignore_specialization_{ignore} {}
explicit JsonReader(StringView str, size_t pos, bool ignore = false) :
cursor_{pos},
raw_str_{str},
ignore_specialization_{ignore} {}
virtual ~JsonReader() = default;
Json Load();
static std::map<std::string, Fn>& getRegistry() {
static std::map<std::string, Fn> set;
return set;
}
static std::map<std::string, Fn> const& registry(
std::string const& key, Fn fn) {
getRegistry()[key] = fn;
return getRegistry();
}
};
class JsonWriter {
static constexpr size_t kIndentSize = 2;
FixedPrecisionStream convertor_;
size_t n_spaces_;
std::ostream* stream_;
bool pretty_;
public:
JsonWriter(std::ostream* stream, bool pretty) :
n_spaces_{0}, stream_{stream}, pretty_{pretty} {}
virtual ~JsonWriter() = default;
void NewLine() {
if (pretty_) {
*stream_ << u8"\n" << std::string(n_spaces_, ' ');
}
}
void BeginIndent() {
n_spaces_ += kIndentSize;
}
void EndIndent() {
n_spaces_ -= kIndentSize;
}
void Write(std::string str) {
*stream_ << str;
}
void Write(StringView str) {
stream_->write(str.c_str(), str.size());
}
void Save(Json json);
virtual void Visit(JsonArray const* arr);
virtual void Visit(JsonObject const* obj);
virtual void Visit(JsonNumber const* num);
virtual void Visit(JsonRaw const* raw);
virtual void Visit(JsonNull const* null);
virtual void Visit(JsonString const* str);
virtual void Visit(JsonBoolean const* boolean);
};
} // namespace xgboost
#endif // XGBOOST_JSON_IO_H_

67
src/common/io.cc Normal file
View File

@ -0,0 +1,67 @@
/*!
* Copyright (c) by Contributors 2019
*/
#if defined(__unix__)
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#endif // defined(__unix__)
#include <cstdio>
#include <string>
#include "xgboost/logging.h"
namespace xgboost {
namespace common {
std::string LoadSequentialFile(std::string fname) {
auto OpenErr = [&fname]() {
std::string msg;
msg = "Opening " + fname + " failed: ";
msg += strerror(errno);
LOG(FATAL) << msg;
};
auto ReadErr = [&fname]() {
std::string msg {"Error in reading file: "};
msg += fname;
msg += ": ";
msg += strerror(errno);
LOG(FATAL) << msg;
};
std::string buffer;
#if defined(__unix__)
struct stat fs;
if (stat(fname.c_str(), &fs) != 0) {
OpenErr();
}
size_t f_size_bytes = fs.st_size;
buffer.resize(f_size_bytes+1);
int32_t fd = open(fname.c_str(), O_RDONLY);
posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL);
ssize_t bytes_read = read(fd, &buffer[0], f_size_bytes);
if (bytes_read < 0) {
close(fd);
ReadErr();
}
close(fd);
#else
FILE *f = fopen(fname.c_str(), "r");
if (f == NULL) {
std::string msg;
OpenErr();
}
fseek(f, 0, SEEK_END);
auto fsize = ftell(f);
fseek(f, 0, SEEK_SET);
buffer.resize(fsize + 1);
fread(&buffer[0], 1, fsize, f);
fclose(f);
#endif // defined(__unix__)
return buffer;
}
} // namespace common
} // namespace xgboost

View File

@ -70,6 +70,10 @@ class PeekableInStream : public dmlc::Stream {
/*! \brief internal buffer */
std::string buffer_;
};
// Optimized for consecutive file loading in unix like systime.
std::string LoadSequentialFile(std::string fname);
} // namespace common
} // namespace xgboost
#endif // XGBOOST_COMMON_IO_H_

624
src/common/json.cc Normal file
View File

@ -0,0 +1,624 @@
/*!
* Copyright (c) by Contributors 2019
*/
#include <sstream>
#include "xgboost/logging.h"
#include "xgboost/json.h"
#include "xgboost/json_io.h"
#include "../common/timer.h"
namespace xgboost {
void JsonWriter::Save(Json json) {
json.ptr_->Save(this);
}
void JsonWriter::Visit(JsonArray const* arr) {
this->Write("[");
auto const& vec = arr->getArray();
size_t size = vec.size();
for (size_t i = 0; i < size; ++i) {
auto const& value = vec[i];
this->Save(value);
if (i != size-1) { Write(", "); }
}
this->Write("]");
}
void JsonWriter::Visit(JsonObject const* obj) {
this->Write("{");
this->BeginIndent();
this->NewLine();
size_t i = 0;
size_t size = obj->getObject().size();
for (auto& value : obj->getObject()) {
this->Write("\"" + value.first + "\": ");
this->Save(value.second);
if (i != size-1) {
this->Write(",");
this->NewLine();
}
i++;
}
this->EndIndent();
this->NewLine();
this->Write("}");
}
void JsonWriter::Visit(JsonNumber const* num) {
convertor_ << num->getNumber();
auto const& str = convertor_.str();
this->Write(StringView{str.c_str(), str.size()});
convertor_.str("");
}
void JsonWriter::Visit(JsonRaw const* raw) {
auto const& str = raw->getRaw();
this->Write(str);
}
void JsonWriter::Visit(JsonNull const* null) {
this->Write("null");
}
void JsonWriter::Visit(JsonString const* str) {
std::string buffer;
buffer += '"';
auto const& string = str->getString();
for (size_t i = 0; i < string.length(); i++) {
const char ch = string[i];
if (ch == '\\') {
if (i < string.size() && string[i+1] == 'u') {
buffer += "\\";
} else {
buffer += "\\\\";
}
} else if (ch == '"') {
buffer += "\\\"";
} else if (ch == '\b') {
buffer += "\\b";
} else if (ch == '\f') {
buffer += "\\f";
} else if (ch == '\n') {
buffer += "\\n";
} else if (ch == '\r') {
buffer += "\\r";
} else if (ch == '\t') {
buffer += "\\t";
} else if (static_cast<uint8_t>(ch) <= 0x1f) {
// Unit separator
char buf[8];
snprintf(buf, sizeof buf, "\\u%04x", ch);
buffer += buf;
} else {
buffer += ch;
}
}
buffer += '"';
this->Write(buffer);
}
void JsonWriter::Visit(JsonBoolean const* boolean) {
bool val = boolean->getBoolean();
if (val) {
this->Write(u8"true");
} else {
this->Write(u8"false");
}
}
// Value
std::string Value::TypeStr() const {
switch (kind_) {
case ValueKind::String: return "String"; break;
case ValueKind::Number: return "Number"; break;
case ValueKind::Object: return "Object"; break;
case ValueKind::Array: return "Array"; break;
case ValueKind::Boolean: return "Boolean"; break;
case ValueKind::Null: return "Null"; break;
case ValueKind::Raw: return "Raw"; break;
case ValueKind::Integer: return "Integer"; break;
}
return "";
}
// Only used for keeping old compilers happy about non-reaching return
// statement.
Json& DummyJsonObject() {
static Json obj;
return obj;
}
// Json Object
JsonObject::JsonObject(JsonObject && that) :
Value(ValueKind::Object), object_{std::move(that.object_)} {}
JsonObject::JsonObject(std::map<std::string, Json>&& object)
: Value(ValueKind::Object), object_{std::move(object)} {}
Json& JsonObject::operator[](std::string const & key) {
return object_[key];
}
Json& JsonObject::operator[](int ind) {
LOG(FATAL) << "Object of type "
<< Value::TypeStr() << " can not be indexed by Integer.";
return DummyJsonObject();
}
bool JsonObject::operator==(Value const& rhs) const {
if (!IsA<JsonObject>(&rhs)) { return false; }
return object_ == Cast<JsonObject const>(&rhs)->getObject();
}
Value& JsonObject::operator=(Value const &rhs) {
JsonObject const* casted = Cast<JsonObject const>(&rhs);
object_ = casted->getObject();
return *this;
}
void JsonObject::Save(JsonWriter* writer) {
writer->Visit(this);
}
// Json String
Json& JsonString::operator[](std::string const & key) {
LOG(FATAL) << "Object of type "
<< Value::TypeStr() << " can not be indexed by string.";
return DummyJsonObject();
}
Json& JsonString::operator[](int ind) {
LOG(FATAL) << "Object of type "
<< Value::TypeStr() << " can not be indexed by Integer."
<< " Please try obtaining std::string first.";
return DummyJsonObject();
}
bool JsonString::operator==(Value const& rhs) const {
if (!IsA<JsonString>(&rhs)) { return false; }
return Cast<JsonString const>(&rhs)->getString() == str_;
}
Value & JsonString::operator=(Value const &rhs) {
JsonString const* casted = Cast<JsonString const>(&rhs);
str_ = casted->getString();
return *this;
}
// FIXME: UTF-8 parsing support.
void JsonString::Save(JsonWriter* writer) {
writer->Visit(this);
}
// Json Array
JsonArray::JsonArray(JsonArray && that) :
Value(ValueKind::Array), vec_{std::move(that.vec_)} {}
Json& JsonArray::operator[](std::string const & key) {
LOG(FATAL) << "Object of type "
<< Value::TypeStr() << " can not be indexed by string.";
return DummyJsonObject();
}
Json& JsonArray::operator[](int ind) {
return vec_.at(ind);
}
bool JsonArray::operator==(Value const& rhs) const {
if (!IsA<JsonArray>(&rhs)) { return false; }
auto& arr = Cast<JsonArray const>(&rhs)->getArray();
return std::equal(arr.cbegin(), arr.cend(), vec_.cbegin());
}
Value & JsonArray::operator=(Value const &rhs) {
JsonArray const* casted = Cast<JsonArray const>(&rhs);
vec_ = casted->getArray();
return *this;
}
void JsonArray::Save(JsonWriter* writer) {
writer->Visit(this);
}
// Json raw
Json& JsonRaw::operator[](std::string const & key) {
LOG(FATAL) << "Object of type "
<< Value::TypeStr() << " can not be indexed by string.";
return DummyJsonObject();
}
Json& JsonRaw::operator[](int ind) {
LOG(FATAL) << "Object of type "
<< Value::TypeStr() << " can not be indexed by Integer.";
return DummyJsonObject();
}
bool JsonRaw::operator==(Value const& rhs) const {
if (!IsA<JsonRaw>(&rhs)) { return false; }
auto& arr = Cast<JsonRaw const>(&rhs)->getRaw();
return std::equal(arr.cbegin(), arr.cend(), str_.cbegin());
}
Value & JsonRaw::operator=(Value const &rhs) {
auto const* casted = Cast<JsonRaw const>(&rhs);
str_ = casted->getRaw();
return *this;
}
void JsonRaw::Save(JsonWriter* writer) {
writer->Visit(this);
}
// Json Number
Json& JsonNumber::operator[](std::string const & key) {
LOG(FATAL) << "Object of type "
<< Value::TypeStr() << " can not be indexed by string.";
return DummyJsonObject();
}
Json& JsonNumber::operator[](int ind) {
LOG(FATAL) << "Object of type "
<< Value::TypeStr() << " can not be indexed by Integer.";
return DummyJsonObject();
}
bool JsonNumber::operator==(Value const& rhs) const {
if (!IsA<JsonNumber>(&rhs)) { return false; }
return number_ == Cast<JsonNumber const>(&rhs)->getNumber();
}
Value & JsonNumber::operator=(Value const &rhs) {
JsonNumber const* casted = Cast<JsonNumber const>(&rhs);
number_ = casted->getNumber();
return *this;
}
void JsonNumber::Save(JsonWriter* writer) {
writer->Visit(this);
}
// Json Null
Json& JsonNull::operator[](std::string const & key) {
LOG(FATAL) << "Object of type "
<< Value::TypeStr() << " can not be indexed by string.";
return DummyJsonObject();
}
Json& JsonNull::operator[](int ind) {
LOG(FATAL) << "Object of type "
<< Value::TypeStr() << " can not be indexed by Integer.";
return DummyJsonObject();
}
bool JsonNull::operator==(Value const& rhs) const {
if (!IsA<JsonNull>(&rhs)) { return false; }
return true;
}
Value & JsonNull::operator=(Value const &rhs) {
Cast<JsonNull const>(&rhs); // Checking only.
return *this;
}
void JsonNull::Save(JsonWriter* writer) {
writer->Write("null");
}
// Json Boolean
Json& JsonBoolean::operator[](std::string const & key) {
LOG(FATAL) << "Object of type "
<< Value::TypeStr() << " can not be indexed by string.";
return DummyJsonObject();
}
Json& JsonBoolean::operator[](int ind) {
LOG(FATAL) << "Object of type "
<< Value::TypeStr() << " can not be indexed by Integer.";
return DummyJsonObject();
}
bool JsonBoolean::operator==(Value const& rhs) const {
if (!IsA<JsonBoolean>(&rhs)) { return false; }
return boolean_ == Cast<JsonBoolean const>(&rhs)->getBoolean();
}
Value & JsonBoolean::operator=(Value const &rhs) {
JsonBoolean const* casted = Cast<JsonBoolean const>(&rhs);
boolean_ = casted->getBoolean();
return *this;
}
void JsonBoolean::Save(JsonWriter *writer) {
writer->Visit(this);
}
size_t constexpr JsonReader::kMaxNumLength;
Json JsonReader::Parse() {
while (true) {
SkipSpaces();
char c = PeekNextChar();
if (c == -1) { break; }
if (c == '{') {
return ParseObject();
} else if ( c == '[' ) {
return ParseArray();
} else if ( c == '-' || std::isdigit(c) ) {
return ParseNumber();
} else if ( c == '\"' ) {
return ParseString();
} else if ( c == 't' || c == 'f' ) {
return ParseBoolean();
} else if (c == 'n') {
return ParseNull();
} else {
Error("Unknown construct");
}
}
return Json();
}
Json JsonReader::Load() {
Json result = Parse();
return result;
}
void JsonReader::Error(std::string msg) const {
// just copy it.
std::istringstream str_s(raw_str_.substr(0, raw_str_.size()));
msg += ", around character: " + std::to_string(cursor_.Pos());
msg += '\n';
constexpr size_t kExtend = 8;
auto beg = cursor_.Pos() - kExtend < 0 ? 0 : cursor_.Pos() - kExtend;
auto end = cursor_.Pos() + kExtend >= raw_str_.size() ?
raw_str_.size() : cursor_.Pos() + kExtend;
msg += " ";
msg += raw_str_.substr(beg, end - beg);
msg += '\n';
msg += " ";
for (size_t i = beg; i < cursor_.Pos() - 1; ++i) {
msg += '~';
}
msg += '^';
for (size_t i = cursor_.Pos(); i < end; ++i) {
msg += '~';
}
LOG(FATAL) << msg;
}
// Json class
void JsonReader::SkipSpaces() {
while (cursor_.Pos() < raw_str_.size()) {
char c = raw_str_[cursor_.Pos()];
if (std::isspace(c)) {
cursor_.Forward(c);
} else {
break;
}
}
}
void ParseStr(std::string const& str) {
size_t end = 0;
for (size_t i = 0; i < str.size(); ++i) {
if (str[i] == '"' && i > 0 && str[i-1] != '\\') {
end = i;
break;
}
}
std::string result;
result.resize(end);
}
Json JsonReader::ParseString() {
char ch { GetChar('\"') }; // NOLINT
std::ostringstream output;
std::string str;
while (true) {
ch = GetNextChar();
if (ch == '\\') {
char next = static_cast<char>(GetNextChar());
switch (next) {
case 'r': str += u8"\r"; break;
case 'n': str += u8"\n"; break;
case '\\': str += u8"\\"; break;
case 't': str += u8"\t"; break;
case '\"': str += u8"\""; break;
case 'u':
str += ch;
str += 'u';
break;
default: Error("Unknown escape");
}
} else {
if (ch == '\"') break;
str += ch;
}
if (ch == EOF || ch == '\r' || ch == '\n') {
Expect('\"', ch);
}
}
return Json(std::move(str));
}
Json JsonReader::ParseNull() {
char ch = GetNextNonSpaceChar();
std::string buffer{ch};
for (size_t i = 0; i < 3; ++i) {
buffer.push_back(GetNextChar());
}
if (buffer != "null") {
Error("Expecting null value \"null\"");
}
return Json{JsonNull()};
}
Json JsonReader::ParseArray() {
std::vector<Json> data;
char ch { GetChar('[') }; // NOLINT
while (true) {
if (PeekNextChar() == ']') {
GetChar(']');
return Json(std::move(data));
}
auto obj = Parse();
data.push_back(obj);
ch = GetNextNonSpaceChar();
if (ch == ']') break;
if (ch != ',') {
Expect(',', ch);
}
}
return Json(std::move(data));
}
Json JsonReader::ParseObject() {
char ch = GetChar('{');
std::map<std::string, Json> data;
if (ch == '}') return Json(std::move(data));
while (true) {
SkipSpaces();
ch = PeekNextChar();
if (ch != '"') {
Expect('"', ch);
}
Json key = ParseString();
ch = GetNextNonSpaceChar();
if (ch != ':') {
Expect(':', ch);
}
Json value;
if (!ignore_specialization_ &&
(getRegistry().find(get<String>(key)) != getRegistry().cend())) {
LOG(DEBUG) << "Using specialized parser for: " << get<String>(key);
value = getRegistry().at(get<String>(key))(raw_str_, &(cursor_.pos_));
} else {
value = Parse();
}
data[get<JsonString>(key)] = std::move(value);
ch = GetNextNonSpaceChar();
if (ch == '}') break;
if (ch != ',') {
Expect(',', ch);
}
}
return Json(std::move(data));
}
Json JsonReader::ParseNumber() {
std::string substr = raw_str_.substr(cursor_.Pos(), kMaxNumLength);
size_t pos = 0;
Number::Float number{0};
number = std::stof(substr, &pos);
for (size_t i = 0; i < pos; ++i) {
GetNextChar();
}
return Json(number);
}
Json JsonReader::ParseBoolean() {
bool result = false;
char ch = GetNextNonSpaceChar();
std::string const t_value = u8"true";
std::string const f_value = u8"false";
std::string buffer;
if (ch == 't') {
for (size_t i = 0; i < 3; ++i) {
buffer.push_back(GetNextNonSpaceChar());
}
if (buffer != u8"rue") {
Error("Expecting boolean value \"true\".");
}
result = true;
} else {
for (size_t i = 0; i < 4; ++i) {
buffer.push_back(GetNextNonSpaceChar());
}
if (buffer != u8"alse") {
Error("Expecting boolean value \"false\".");
}
result = false;
}
return Json{JsonBoolean{result}};
}
// This is an ad-hoc solution for writing numeric value in standard way. We need to add
// something locale independent way of writing stream.
// FIXME(trivialfis): Remove this.
class GlobalCLocale {
std::locale ori_;
public:
GlobalCLocale() : ori_{std::locale()} {
std::string const name {"C"};
try {
std::locale::global(std::locale(name.c_str()));
} catch (std::runtime_error const& e) {
LOG(FATAL) << "Failed to set locale: " << name;
}
}
~GlobalCLocale() {
std::locale::global(ori_);
}
};
Json Json::Load(StringView str, bool ignore_specialization) {
GlobalCLocale guard;
LOG(WARNING) << "Json serialization is still experimental."
" Output schema is subject to change in the future.";
JsonReader reader(str, ignore_specialization);
common::Timer t;
t.Start();
Json json{reader.Load()};
t.Stop();
t.PrintElapsed("Json::load");
return json;
}
Json Json::Load(JsonReader* reader) {
GlobalCLocale guard;
common::Timer t;
t.Start();
Json json{reader->Load()};
t.Stop();
t.PrintElapsed("Json::load");
return json;
}
void Json::Dump(Json json, std::ostream *stream, bool pretty) {
GlobalCLocale guard;
LOG(WARNING) << "Json serialization is still experimental."
" Output schema is subject to change in the future.";
JsonWriter writer(stream, true);
common::Timer t;
t.Start();
writer.Save(json);
t.Stop();
t.PrintElapsed("Json::dump");
}
Json& Json::operator=(Json const &other) = default;
} // namespace xgboost

View File

@ -0,0 +1,371 @@
/*!
* Copyright (c) by Contributors 2019
*/
#include <gtest/gtest.h>
#include <dmlc/filesystem.h>
#include <fstream>
#include <map>
#include "xgboost/json.h"
#include "xgboost/logging.h"
#include "xgboost/json_io.h"
#include "../../../src/common/io.h"
namespace xgboost {
std::string GetModelStr() {
std::string model_json = R"json(
{
"model_parameter": {
"base_score": "0.5",
"num_class": "0",
"num_feature": "10"
},
"train_parameter": {
"debug_verbose": "0",
"disable_default_eval_metric": "0",
"dsplit": "auto",
"nthread": "0",
"seed": "0",
"seed_per_iteration": "0",
"test_flag": "",
"tree_method": "gpu_hist"
},
"configuration": {
"booster": "gbtree",
"n_gpus": "1",
"num_class": "0",
"num_feature": "10",
"objective": "reg:linear",
"predictor": "gpu_predictor",
"tree_method": "gpu_hist",
"updater": "grow_gpu_hist"
},
"objective": "reg:linear",
"booster": "gbtree",
"gbm": {
"GBTreeModelParam": {
"num_feature": "10",
"num_output_group": "1",
"num_roots": "1",
"size_leaf_vector": "0"
},
"trees": [{
"TreeParam": {
"num_feature": "10",
"num_roots": "1",
"size_leaf_vector": "0"
},
"num_nodes": "9",
"nodes": [
{
"depth": 0,
"gain": 31.8892,
"hess": 10,
"left": 1,
"missing": 1,
"nodeid": 0,
"right": 2,
"split_condition": 0.580717,
"split_index": 2
},
{
"depth": 1,
"gain": 1.5625,
"hess": 3,
"left": 5,
"missing": 5,
"nodeid": 2,
"right": 6,
"split_condition": 0.160345,
"split_index": 0
},
{
"depth": 2,
"gain": 0.25,
"hess": 2,
"left": 7,
"missing": 7,
"nodeid": 6,
"right": 8,
"split_condition": 0.62788,
"split_index": 0
},
{
"hess": 1,
"leaf": 0.375,
"nodeid": 8
},
{
"hess": 1,
"leaf": 0.075,
"nodeid": 7
},
{
"hess": 1,
"leaf": -0.075,
"nodeid": 5
},
{
"depth": 3,
"gain": 10.4866,
"hess": 7,
"left": 3,
"missing": 3,
"nodeid": 1,
"right": 4,
"split_condition": 0.238748,
"split_index": 1
},
{
"hess": 6,
"leaf": 1.54286,
"nodeid": 4
},
{
"hess": 1,
"leaf": 0.225,
"nodeid": 3
}
],
"leaf_vector": []
}],
"tree_info": [0]
}
}
)json";
return model_json;
}
TEST(Json, TestParseObject) {
std::string str = R"obj({"TreeParam" : {"num_feature": "10"}})obj";
auto json = Json::Load(StringView{str.c_str(), str.size()});
}
TEST(Json, ParseNumber) {
std::string str = "31.8892";
auto json = Json::Load(StringView{str.c_str(), str.size()});
ASSERT_NEAR(get<JsonNumber>(json), 31.8892f, kRtEps);
}
TEST(Json, ParseArray) {
std::string str = R"json(
{
"nodes": [
{
"depth": 3,
"gain": 10.4866,
"hess": 7,
"left": 3,
"missing": 3,
"nodeid": 1,
"right": 4,
"split_condition": 0.238748,
"split_index": 1
},
{
"hess": 6,
"leaf": 1.54286,
"nodeid": 4
},
{
"hess": 1,
"leaf": 0.225,
"nodeid": 3
}
]
}
)json";
auto json = Json::Load(StringView{str.c_str(), str.size()}, true);
json = json["nodes"];
std::vector<Json> arr = get<JsonArray>(json);
ASSERT_EQ(arr.size(), 3);
Json v0 = arr[0];
ASSERT_EQ(get<JsonNumber>(v0["depth"]), 3);
}
TEST(Json, Null) {
Json json {JsonNull()};
std::stringstream ss;
Json::Dump(json, &ss);
ASSERT_EQ(ss.str(), "null");
std::string null_input {R"null({"key": null })null"};
json = Json::Load({null_input.c_str(), null_input.size()});
ASSERT_TRUE(IsA<Null>(json["key"]));
}
TEST(Json, EmptyArray) {
std::string str = R"json(
{
"leaf_vector": []
}
)json";
std::istringstream iss(str);
auto json = Json::Load(StringView{str.c_str(), str.size()}, true);
auto arr = get<JsonArray>(json["leaf_vector"]);
ASSERT_EQ(arr.size(), 0);
}
TEST(Json, Boolean) {
std::string str = R"json(
{
"left_child": true,
"right_child": false
}
)json";
Json j {Json::Load(StringView{str.c_str(), str.size()}, true)};
ASSERT_EQ(get<JsonBoolean>(j["left_child"]), true);
ASSERT_EQ(get<JsonBoolean>(j["right_child"]), false);
}
TEST(Json, Indexing) {
auto str = GetModelStr();
JsonReader reader(StringView{str.c_str(), str.size()}, true);
Json j {Json::Load(&reader)};
auto& value_1 = j["model_parameter"];
auto& value = value_1["base_score"];
std::string result = Cast<JsonString>(&value.GetValue())->getString();
ASSERT_EQ(result, "0.5");
}
TEST(Json, AssigningObjects) {
{
Json json;
json = JsonObject();
json["Okay"] = JsonArray();
ASSERT_EQ(get<JsonArray>(json["Okay"]).size(), 0);
}
{
std::map<std::string, Json> objects;
Json json_objects { JsonObject() };
std::vector<Json> arr_0 (1, Json(3.3));
json_objects["tree_parameters"] = JsonArray(arr_0);
std::vector<Json> json_arr = get<JsonArray>(json_objects["tree_parameters"]);
ASSERT_NEAR(get<JsonNumber>(json_arr[0]), 3.3f, kRtEps);
}
{
Json json_object { JsonObject() };
auto str = JsonString("1");
auto& k = json_object["1"];
k = str;
auto& m = json_object["1"];
std::string value = get<JsonString>(m);
ASSERT_EQ(value, "1");
ASSERT_EQ(get<JsonString>(json_object["1"]), "1");
}
}
TEST(Json, AssigningArray) {
Json json;
json = JsonArray();
std::vector<Json> tmp_0 {Json(Number(1)), Json(Number(2))};
json = tmp_0;
std::vector<Json> tmp_1 {Json(Number(3))};
get<Array>(json) = tmp_1;
std::vector<Json> res = get<Array>(json);
ASSERT_EQ(get<Number>(res[0]), 3);
}
TEST(Json, AssigningNumber) {
{
// right value
Json json = Json{ Number(4) };
get<Number>(json) = 15;
ASSERT_EQ(get<Number>(json), 15);
}
{
// left value ref
Json json = Json{ Number(4) };
Number::Float& ref = get<Number>(json);
ref = 15;
ASSERT_EQ(get<Number>(json), 15);
}
{
// left value
Json json = Json{ Number(4) };
double value = get<Number>(json);
ASSERT_EQ(value, 4);
value = 15; // NOLINT
ASSERT_EQ(get<Number>(json), 4);
}
}
TEST(Json, AssigningString) {
{
// right value
Json json = Json{ String("str") };
get<String>(json) = "modified";
ASSERT_EQ(get<String>(json), "modified");
}
{
// left value ref
Json json = Json{ String("str") };
std::string& ref = get<String>(json);
ref = "modified";
ASSERT_EQ(get<String>(json), "modified");
}
{
// left value
Json json = Json{ String("str") };
std::string value = get<String>(json);
value = "modified";
ASSERT_EQ(get<String>(json), "str");
}
}
TEST(Json, LoadDump) {
std::string buffer = GetModelStr();
Json origin {Json::Load(StringView{buffer.c_str(), buffer.size()}, true)};
dmlc::TemporaryDirectory tempdir;
auto const& path = tempdir.path + "test_model_dump";
std::ofstream fout (path);
Json::Dump(origin, &fout);
fout.close();
buffer = common::LoadSequentialFile(path);
Json load_back {Json::Load(StringView(buffer.c_str(), buffer.size()), true)};
ASSERT_EQ(load_back, origin);
}
// For now Json is quite ignorance about unicode.
TEST(Json, CopyUnicode) {
std::string json_str = R"json(
{"m": ["\ud834\udd1e", "\u20ac", "\u0416", "\u00f6"]}
)json";
Json loaded {Json::Load(StringView{json_str.c_str(), json_str.size()}, true)};
std::stringstream ss_1;
Json::Dump(loaded, &ss_1);
std::string dumped_string = ss_1.str();
ASSERT_NE(dumped_string.find("\\u20ac"), std::string::npos);
}
TEST(Json, WrongCasts) {
{
Json json = Json{ String{"str"} };
ASSERT_ANY_THROW(get<Number>(json));
}
{
Json json = Json{ Array{ std::vector<Json>{ Json{ Number{1} } } } };
ASSERT_ANY_THROW(get<Number>(json));
}
{
Json json = Json{ Object{std::map<std::string, Json>{
{"key", Json{String{"value"}}}} } };
ASSERT_ANY_THROW(get<Number>(json));
}
}
} // namespace xgboost