Implement ubjson. (#7549)
* Implement UBJSON. This is a partial implementation of UBJSON with support for typed arrays. Features not yet implemented include `f64`, typed objects, and the no-op.
This commit is contained in:
@@ -127,8 +127,8 @@ template <typename T> class IntrusivePtr {
|
||||
ptr_ = nullptr;
|
||||
}
|
||||
void reset(element_type *that) { IntrusivePtr{that}.swap(*this); } // NOLINT
|
||||
|
||||
element_type &operator*() const noexcept { return *ptr_; }
|
||||
// clang-tidy might manufacture a null value, disable the check
|
||||
element_type &operator*() const noexcept { return *ptr_; } // NOLINT
|
||||
element_type *operator->() const noexcept { return ptr_; }
|
||||
element_type *get() const noexcept { return ptr_; } // NOLINT
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright (c) by XGBoost Contributors 2019-2021
|
||||
* Copyright (c) by XGBoost Contributors 2019-2022
|
||||
*/
|
||||
#ifndef XGBOOST_JSON_H_
|
||||
#define XGBOOST_JSON_H_
|
||||
@@ -39,7 +39,12 @@ class Value {
|
||||
kObject, // std::map
|
||||
kArray, // std::vector
|
||||
kBoolean,
|
||||
kNull
|
||||
kNull,
|
||||
// typed array for ubjson
|
||||
kNumberArray,
|
||||
kU8Array,
|
||||
kI32Array,
|
||||
kI64Array
|
||||
};
|
||||
|
||||
explicit Value(ValueKind _kind) : kind_{_kind} {}
|
||||
@@ -47,13 +52,13 @@ class Value {
|
||||
ValueKind Type() const { return kind_; }
|
||||
virtual ~Value() = default;
|
||||
|
||||
virtual void Save(JsonWriter* writer) = 0;
|
||||
virtual void Save(JsonWriter* writer) const = 0;
|
||||
|
||||
virtual Json& operator[](std::string const & key) = 0;
|
||||
virtual Json& operator[](int ind) = 0;
|
||||
virtual Json& operator[](std::string const& key);
|
||||
virtual Json& operator[](int ind);
|
||||
|
||||
virtual bool operator==(Value const& rhs) const = 0;
|
||||
virtual Value& operator=(Value const& rhs) = 0;
|
||||
virtual Value& operator=(Value const& rhs) = delete;
|
||||
|
||||
std::string TypeStr() const;
|
||||
|
||||
@@ -88,17 +93,13 @@ class JsonString : public Value {
|
||||
JsonString(JsonString&& str) noexcept : // NOLINT
|
||||
Value(ValueKind::kString), str_{std::move(str.str_)} {}
|
||||
|
||||
void Save(JsonWriter* writer) override;
|
||||
|
||||
Json& operator[](std::string const & key) override;
|
||||
Json& operator[](int ind) override;
|
||||
void Save(JsonWriter* writer) const override;
|
||||
|
||||
std::string const& GetString() && { return str_; }
|
||||
std::string const& GetString() const & { return str_; }
|
||||
std::string& GetString() & { return str_; }
|
||||
|
||||
bool operator==(Value const& rhs) const override;
|
||||
Value& operator=(Value const& rhs) override;
|
||||
|
||||
static bool IsClassOf(Value const* value) {
|
||||
return value->Type() == ValueKind::kString;
|
||||
@@ -117,23 +118,71 @@ class JsonArray : public Value {
|
||||
JsonArray(JsonArray const& that) = delete;
|
||||
JsonArray(JsonArray && that) noexcept;
|
||||
|
||||
void Save(JsonWriter* writer) override;
|
||||
void Save(JsonWriter* writer) const override;
|
||||
|
||||
Json& operator[](std::string const & key) override;
|
||||
Json& operator[](int ind) override;
|
||||
Json& operator[](int ind) override { return vec_.at(ind); }
|
||||
// silence the partially overridden warning
|
||||
Json& operator[](std::string const& key) override { return Value::operator[](key); }
|
||||
|
||||
std::vector<Json> const& GetArray() && { return vec_; }
|
||||
std::vector<Json> const& GetArray() const & { return vec_; }
|
||||
std::vector<Json>& GetArray() & { return vec_; }
|
||||
|
||||
bool operator==(Value const& rhs) const override;
|
||||
Value& operator=(Value const& rhs) override;
|
||||
|
||||
static bool IsClassOf(Value const* value) {
|
||||
return value->Type() == ValueKind::kArray;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Typed array for Universal Binary JSON.
|
||||
*
|
||||
* \tparam T The underlying primitive type.
|
||||
* \tparam kind Value kind defined by JSON type.
|
||||
*/
|
||||
template <typename T, Value::ValueKind kind>
|
||||
class JsonTypedArray : public Value {
|
||||
std::vector<T> vec_;
|
||||
|
||||
public:
|
||||
using Type = T;
|
||||
|
||||
JsonTypedArray() : Value(kind) {}
|
||||
explicit JsonTypedArray(size_t n) : Value(kind) { vec_.resize(n); }
|
||||
JsonTypedArray(JsonTypedArray&& that) noexcept : Value{kind}, vec_{std::move(that.vec_)} {}
|
||||
|
||||
bool operator==(Value const& rhs) const override;
|
||||
|
||||
void Set(size_t i, T v) { vec_[i] = v; }
|
||||
size_t Size() const { return vec_.size(); }
|
||||
|
||||
void Save(JsonWriter* writer) const override;
|
||||
|
||||
std::vector<T> const& GetArray() && { return vec_; }
|
||||
std::vector<T> const& GetArray() const& { return vec_; }
|
||||
std::vector<T>& GetArray() & { return vec_; }
|
||||
|
||||
static bool IsClassOf(Value const* value) { return value->Type() == kind; }
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Typed UBJSON array for 32-bit floating point.
|
||||
*/
|
||||
using F32Array = JsonTypedArray<float, Value::ValueKind::kNumberArray>;
|
||||
/**
|
||||
* \brief Typed UBJSON array for uint8_t.
|
||||
*/
|
||||
using U8Array = JsonTypedArray<uint8_t, Value::ValueKind::kU8Array>;
|
||||
/**
|
||||
* \brief Typed UBJSON array for int32_t.
|
||||
*/
|
||||
using I32Array = JsonTypedArray<int32_t, Value::ValueKind::kI32Array>;
|
||||
/**
|
||||
* \brief Typed UBJSON array for int64_t.
|
||||
*/
|
||||
using I64Array = JsonTypedArray<int64_t, Value::ValueKind::kI64Array>;
|
||||
|
||||
class JsonObject : public Value {
|
||||
std::map<std::string, Json> object_;
|
||||
|
||||
@@ -143,17 +192,17 @@ class JsonObject : public Value {
|
||||
JsonObject(JsonObject const& that) = delete;
|
||||
JsonObject(JsonObject && that) noexcept;
|
||||
|
||||
void Save(JsonWriter* writer) override;
|
||||
void Save(JsonWriter* writer) const override;
|
||||
|
||||
Json& operator[](std::string const & key) override;
|
||||
Json& operator[](int ind) override;
|
||||
// silence the partially overridden warning
|
||||
Json& operator[](int ind) override { return Value::operator[](ind); }
|
||||
Json& operator[](std::string const& key) override { return object_[key]; }
|
||||
|
||||
std::map<std::string, Json> const& GetObject() && { return object_; }
|
||||
std::map<std::string, Json> const& GetObject() const & { return object_; }
|
||||
std::map<std::string, Json> & GetObject() & { return object_; }
|
||||
|
||||
bool operator==(Value const& rhs) const override;
|
||||
Value& operator=(Value const& rhs) override;
|
||||
|
||||
static bool IsClassOf(Value const* value) {
|
||||
return value->Type() == ValueKind::kObject;
|
||||
@@ -182,18 +231,13 @@ class JsonNumber : public Value {
|
||||
JsonNumber(JsonNumber const& that) = delete;
|
||||
JsonNumber(JsonNumber&& that) noexcept : Value{ValueKind::kNumber}, number_{that.number_} {}
|
||||
|
||||
void Save(JsonWriter* writer) override;
|
||||
|
||||
Json& operator[](std::string const & key) override;
|
||||
Json& operator[](int ind) override;
|
||||
void Save(JsonWriter* writer) const override;
|
||||
|
||||
Float const& GetNumber() && { return number_; }
|
||||
Float const& GetNumber() const & { return number_; }
|
||||
Float& GetNumber() & { return number_; }
|
||||
|
||||
|
||||
bool operator==(Value const& rhs) const override;
|
||||
Value& operator=(Value const& rhs) override;
|
||||
|
||||
static bool IsClassOf(Value const* value) {
|
||||
return value->Type() == ValueKind::kNumber;
|
||||
@@ -231,16 +275,12 @@ class JsonInteger : public Value {
|
||||
JsonInteger(JsonInteger &&that) noexcept
|
||||
: Value{ValueKind::kInteger}, integer_{that.integer_} {}
|
||||
|
||||
Json& operator[](std::string const & key) override;
|
||||
Json& operator[](int ind) override;
|
||||
|
||||
bool operator==(Value const& rhs) const override;
|
||||
Value& operator=(Value const& rhs) override;
|
||||
|
||||
Int const& GetInteger() && { return integer_; }
|
||||
Int const& GetInteger() const & { return integer_; }
|
||||
Int& GetInteger() & { return integer_; }
|
||||
void Save(JsonWriter* writer) override;
|
||||
void Save(JsonWriter* writer) const override;
|
||||
|
||||
static bool IsClassOf(Value const* value) {
|
||||
return value->Type() == ValueKind::kInteger;
|
||||
@@ -253,13 +293,9 @@ class JsonNull : public Value {
|
||||
JsonNull(std::nullptr_t) : Value(ValueKind::kNull) {} // NOLINT
|
||||
JsonNull(JsonNull&&) noexcept : Value(ValueKind::kNull) {}
|
||||
|
||||
void Save(JsonWriter* writer) override;
|
||||
|
||||
Json& operator[](std::string const & key) override;
|
||||
Json& operator[](int ind) override;
|
||||
void Save(JsonWriter* writer) const override;
|
||||
|
||||
bool operator==(Value const& rhs) const override;
|
||||
Value& operator=(Value const& rhs) override;
|
||||
|
||||
static bool IsClassOf(Value const* value) {
|
||||
return value->Type() == ValueKind::kNull;
|
||||
@@ -282,17 +318,13 @@ class JsonBoolean : public Value {
|
||||
JsonBoolean(JsonBoolean&& value) noexcept: // NOLINT
|
||||
Value(ValueKind::kBoolean), boolean_{value.boolean_} {}
|
||||
|
||||
void Save(JsonWriter* writer) override;
|
||||
|
||||
Json& operator[](std::string const & key) override;
|
||||
Json& operator[](int ind) override;
|
||||
void Save(JsonWriter* writer) const override;
|
||||
|
||||
bool const& GetBoolean() && { return boolean_; }
|
||||
bool const& GetBoolean() const & { return boolean_; }
|
||||
bool& GetBoolean() & { return boolean_; }
|
||||
|
||||
bool operator==(Value const& rhs) const override;
|
||||
Value& operator=(Value const& rhs) override;
|
||||
|
||||
static bool IsClassOf(Value const* value) {
|
||||
return value->Type() == ValueKind::kBoolean;
|
||||
@@ -317,14 +349,22 @@ class JsonBoolean : public Value {
|
||||
* \endcode
|
||||
*/
|
||||
class Json {
|
||||
friend JsonWriter;
|
||||
|
||||
public:
|
||||
/*! \brief Load a Json object from string. */
|
||||
static Json Load(StringView str);
|
||||
/**
|
||||
* \brief Decode the JSON object. Optional parameter mode for choosing between text
|
||||
* and binary (ubjson) input.
|
||||
*/
|
||||
static Json Load(StringView str, std::ios::openmode mode = std::ios::in);
|
||||
/*! \brief Pass your own JsonReader. */
|
||||
static Json Load(JsonReader* reader);
|
||||
static void Dump(Json json, std::string* out);
|
||||
/**
|
||||
* \brief Encode the JSON object. Optional parameter mode for choosing between text
|
||||
* and binary (ubjson) output.
|
||||
*/
|
||||
static void Dump(Json json, std::string* out, std::ios::openmode mode = std::ios::out);
|
||||
static void Dump(Json json, std::vector<char>* out, std::ios::openmode mode = std::ios::out);
|
||||
/*! \brief Use your own JsonWriter. */
|
||||
static void Dump(Json json, JsonWriter* writer);
|
||||
|
||||
Json() : ptr_{new JsonNull} {}
|
||||
|
||||
@@ -334,14 +374,12 @@ class Json {
|
||||
ptr_.reset(new JsonNumber(std::move(number)));
|
||||
return *this;
|
||||
}
|
||||
|
||||
// integer
|
||||
explicit Json(JsonInteger integer) : ptr_{new JsonInteger(std::move(integer))} {}
|
||||
Json& operator=(JsonInteger integer) {
|
||||
ptr_.reset(new JsonInteger(std::move(integer)));
|
||||
return *this;
|
||||
}
|
||||
|
||||
// array
|
||||
explicit Json(JsonArray list) :
|
||||
ptr_ {new JsonArray(std::move(list))} {}
|
||||
@@ -349,7 +387,15 @@ class Json {
|
||||
ptr_.reset(new JsonArray(std::move(array)));
|
||||
return *this;
|
||||
}
|
||||
|
||||
// typed array
|
||||
template <typename T, Value::ValueKind kind>
|
||||
explicit Json(JsonTypedArray<T, kind>&& list)
|
||||
: ptr_{new JsonTypedArray<T, kind>(std::forward<JsonTypedArray<T, kind>>(list))} {}
|
||||
template <typename T, Value::ValueKind kind>
|
||||
Json& operator=(JsonTypedArray<T, kind>&& array) {
|
||||
ptr_.reset(new JsonTypedArray<T, kind>(std::forward<JsonTypedArray<T, kind>>(array)));
|
||||
return *this;
|
||||
}
|
||||
// object
|
||||
explicit Json(JsonObject object) :
|
||||
ptr_{new JsonObject(std::move(object))} {}
|
||||
@@ -381,7 +427,7 @@ class Json {
|
||||
|
||||
// copy
|
||||
Json(Json const& other) = default;
|
||||
Json& operator=(Json const& other);
|
||||
Json& operator=(Json const& other) = default;
|
||||
// move
|
||||
Json(Json &&other) noexcept { std::swap(this->ptr_, other.ptr_); }
|
||||
Json &operator=(Json &&other) noexcept {
|
||||
@@ -410,10 +456,21 @@ class Json {
|
||||
return os;
|
||||
}
|
||||
|
||||
IntrusivePtr<Value> const& Ptr() const { return ptr_; }
|
||||
|
||||
private:
|
||||
IntrusivePtr<Value> ptr_;
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Check whether a Json object has specific type.
|
||||
*
|
||||
* \code
|
||||
* Json json {Array{}};
|
||||
* bool is_array = IsA<Array>(json);
|
||||
* CHECK(is_array);
|
||||
* \endcode
|
||||
*/
|
||||
template <typename T>
|
||||
bool IsA(Json const& j) {
|
||||
auto const& v = j.GetValue();
|
||||
@@ -421,7 +478,6 @@ bool IsA(Json const& j) {
|
||||
}
|
||||
|
||||
namespace detail {
|
||||
|
||||
// Number
|
||||
template <typename T,
|
||||
typename std::enable_if<
|
||||
@@ -492,6 +548,16 @@ std::vector<Json> const& GetImpl(T& val) { // NOLINT
|
||||
return val.GetArray();
|
||||
}
|
||||
|
||||
// Typed Array
|
||||
template <typename T, Value::ValueKind kind>
|
||||
std::vector<T>& GetImpl(JsonTypedArray<T, kind>& val) { // NOLINT
|
||||
return val.GetArray();
|
||||
}
|
||||
template <typename T, Value::ValueKind kind>
|
||||
std::vector<T> const& GetImpl(JsonTypedArray<T, kind> const& val) {
|
||||
return val.GetArray();
|
||||
}
|
||||
|
||||
// Object
|
||||
template <typename T,
|
||||
typename std::enable_if<
|
||||
@@ -505,7 +571,6 @@ template <typename T,
|
||||
std::map<std::string, Json> const& GetImpl(T& val) { // NOLINT
|
||||
return val.GetObject();
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
/*!
|
||||
|
||||
@@ -1,20 +1,20 @@
|
||||
/*!
|
||||
* Copyright (c) by Contributors 2019
|
||||
* Copyright (c) by Contributors 2019-2022
|
||||
*/
|
||||
#ifndef XGBOOST_JSON_IO_H_
|
||||
#define XGBOOST_JSON_IO_H_
|
||||
#include <xgboost/json.h>
|
||||
#include <dmlc/endian.h>
|
||||
#include <xgboost/base.h>
|
||||
#include <xgboost/json.h>
|
||||
|
||||
#include <vector>
|
||||
#include <cinttypes>
|
||||
#include <limits>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <map>
|
||||
#include <limits>
|
||||
#include <sstream>
|
||||
#include <locale>
|
||||
#include <cinttypes>
|
||||
#include <vector>
|
||||
|
||||
namespace xgboost {
|
||||
/*
|
||||
@@ -47,7 +47,7 @@ class JsonReader {
|
||||
void SkipSpaces();
|
||||
|
||||
char GetNextChar() {
|
||||
if (cursor_.Pos() == raw_str_.size()) {
|
||||
if (XGBOOST_EXPECT((cursor_.Pos() == raw_str_.size()), false)) {
|
||||
return -1;
|
||||
}
|
||||
char ch = raw_str_[cursor_.Pos()];
|
||||
@@ -109,12 +109,30 @@ class JsonReader {
|
||||
|
||||
virtual ~JsonReader() = default;
|
||||
|
||||
Json Load();
|
||||
virtual Json Load();
|
||||
};
|
||||
|
||||
class JsonWriter {
|
||||
static constexpr size_t kIndentSize = 2;
|
||||
// Convenience overload: wraps any non-Json value in a Json and hands it to Save(Json).
// The enable_if excludes T == Json so this template never competes with that overload.
template <typename T, std::enable_if_t<!std::is_same<Json, T>::value>* = nullptr>
|
||||
void Save(T const& v) {
|
||||
this->Save(Json{v});
|
||||
}
|
||||
template <typename Array, typename Fn>
|
||||
void WriteArray(Array const* arr, Fn&& fn) {
|
||||
stream_->emplace_back('[');
|
||||
auto const& vec = arr->GetArray();
|
||||
size_t size = vec.size();
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
auto const& value = vec[i];
|
||||
this->Save(fn(value));
|
||||
if (i != size - 1) {
|
||||
stream_->emplace_back(',');
|
||||
}
|
||||
}
|
||||
stream_->emplace_back(']');
|
||||
}
|
||||
|
||||
protected:
|
||||
std::vector<char>* stream_;
|
||||
|
||||
public:
|
||||
@@ -122,9 +140,13 @@ class JsonWriter {
|
||||
|
||||
virtual ~JsonWriter() = default;
|
||||
|
||||
void Save(Json json);
|
||||
virtual void Save(Json json);
|
||||
|
||||
virtual void Visit(JsonArray const* arr);
|
||||
virtual void Visit(F32Array const* arr);
|
||||
virtual void Visit(U8Array const* arr);
|
||||
virtual void Visit(I32Array const* arr);
|
||||
virtual void Visit(I64Array const* arr);
|
||||
virtual void Visit(JsonObject const* obj);
|
||||
virtual void Visit(JsonNumber const* num);
|
||||
virtual void Visit(JsonInteger const* num);
|
||||
@@ -132,6 +154,113 @@ class JsonWriter {
|
||||
virtual void Visit(JsonString const* str);
|
||||
virtual void Visit(JsonBoolean const* boolean);
|
||||
};
|
||||
|
||||
// Byte-order reversal helper for unsigned integers.
#if defined(__GLIBC__)
// Declared for every T, but only the 16-, 32- and 64-bit unsigned specializations
// below are defined on this path.
template <typename T>
T BuiltinBSwap(T v);

template <>
inline uint16_t BuiltinBSwap<uint16_t>(uint16_t v) {
  return __builtin_bswap16(v);
}

template <>
inline uint32_t BuiltinBSwap<uint32_t>(uint32_t v) {
  return __builtin_bswap32(v);
}

template <>
inline uint64_t BuiltinBSwap<uint64_t>(uint64_t v) {
  return __builtin_bswap64(v);
}
#else
// Fallback path: delegate to dmlc::ByteSwap on the local copy of the value.
// NOTE(review): presumably this reverses the bytes of one sizeof(v)-sized element;
// confirm against the dmlc implementation.
template <typename T>
T BuiltinBSwap(T v) {
  dmlc::ByteSwap(&v, sizeof(v), 1);
  return v;
}
#endif  // defined(__GLIBC__)
|
||||
|
||||
/**
 * \brief Identity overload: a single-byte value has no byte order to reverse.
 */
template <typename T, std::enable_if_t<sizeof(T) == 1>* = nullptr>
inline T ByteSwap(T v) {
  return v;
}

/**
 * \brief Reverse the byte order of a 2-, 4- or 8-byte value when DMLC_LITTLE_ENDIAN
 *        is set; otherwise return the value unchanged.
 *
 * \tparam T POD type whose size is exactly 2, 4 or 8 bytes.
 * \param v Value to convert.
 * \return The converted value.
 */
template <typename T, std::enable_if_t<sizeof(T) != 1>* = nullptr>
inline T ByteSwap(T v) {
  static_assert(std::is_pod<T>::value, "Only pod is supported.");
  auto constexpr kS = sizeof(T);
  // Any other width would select the uint64_t scratch value below and copy 8 bytes
  // regardless of sizeof(T), so reject it at compile time.
  static_assert(kS == 2 || kS == 4 || kS == 8, "Only 2, 4 and 8 byte types are supported.");
#if DMLC_LITTLE_ENDIAN
  // Go through an unsigned integer of the same width so floating point values can be
  // swapped as raw bits.
  std::conditional_t<kS == 2, uint16_t, std::conditional_t<kS == 4, uint32_t, uint64_t>> u;
  std::memcpy(&u, &v, sizeof(u));
  u = BuiltinBSwap(u);
  std::memcpy(&v, &u, sizeof(u));
#endif  // DMLC_LITTLE_ENDIAN
  return v;
}
|
||||
|
||||
/**
|
||||
* \brief Reader for UBJSON https://ubjson.org/
|
||||
*/
|
||||
class UBJReader : public JsonReader {
|
||||
Json Parse();
|
||||
|
||||
template <typename T>
|
||||
T ReadStream() {
|
||||
auto ptr = this->raw_str_.c_str() + cursor_.Pos();
|
||||
T v{0};
|
||||
std::memcpy(&v, ptr, sizeof(v));
|
||||
cursor_.Forward(sizeof(v));
|
||||
return v;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T ReadPrimitive() {
|
||||
auto v = ReadStream<T>();
|
||||
v = ByteSwap(v);
|
||||
return v;
|
||||
}
|
||||
|
||||
template <typename TypedArray>
|
||||
auto ParseTypedArray(int64_t n) {
|
||||
TypedArray results{static_cast<size_t>(n)};
|
||||
for (int64_t i = 0; i < n; ++i) {
|
||||
auto v = this->ReadPrimitive<typename TypedArray::Type>();
|
||||
results.Set(i, v);
|
||||
}
|
||||
return Json{std::move(results)};
|
||||
}
|
||||
|
||||
std::string DecodeStr();
|
||||
|
||||
Json ParseArray() override;
|
||||
Json ParseObject() override;
|
||||
|
||||
public:
|
||||
using JsonReader::JsonReader;
|
||||
Json Load() override;
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Writer for UBJSON https://ubjson.org/
|
||||
*/
|
||||
class UBJWriter : public JsonWriter {
|
||||
// One Visit overload per value kind, each overriding the matching JsonWriter::Visit.
// They are declared before `public:` and are therefore private to this class.
void Visit(JsonArray const* arr) override;
|
||||
void Visit(F32Array const* arr) override;
|
||||
void Visit(U8Array const* arr) override;
|
||||
void Visit(I32Array const* arr) override;
|
||||
void Visit(I64Array const* arr) override;
|
||||
void Visit(JsonObject const* obj) override;
|
||||
void Visit(JsonNumber const* num) override;
|
||||
void Visit(JsonInteger const* num) override;
|
||||
void Visit(JsonNull const* null) override;
|
||||
void Visit(JsonString const* str) override;
|
||||
void Visit(JsonBoolean const* boolean) override;
|
||||
|
||||
public:
|
||||
// Inherit the constructors of JsonWriter unchanged.
using JsonWriter::JsonWriter;
|
||||
// Overrides JsonWriter::Save, which is declared virtual in the base class.
void Save(Json json) override;
|
||||
};
|
||||
} // namespace xgboost
|
||||
|
||||
#endif // XGBOOST_JSON_IO_H_
|
||||
|
||||
Reference in New Issue
Block a user