Implement ubjson. (#7549)

* Implement ubjson.

This is a partial implementation of UBJSON with support for typed arrays.  Features that
are still missing include `f64`, typed objects, and the no-op value.
This commit is contained in:
Jiaming Yuan
2022-01-10 23:24:23 +08:00
committed by GitHub
parent 001503186c
commit c635d4c46a
6 changed files with 792 additions and 242 deletions

View File

@@ -127,8 +127,8 @@ template <typename T> class IntrusivePtr {
ptr_ = nullptr;
}
void reset(element_type *that) { IntrusivePtr{that}.swap(*this); } // NOLINT
element_type &operator*() const noexcept { return *ptr_; }
// clang-tidy might manufacture a null value, disable the check
element_type &operator*() const noexcept { return *ptr_; } // NOLINT
element_type *operator->() const noexcept { return ptr_; }
element_type *get() const noexcept { return ptr_; } // NOLINT

View File

@@ -1,5 +1,5 @@
/*!
* Copyright (c) by XGBoost Contributors 2019-2021
* Copyright (c) by XGBoost Contributors 2019-2022
*/
#ifndef XGBOOST_JSON_H_
#define XGBOOST_JSON_H_
@@ -39,7 +39,12 @@ class Value {
kObject, // std::map
kArray, // std::vector
kBoolean,
kNull
kNull,
// typed array for ubjson
kNumberArray,
kU8Array,
kI32Array,
kI64Array
};
explicit Value(ValueKind _kind) : kind_{_kind} {}
@@ -47,13 +52,13 @@ class Value {
ValueKind Type() const { return kind_; }
virtual ~Value() = default;
virtual void Save(JsonWriter* writer) = 0;
virtual void Save(JsonWriter* writer) const = 0;
virtual Json& operator[](std::string const & key) = 0;
virtual Json& operator[](int ind) = 0;
virtual Json& operator[](std::string const& key);
virtual Json& operator[](int ind);
virtual bool operator==(Value const& rhs) const = 0;
virtual Value& operator=(Value const& rhs) = 0;
virtual Value& operator=(Value const& rhs) = delete;
std::string TypeStr() const;
@@ -88,17 +93,13 @@ class JsonString : public Value {
JsonString(JsonString&& str) noexcept : // NOLINT
Value(ValueKind::kString), str_{std::move(str.str_)} {}
void Save(JsonWriter* writer) override;
Json& operator[](std::string const & key) override;
Json& operator[](int ind) override;
void Save(JsonWriter* writer) const override;
std::string const& GetString() && { return str_; }
std::string const& GetString() const & { return str_; }
std::string& GetString() & { return str_; }
bool operator==(Value const& rhs) const override;
Value& operator=(Value const& rhs) override;
static bool IsClassOf(Value const* value) {
return value->Type() == ValueKind::kString;
@@ -117,23 +118,71 @@ class JsonArray : public Value {
JsonArray(JsonArray const& that) = delete;
JsonArray(JsonArray && that) noexcept;
void Save(JsonWriter* writer) override;
void Save(JsonWriter* writer) const override;
Json& operator[](std::string const & key) override;
Json& operator[](int ind) override;
Json& operator[](int ind) override { return vec_.at(ind); }
// silence the partially overridden warning
Json& operator[](std::string const& key) override { return Value::operator[](key); }
std::vector<Json> const& GetArray() && { return vec_; }
std::vector<Json> const& GetArray() const & { return vec_; }
std::vector<Json>& GetArray() & { return vec_; }
bool operator==(Value const& rhs) const override;
Value& operator=(Value const& rhs) override;
static bool IsClassOf(Value const* value) {
return value->Type() == ValueKind::kArray;
}
};
/**
 * \brief Homogeneous array of primitive values, used for Universal Binary JSON.
 *
 * \tparam T    Primitive type of every element stored in the array.
 * \tparam kind Value kind reported by this instantiation through Value::Type().
 */
template <typename T, Value::ValueKind kind>
class JsonTypedArray : public Value {
  std::vector<T> vec_;

 public:
  /*! \brief Element type, exposed so that generic code can query it. */
  using Type = T;

  /*! \brief Create an empty array. */
  JsonTypedArray() : Value{kind} {}
  /*! \brief Create an array holding n value-initialized elements. */
  explicit JsonTypedArray(size_t n) : Value{kind}, vec_(n) {}
  /*! \brief Take over the storage of another typed array. */
  JsonTypedArray(JsonTypedArray&& that) noexcept : Value{kind}, vec_(std::move(that.vec_)) {}

  bool operator==(Value const& rhs) const override;

  /*! \brief Overwrite the element at index i.  The index is not range checked. */
  void Set(size_t i, T v) { vec_[i] = v; }
  /*! \brief Number of elements currently stored. */
  size_t Size() const { return vec_.size(); }

  void Save(JsonWriter* writer) const override;

  // Accessors for the underlying storage, one per value category of the array.
  std::vector<T> const& GetArray() && { return vec_; }
  std::vector<T> const& GetArray() const& { return vec_; }
  std::vector<T>& GetArray() & { return vec_; }

  /*! \brief Whether the given value carries the kind of this instantiation. */
  static bool IsClassOf(Value const* value) { return kind == value->Type(); }
};
/**
* \brief Typed UBJSON array for 32-bit floating point.  Elements are stored as `float`
*        and the value kind is kNumberArray.
*/
using F32Array = JsonTypedArray<float, Value::ValueKind::kNumberArray>;
/**
* \brief Typed UBJSON array for uint8_t.  The value kind is kU8Array.
*/
using U8Array = JsonTypedArray<uint8_t, Value::ValueKind::kU8Array>;
/**
* \brief Typed UBJSON array for int32_t.  The value kind is kI32Array.
*/
using I32Array = JsonTypedArray<int32_t, Value::ValueKind::kI32Array>;
/**
* \brief Typed UBJSON array for int64_t.  The value kind is kI64Array.
*/
using I64Array = JsonTypedArray<int64_t, Value::ValueKind::kI64Array>;
class JsonObject : public Value {
std::map<std::string, Json> object_;
@@ -143,17 +192,17 @@ class JsonObject : public Value {
JsonObject(JsonObject const& that) = delete;
JsonObject(JsonObject && that) noexcept;
void Save(JsonWriter* writer) override;
void Save(JsonWriter* writer) const override;
Json& operator[](std::string const & key) override;
Json& operator[](int ind) override;
// silence the partially overridden warning
Json& operator[](int ind) override { return Value::operator[](ind); }
Json& operator[](std::string const& key) override { return object_[key]; }
std::map<std::string, Json> const& GetObject() && { return object_; }
std::map<std::string, Json> const& GetObject() const & { return object_; }
std::map<std::string, Json> & GetObject() & { return object_; }
bool operator==(Value const& rhs) const override;
Value& operator=(Value const& rhs) override;
static bool IsClassOf(Value const* value) {
return value->Type() == ValueKind::kObject;
@@ -182,18 +231,13 @@ class JsonNumber : public Value {
JsonNumber(JsonNumber const& that) = delete;
JsonNumber(JsonNumber&& that) noexcept : Value{ValueKind::kNumber}, number_{that.number_} {}
void Save(JsonWriter* writer) override;
Json& operator[](std::string const & key) override;
Json& operator[](int ind) override;
void Save(JsonWriter* writer) const override;
Float const& GetNumber() && { return number_; }
Float const& GetNumber() const & { return number_; }
Float& GetNumber() & { return number_; }
bool operator==(Value const& rhs) const override;
Value& operator=(Value const& rhs) override;
static bool IsClassOf(Value const* value) {
return value->Type() == ValueKind::kNumber;
@@ -231,16 +275,12 @@ class JsonInteger : public Value {
JsonInteger(JsonInteger &&that) noexcept
: Value{ValueKind::kInteger}, integer_{that.integer_} {}
Json& operator[](std::string const & key) override;
Json& operator[](int ind) override;
bool operator==(Value const& rhs) const override;
Value& operator=(Value const& rhs) override;
Int const& GetInteger() && { return integer_; }
Int const& GetInteger() const & { return integer_; }
Int& GetInteger() & { return integer_; }
void Save(JsonWriter* writer) override;
void Save(JsonWriter* writer) const override;
static bool IsClassOf(Value const* value) {
return value->Type() == ValueKind::kInteger;
@@ -253,13 +293,9 @@ class JsonNull : public Value {
JsonNull(std::nullptr_t) : Value(ValueKind::kNull) {} // NOLINT
JsonNull(JsonNull&&) noexcept : Value(ValueKind::kNull) {}
void Save(JsonWriter* writer) override;
Json& operator[](std::string const & key) override;
Json& operator[](int ind) override;
void Save(JsonWriter* writer) const override;
bool operator==(Value const& rhs) const override;
Value& operator=(Value const& rhs) override;
static bool IsClassOf(Value const* value) {
return value->Type() == ValueKind::kNull;
@@ -282,17 +318,13 @@ class JsonBoolean : public Value {
JsonBoolean(JsonBoolean&& value) noexcept: // NOLINT
Value(ValueKind::kBoolean), boolean_{value.boolean_} {}
void Save(JsonWriter* writer) override;
Json& operator[](std::string const & key) override;
Json& operator[](int ind) override;
void Save(JsonWriter* writer) const override;
bool const& GetBoolean() && { return boolean_; }
bool const& GetBoolean() const & { return boolean_; }
bool& GetBoolean() & { return boolean_; }
bool operator==(Value const& rhs) const override;
Value& operator=(Value const& rhs) override;
static bool IsClassOf(Value const* value) {
return value->Type() == ValueKind::kBoolean;
@@ -317,14 +349,22 @@ class JsonBoolean : public Value {
* \endcode
*/
class Json {
friend JsonWriter;
public:
/*! \brief Load a Json object from string. */
static Json Load(StringView str);
/**
* \brief Decode the JSON object. Optional parameter mode for choosing between text
* and binary (ubjson) input.
*/
static Json Load(StringView str, std::ios::openmode mode = std::ios::in);
/*! \brief Pass your own JsonReader. */
static Json Load(JsonReader* reader);
static void Dump(Json json, std::string* out);
/**
* \brief Encode the JSON object. Optional parameter mode for choosing between text
* and binary (ubjson) output.
*/
static void Dump(Json json, std::string* out, std::ios::openmode mode = std::ios::out);
static void Dump(Json json, std::vector<char>* out, std::ios::openmode mode = std::ios::out);
/*! \brief Use your own JsonWriter. */
static void Dump(Json json, JsonWriter* writer);
Json() : ptr_{new JsonNull} {}
@@ -334,14 +374,12 @@ class Json {
ptr_.reset(new JsonNumber(std::move(number)));
return *this;
}
// integer
explicit Json(JsonInteger integer) : ptr_{new JsonInteger(std::move(integer))} {}
Json& operator=(JsonInteger integer) {
ptr_.reset(new JsonInteger(std::move(integer)));
return *this;
}
// array
explicit Json(JsonArray list) :
ptr_ {new JsonArray(std::move(list))} {}
@@ -349,7 +387,15 @@ class Json {
ptr_.reset(new JsonArray(std::move(array)));
return *this;
}
// typed array
/*! \brief Construct from a typed array by moving it into a newly allocated value. */
template <typename T, Value::ValueKind kind>
explicit Json(JsonTypedArray<T, kind>&& list)
    : ptr_{new JsonTypedArray<T, kind>(std::move(list))} {}
/*! \brief Replace the held value with a typed array moved from the argument. */
template <typename T, Value::ValueKind kind>
Json& operator=(JsonTypedArray<T, kind>&& array) {
  using ArrayT = JsonTypedArray<T, kind>;
  ptr_.reset(new ArrayT(std::move(array)));
  return *this;
}
// object
explicit Json(JsonObject object) :
ptr_{new JsonObject(std::move(object))} {}
@@ -381,7 +427,7 @@ class Json {
// copy
Json(Json const& other) = default;
Json& operator=(Json const& other);
Json& operator=(Json const& other) = default;
// move
Json(Json &&other) noexcept { std::swap(this->ptr_, other.ptr_); }
Json &operator=(Json &&other) noexcept {
@@ -410,10 +456,21 @@ class Json {
return os;
}
IntrusivePtr<Value> const& Ptr() const { return ptr_; }
private:
IntrusivePtr<Value> ptr_;
};
/**
* \brief Check whether a Json object has specific type.
*
* \code
* Json json {Array{}};
* bool is_array = IsA<Array>(json);
* CHECK(is_array);
* \endcode
*/
template <typename T>
bool IsA(Json const& j) {
auto const& v = j.GetValue();
@@ -421,7 +478,6 @@ bool IsA(Json const& j) {
}
namespace detail {
// Number
template <typename T,
typename std::enable_if<
@@ -492,6 +548,16 @@ std::vector<Json> const& GetImpl(T& val) { // NOLINT
return val.GetArray();
}
// Typed Array
// Mutable overload: hands back the typed array's underlying vector by reference.
template <typename T, Value::ValueKind kind>
std::vector<T>& GetImpl(JsonTypedArray<T, kind>& val) { // NOLINT
return val.GetArray();
}
// Const overload: hands back the typed array's underlying vector as read-only.
template <typename T, Value::ValueKind kind>
std::vector<T> const& GetImpl(JsonTypedArray<T, kind> const& val) {
return val.GetArray();
}
// Object
template <typename T,
typename std::enable_if<
@@ -505,7 +571,6 @@ template <typename T,
std::map<std::string, Json> const& GetImpl(T& val) { // NOLINT
return val.GetObject();
}
} // namespace detail
/*!

View File

@@ -1,20 +1,20 @@
/*!
* Copyright (c) by Contributors 2019
* Copyright (c) by Contributors 2019-2022
*/
#ifndef XGBOOST_JSON_IO_H_
#define XGBOOST_JSON_IO_H_
#include <xgboost/json.h>
#include <dmlc/endian.h>
#include <xgboost/base.h>
#include <xgboost/json.h>
#include <vector>
#include <cinttypes>
#include <limits>
#include <map>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <map>
#include <limits>
#include <sstream>
#include <locale>
#include <cinttypes>
#include <vector>
namespace xgboost {
/*
@@ -47,7 +47,7 @@ class JsonReader {
void SkipSpaces();
char GetNextChar() {
if (cursor_.Pos() == raw_str_.size()) {
if (XGBOOST_EXPECT((cursor_.Pos() == raw_str_.size()), false)) {
return -1;
}
char ch = raw_str_[cursor_.Pos()];
@@ -109,12 +109,30 @@ class JsonReader {
virtual ~JsonReader() = default;
Json Load();
virtual Json Load();
};
class JsonWriter {
static constexpr size_t kIndentSize = 2;
// Convenience overload for any argument type other than Json (the template is disabled
// for Json itself): the value is wrapped in a Json and handed to Save again.
template <typename T, std::enable_if_t<!std::is_same<Json, T>::value>* = nullptr>
void Save(T const& v) {
this->Save(Json{v});
}
/*!
 * \brief Write an array to the output stream as `[e0,e1,...]`.
 *
 * \param arr Array whose GetArray() yields the elements to write.
 * \param fn  Callable applied to each element; its result is passed on to Save.
 */
template <typename Array, typename Fn>
void WriteArray(Array const* arr, Fn&& fn) {
  stream_->emplace_back('[');
  auto const& elements = arr->GetArray();
  size_t const n_elements = elements.size();
  for (size_t idx = 0; idx < n_elements; ++idx) {
    // A separator goes in front of every element except the first one.
    if (idx != 0) {
      stream_->emplace_back(',');
    }
    this->Save(fn(elements[idx]));
  }
  stream_->emplace_back(']');
}
protected:
std::vector<char>* stream_;
public:
@@ -122,9 +140,13 @@ class JsonWriter {
virtual ~JsonWriter() = default;
void Save(Json json);
virtual void Save(Json json);
virtual void Visit(JsonArray const* arr);
virtual void Visit(F32Array const* arr);
virtual void Visit(U8Array const* arr);
virtual void Visit(I32Array const* arr);
virtual void Visit(I64Array const* arr);
virtual void Visit(JsonObject const* obj);
virtual void Visit(JsonNumber const* num);
virtual void Visit(JsonInteger const* num);
@@ -132,6 +154,113 @@ class JsonWriter {
virtual void Visit(JsonString const* str);
virtual void Visit(JsonBoolean const* boolean);
};
#if defined(__GLIBC__)
/*!
 * \brief Reverse the byte order of an unsigned integer with the compiler builtins.
 *
 * Only the 16, 32 and 64-bit unsigned specializations that follow are defined in this
 * branch; the primary template is declared without a definition.
 */
template <typename T>
T BuiltinBSwap(T v);

template <>
inline uint16_t BuiltinBSwap<uint16_t>(uint16_t value) {
  return __builtin_bswap16(value);
}

template <>
inline uint32_t BuiltinBSwap<uint32_t>(uint32_t value) {
  return __builtin_bswap32(value);
}

template <>
inline uint64_t BuiltinBSwap<uint64_t>(uint64_t value) {
  return __builtin_bswap64(value);
}
#else
/*!
 * \brief Fallback used when the builtins above are not selected: the swap is delegated
 *        to dmlc::ByteSwap on a single element of size sizeof(T).
 */
template <typename T>
T BuiltinBSwap(T value) {
  dmlc::ByteSwap(&value, sizeof(value), 1);
  return value;
}
#endif  // defined(__GLIBC__)
/*!
 * \brief Byte swap for single-byte types.  There is nothing to reorder, so the value is
 *        returned unchanged.
 */
template <typename T, std::enable_if_t<sizeof(T) == 1>* = nullptr>
inline T ByteSwap(T v) {
  return v;
}

/*!
 * \brief Byte swap for multi-byte POD types.
 *
 * When DMLC_LITTLE_ENDIAN is set the bytes of the value are reversed, otherwise the
 * value is returned unchanged.
 *
 * \tparam T POD type that is 2, 4 or 8 bytes wide when a swap is performed.
 * \param v Value to convert.
 * \return The converted value.
 */
template <typename T, std::enable_if_t<sizeof(T) != 1>* = nullptr>
inline T ByteSwap(T v) {
  static_assert(std::is_pod<T>::value, "Only pod is supported.");
#if DMLC_LITTLE_ENDIAN
  auto constexpr kS = sizeof(T);
  // The swap goes through an unsigned integer that must have exactly the width of T.
  // For any other width the integer type below falls back to 64 bits, and the memcpy
  // calls would then run past the end of `v` or leave part of it unswapped.
  static_assert(kS == 2 || kS == 4 || kS == 8,
                "Only 2, 4 and 8 byte types can be byte swapped.");
  std::conditional_t<kS == 2, uint16_t, std::conditional_t<kS == 4, uint32_t, uint64_t>> u;
  // memcpy is used to reinterpret the bytes without violating aliasing rules.
  std::memcpy(&u, &v, sizeof(u));
  u = BuiltinBSwap(u);
  std::memcpy(&v, &u, sizeof(u));
#endif  // DMLC_LITTLE_ENDIAN
  return v;
}
/**
 * \brief Reader for UBJSON https://ubjson.org/
 */
class UBJReader : public JsonReader {
  Json Parse();

  /*!
   * \brief Copy sizeof(T) raw bytes starting at the cursor into a T, then advance the
   *        cursor by the same amount.  No byte order conversion is applied.
   *
   * NOTE(review): the remaining input length is not checked in this function; confirm
   * that truncated input is rejected elsewhere.
   */
  template <typename T>
  T ReadStream() {
    T raw{0};
    auto src = this->raw_str_.c_str() + cursor_.Pos();
    std::memcpy(&raw, src, sizeof(raw));
    cursor_.Forward(sizeof(raw));
    return raw;
  }

  /*! \brief Read one value of type T from the stream and pass it through ByteSwap. */
  template <typename T>
  T ReadPrimitive() {
    return ByteSwap(ReadStream<T>());
  }

  /*!
   * \brief Read n consecutive primitives into a typed array.
   *
   * \tparam TypedArray Typed array class to fill; its `Type` member is the element type.
   * \param n Number of elements to read.
   * \return Json value that owns the filled array.
   */
  template <typename TypedArray>
  auto ParseTypedArray(int64_t n) {
    using Element = typename TypedArray::Type;
    TypedArray results{static_cast<size_t>(n)};
    for (int64_t i = 0; i < n; ++i) {
      results.Set(i, this->ReadPrimitive<Element>());
    }
    return Json{std::move(results)};
  }

  std::string DecodeStr();

  Json ParseArray() override;
  Json ParseObject() override;

 public:
  using JsonReader::JsonReader;
  Json Load() override;
};
/**
 * \brief Writer for UBJSON https://ubjson.org/
 */
class UBJWriter : public JsonWriter {
 private:
  // Generic and typed arrays.
  void Visit(JsonArray const* arr) override;
  void Visit(F32Array const* arr) override;
  void Visit(U8Array const* arr) override;
  void Visit(I32Array const* arr) override;
  void Visit(I64Array const* arr) override;

  // Object and scalar values.
  void Visit(JsonObject const* obj) override;
  void Visit(JsonNumber const* num) override;
  void Visit(JsonInteger const* num) override;
  void Visit(JsonNull const* null) override;
  void Visit(JsonString const* str) override;
  void Visit(JsonBoolean const* boolean) override;

 public:
  using JsonWriter::JsonWriter;
  void Save(Json json) override;
};
} // namespace xgboost
#endif // XGBOOST_JSON_IO_H_