[breaking] Change internal model serialization to UBJSON. (#7556)

* Use typed array for models.
* Change the memory snapshot format.
* Add new C API for saving to raw format.
This commit is contained in:
Jiaming Yuan
2022-01-16 02:11:53 +08:00
committed by GitHub
parent 13b0fa4b97
commit a1bcd33a3b
24 changed files with 566 additions and 255 deletions

View File

@@ -1,6 +1,7 @@
/*!
* Copyright 2019-2021 by Contributors
* Copyright 2019-2022 by Contributors
*/
#include <algorithm>
#include <utility>
#include <limits>
#include "xgboost/json.h"
@@ -13,22 +14,28 @@ void GBLinearModel::SaveModel(Json* p_out) const {
auto& out = *p_out;
size_t const n_weights = weight.size();
std::vector<Json> j_weights(n_weights);
for (size_t i = 0; i < n_weights; ++i) {
j_weights[i] = weight[i];
}
F32Array j_weights{n_weights};
std::copy(weight.begin(), weight.end(), j_weights.GetArray().begin());
out["weights"] = std::move(j_weights);
out["boosted_rounds"] = Json{this->num_boosted_rounds};
}
void GBLinearModel::LoadModel(Json const& in) {
auto const& j_weights = get<Array const>(in["weights"]);
auto n_weights = j_weights.size();
weight.resize(n_weights);
for (size_t i = 0; i < n_weights; ++i) {
weight[i] = get<Number const>(j_weights[i]);
}
auto const& obj = get<Object const>(in);
auto weight_it = obj.find("weights");
if (IsA<F32Array>(weight_it->second)) {
auto const& j_weights = get<F32Array const>(weight_it->second);
weight.resize(j_weights.size());
std::copy(j_weights.begin(), j_weights.end(), weight.begin());
} else {
auto const& j_weights = get<Array const>(weight_it->second);
auto n_weights = j_weights.size();
weight.resize(n_weights);
for (size_t i = 0; i < n_weights; ++i) {
weight[i] = get<Number const>(j_weights[i]);
}
}
auto boosted_rounds = obj.find("boosted_rounds");
if (boosted_rounds != obj.cend()) {
this->num_boosted_rounds = get<Integer const>(boosted_rounds->second);

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2019-2020 by Contributors
* Copyright 2019-2022 by Contributors
*/
#include <utility>
@@ -69,13 +69,13 @@ void GBTreeModel::SaveModel(Json* p_out) const {
out["gbtree_model_param"] = ToJson(param);
std::vector<Json> trees_json(trees.size());
for (size_t t = 0; t < trees.size(); ++t) {
common::ParallelFor(trees.size(), omp_get_max_threads(), [&](auto t) {
auto const& tree = trees[t];
Json tree_json{Object()};
tree->SaveModel(&tree_json);
tree_json["id"] = Integer(static_cast<Integer::Int>(t));
tree_json["id"] = Integer{static_cast<Integer::Int>(t)};
trees_json[t] = std::move(tree_json);
}
});
std::vector<Json> tree_info_json(tree_info.size());
for (size_t i = 0; i < tree_info.size(); ++i) {
@@ -95,11 +95,11 @@ void GBTreeModel::LoadModel(Json const& in) {
auto const& trees_json = get<Array const>(in["trees"]);
trees.resize(trees_json.size());
for (size_t t = 0; t < trees_json.size(); ++t) { // NOLINT
common::ParallelFor(trees_json.size(), omp_get_max_threads(), [&](auto t) {
auto tree_id = get<Integer>(trees_json[t]["id"]);
trees.at(tree_id).reset(new RegTree());
trees.at(tree_id)->LoadModel(trees_json[t]);
}
});
tree_info.resize(param.num_trees);
auto const& tree_info_json = get<Array const>(in["tree_info"]);