[breaking] Change internal model serialization to UBJSON. (#7556)

* Use typed array for models.
* Change the memory snapshot format.
* Add new C API for saving to raw format.
This commit is contained in:
Jiaming Yuan
2022-01-16 02:11:53 +08:00
committed by GitHub
parent 13b0fa4b97
commit a1bcd33a3b
24 changed files with 566 additions and 255 deletions

View File

@@ -111,7 +111,7 @@ class ConfigParser {
const auto last_char = str.find_last_not_of(" \t\n\r");
if (first_char == std::string::npos) {
// Every character in str is a whitespace
return std::string();
return {};
}
CHECK_NE(last_char, std::string::npos);
const auto substr_len = last_char + 1 - first_char;

View File

@@ -1,5 +1,5 @@
/*!
* Copyright (c) by XGBoost Contributors 2019
* Copyright (c) by XGBoost Contributors 2019-2022
*/
#if defined(__unix__)
#include <sys/stat.h>
@@ -52,7 +52,7 @@ size_t PeekableInStream::PeekRead(void* dptr, size_t size) {
FixedSizeStream::FixedSizeStream(PeekableInStream* stream) : PeekableInStream(stream), pointer_{0} {
size_t constexpr kInitialSize = 4096;
size_t size {kInitialSize}, total {0};
size_t size{kInitialSize}, total{0};
buffer_.clear();
while (true) {
buffer_.resize(size);
@@ -142,5 +142,18 @@ std::string LoadSequentialFile(std::string uri, bool stream) {
buffer.resize(total);
return buffer;
}
std::string FileExtension(std::string fname, bool lower) {
if (lower) {
std::transform(fname.begin(), fname.end(), fname.begin(),
[](char c) { return std::tolower(c); });
}
auto splited = Split(fname, '.');
if (splited.size() > 1) {
return splited.back();
} else {
return "";
}
}
} // namespace common
} // namespace xgboost

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2014 by Contributors
* Copyright by XGBoost Contributors 2014-2022
* \file io.h
* \brief general stream interface for serialization, I/O
* \author Tianqi Chen
@@ -86,15 +86,31 @@ class FixedSizeStream : public PeekableInStream {
*/
std::string LoadSequentialFile(std::string uri, bool stream = false);
inline std::string FileExtension(std::string const& fname) {
auto splited = Split(fname, '.');
if (splited.size() > 1) {
return splited.back();
} else {
return "";
}
}
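/**
* \brief Get file extension from file name.
*
* \param fname File name to extract the extension from.
* \param lower Whether to return the extension in lower case.
*
* \return File extension without the `.`, or an empty string if the name has none.
*/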
std::string FileExtension(std::string fname, bool lower = true);
/**
 * \brief Read the whole buffer from dmlc stream.
 *
 * When `fi` is a `common::MemoryFixSizeBuffer`, the stream is moved to its end
 * to query the position, rewound to offset 0, and that many bytes are read
 * directly into the result.  Otherwise the content is taken from a
 * `FixedSizeStream` constructed on `fp`.
 *
 * \param fi Input stream, probed with dynamic_cast for the in-memory buffer type.
 * \param fp Peekable stream used when `fi` is not an in-memory buffer.
 *
 * \return The bytes read, as a string.
 */
inline std::string ReadAll(dmlc::Stream* fi, PeekableInStream* fp) {
  std::string content;
  auto* mem_stream = dynamic_cast<common::MemoryFixSizeBuffer*>(fi);
  if (mem_stream == nullptr) {
    // Not an in-memory buffer: let FixedSizeStream gather the content.
    FixedSizeStream{fp}.Take(&content);
    return content;
  }
  // Position reported after seeking to the end is used as the byte count.
  mem_stream->Seek(common::MemoryFixSizeBuffer::kSeekEnd);
  size_t n_bytes = mem_stream->Tell();
  content.resize(n_bytes);
  // Rewind before reading so the read starts at offset 0.
  mem_stream->Seek(0);
  CHECK_EQ(mem_stream->Read(&content[0], n_bytes), n_bytes);
  return content;
}
} // namespace common
} // namespace xgboost
#endif // XGBOOST_COMMON_IO_H_