Swap byte-order in binary serializer to support big-endian arch (#5813)

* Fix some endianness issues

* Use dmlc::ByteSwap() to simplify code

* Fix lint check

* [CI] Add test for s390x

* Download latest CMake on s390x

* Fix a bug in my code

* Save magic number in dmatrix with byteswap on big-endian machine

* Save version in binary with byteswap on big-endian machine

* Load scalar with byteswap in MetaInfo

* Add a debugging message

* Handle arrays correctly when byteswapping

* EOF can also be 255

* Handle magic number in MetaInfo carefully

* Skip Tree.Load test for big-endian, since the test manually builds little-endian binary model

* Handle missing packages in Python tests

* Don't use boto3 in model compatibility tests

* Add s390x Dockerfile for local testing

* Add model compatibility tests

* Add R compatibility test

* Revert "Add R compatibility test"

This reverts commit c2d2bdcb7dbae133cbb927fcd20f7e83ee2b18a8.

Co-authored-by: Qi Zhang <q.zhang@ibm.com>
Co-authored-by: Hyunsu Cho <chohyu01@cs.washington.edu>
This commit is contained in:
Qi Zhang
2020-08-18 17:47:17 -04:00
committed by GitHub
parent 4d99c58a5f
commit 989ddd036f
20 changed files with 266 additions and 67 deletions

View File

@@ -12,18 +12,35 @@ namespace gbm {
// Writes the model to the stream `fo` in this order: the parameter struct,
// every tree (via its own Save), and finally the tree_info array.
//
// NOTE(review): this listing looks like a diff rendered without +/- markers.
// The unconditional Write call that sits directly in front of each
// DMLC_IO_NO_ENDIAN_SWAP branch is presumably the removed pre-change line
// rather than live code (otherwise the same data would be written twice) --
// confirm against the actual source file.
void GBTreeModel::Save(dmlc::Stream* fo) const {
// The tree count recorded in the parameters must match the trees actually held.
CHECK_EQ(param.num_trees, static_cast<int32_t>(trees.size()));
fo->Write(&param, sizeof(param));
if (DMLC_IO_NO_ENDIAN_SWAP) {
// No byte swap requested: write the parameter struct's bytes unchanged.
fo->Write(&param, sizeof(param));
} else {
// Byte swap requested: write the value returned by param.ByteSwap()
// instead of the member itself.
auto x = param.ByteSwap();
fo->Write(&x, sizeof(x));
}
// Each tree serializes itself to the same stream.
for (const auto & tree : trees) {
tree->Save(fo);
}
// tree_info is only written when it has at least one element.
if (tree_info.size() != 0) {
fo->Write(dmlc::BeginPtr(tree_info), sizeof(int32_t) * tree_info.size());
if (DMLC_IO_NO_ENDIAN_SWAP) {
// No byte swap requested: write the whole array in a single call.
// The size computation assumes int32_t-sized elements -- TODO confirm
// against the declaration of tree_info.
fo->Write(dmlc::BeginPtr(tree_info), sizeof(int32_t) * tree_info.size());
} else {
// Byte swap requested: swap a local copy of each element and write the
// elements one at a time, so tree_info itself is not modified.
for (const auto& e : tree_info) {
auto x = e;
dmlc::ByteSwap(&x, sizeof(x), 1);
fo->Write(&x, sizeof(x));
}
}
}
}
void GBTreeModel::Load(dmlc::Stream* fi) {
CHECK_EQ(fi->Read(&param, sizeof(param)), sizeof(param))
<< "GBTree: invalid model file";
if (!DMLC_IO_NO_ENDIAN_SWAP) {
param = param.ByteSwap();
}
trees.clear();
trees_to_update.clear();
for (int32_t i = 0; i < param.num_trees; ++i) {
@@ -33,9 +50,16 @@ void GBTreeModel::Load(dmlc::Stream* fi) {
}
tree_info.resize(param.num_trees);
if (param.num_trees != 0) {
CHECK_EQ(
fi->Read(dmlc::BeginPtr(tree_info), sizeof(int32_t) * param.num_trees),
sizeof(int32_t) * param.num_trees);
if (DMLC_IO_NO_ENDIAN_SWAP) {
CHECK_EQ(
fi->Read(dmlc::BeginPtr(tree_info), sizeof(int32_t) * param.num_trees),
sizeof(int32_t) * param.num_trees);
} else {
for (auto& info : tree_info) {
CHECK_EQ(fi->Read(&info, sizeof(int32_t)), sizeof(int32_t));
dmlc::ByteSwap(&info, sizeof(info), 1);
}
}
}
}