Swap byte-order in binary serializer to support big-endian arch (#5813)

* fixed some endian issues

* Use dmlc::ByteSwap() to simplify code

* Fix lint check

* [CI] Add test for s390x

* Download latest CMake on s390x

* Fix a bug in my code

* Save magic number in dmatrix with byteswap on big-endian machine

* Save version in binary with byteswap on big-endian machine

* Load scalar with byteswap in MetaInfo

* Add a debugging message

* Handle arrays correctly when byteswapping

* EOF can also be 255

* Handle magic number in MetaInfo carefully

* Skip Tree.Load test for big-endian, since the test manually builds little-endian binary model

* Handle missing packages in Python tests

* Don't use boto3 in model compatibility tests

* Add s390 Docker file for local testing

* Add model compatibility tests

* Add R compatibility test

* Revert "Add R compatibility test"

This reverts commit c2d2bdcb7dbae133cbb927fcd20f7e83ee2b18a8.

Co-authored-by: Qi Zhang <q.zhang@ibm.com>
Co-authored-by: Hyunsu Cho <chohyu01@cs.washington.edu>
This commit is contained in:
Qi Zhang
2020-08-18 17:47:17 -04:00
committed by GitHub
parent 4d99c58a5f
commit 989ddd036f
20 changed files with 266 additions and 67 deletions

View File

@@ -664,13 +664,26 @@ bst_node_t RegTree::GetNumSplitNodes() const {
void RegTree::Load(dmlc::Stream* fi) {
CHECK_EQ(fi->Read(&param, sizeof(TreeParam)), sizeof(TreeParam));
if (!DMLC_IO_NO_ENDIAN_SWAP) {
param = param.ByteSwap();
}
nodes_.resize(param.num_nodes);
stats_.resize(param.num_nodes);
CHECK_NE(param.num_nodes, 0);
CHECK_EQ(fi->Read(dmlc::BeginPtr(nodes_), sizeof(Node) * nodes_.size()),
sizeof(Node) * nodes_.size());
if (!DMLC_IO_NO_ENDIAN_SWAP) {
for (Node& node : nodes_) {
node = node.ByteSwap();
}
}
CHECK_EQ(fi->Read(dmlc::BeginPtr(stats_), sizeof(RTreeNodeStat) * stats_.size()),
sizeof(RTreeNodeStat) * stats_.size());
if (!DMLC_IO_NO_ENDIAN_SWAP) {
for (RTreeNodeStat& stat : stats_) {
stat = stat.ByteSwap();
}
}
// chg deleted nodes
deleted_nodes_.resize(0);
for (int i = 1; i < param.num_nodes; ++i) {
@@ -683,11 +696,32 @@ void RegTree::Load(dmlc::Stream* fi) {
void RegTree::Save(dmlc::Stream* fo) const {
CHECK_EQ(param.num_nodes, static_cast<int>(nodes_.size()));
CHECK_EQ(param.num_nodes, static_cast<int>(stats_.size()));
fo->Write(&param, sizeof(TreeParam));
CHECK_EQ(param.deprecated_num_roots, 1);
CHECK_NE(param.num_nodes, 0);
fo->Write(dmlc::BeginPtr(nodes_), sizeof(Node) * nodes_.size());
fo->Write(dmlc::BeginPtr(stats_), sizeof(RTreeNodeStat) * nodes_.size());
if (DMLC_IO_NO_ENDIAN_SWAP) {
fo->Write(&param, sizeof(TreeParam));
} else {
TreeParam x = param.ByteSwap();
fo->Write(&x, sizeof(x));
}
if (DMLC_IO_NO_ENDIAN_SWAP) {
fo->Write(dmlc::BeginPtr(nodes_), sizeof(Node) * nodes_.size());
} else {
for (const Node& node : nodes_) {
Node x = node.ByteSwap();
fo->Write(&x, sizeof(x));
}
}
if (DMLC_IO_NO_ENDIAN_SWAP) {
fo->Write(dmlc::BeginPtr(stats_), sizeof(RTreeNodeStat) * nodes_.size());
} else {
for (const RTreeNodeStat& stat : stats_) {
RTreeNodeStat x = stat.ByteSwap();
fo->Write(&x, sizeof(x));
}
}
}
void RegTree::LoadModel(Json const& in) {