Swap byte-order in binary serializer to support big-endian arch (#5813)

* fixed some endian issues

* Use dmlc::ByteSwap() to simplify code

* Fix lint check

* [CI] Add test for s390x

* Download latest CMake on s390x

* Fix a bug in my code

* Save magic number in dmatrix with byteswap on big-endian machine

* Save version in binary with byteswap on big-endian machine

* Load scalar with byteswap in MetaInfo

* Add a debugging message

* Handle arrays correctly when byteswapping

* EOF can also be 255

* Handle magic number in MetaInfo carefully

* Skip Tree.Load test for big-endian, since the test manually builds little-endian binary model

* Handle missing packages in Python tests

* Don't use boto3 in model compatibility tests

* Add s390 Docker file for local testing

* Add model compatibility tests

* Add R compatibility test

* Revert "Add R compatibility test"

This reverts commit c2d2bdcb7dbae133cbb927fcd20f7e83ee2b18a8.

Co-authored-by: Qi Zhang <q.zhang@ibm.com>
Co-authored-by: Hyunsu Cho <chohyu01@cs.washington.edu>
This commit is contained in:
Qi Zhang
2020-08-18 17:47:17 -04:00
committed by GitHub
parent 4d99c58a5f
commit 989ddd036f
20 changed files with 266 additions and 67 deletions

View File

@@ -59,6 +59,21 @@ struct TreeParam : public dmlc::Parameter<TreeParam> {
num_nodes = 1;
deprecated_num_roots = 1;
}
// Build a copy of this parameter struct with the byte order of its fields swapped;
// *this is not modified. Useful when a model has to travel between machines of
// different endianness (big endian vs little endian).
inline TreeParam ByteSwap() const {
  TreeParam out(*this);

  // Scalar fields: each is handed to dmlc::ByteSwap as one element of its own width.
  dmlc::ByteSwap(&out.deprecated_num_roots, sizeof(out.deprecated_num_roots), 1);
  dmlc::ByteSwap(&out.num_nodes, sizeof(out.num_nodes), 1);
  dmlc::ByteSwap(&out.num_deleted, sizeof(out.num_deleted), 1);
  dmlc::ByteSwap(&out.deprecated_max_depth, sizeof(out.deprecated_max_depth), 1);
  dmlc::ByteSwap(&out.num_feature, sizeof(out.num_feature), 1);
  dmlc::ByteSwap(&out.size_leaf_vector, sizeof(out.size_leaf_vector), 1);

  // reserved is an array: pass the element width and the element count so that
  // every slot is swapped individually instead of the array as a whole.
  const auto reserved_elem_size = sizeof(out.reserved[0]);
  const auto reserved_count = sizeof(out.reserved) / reserved_elem_size;
  dmlc::ByteSwap(out.reserved, reserved_elem_size, reserved_count);

  return out;
}
// declare the parameters
DMLC_DECLARE_PARAMETER(TreeParam) {
// only declare the parameters that can be set by the user.
@@ -97,6 +112,16 @@ struct RTreeNodeStat {
return loss_chg == b.loss_chg && sum_hess == b.sum_hess &&
base_weight == b.base_weight && leaf_child_cnt == b.leaf_child_cnt;
}
// Build a copy of this statistics record with the byte order of its fields swapped;
// *this is not modified. Useful when a model has to travel between machines of
// different endianness (big endian vs little endian).
inline RTreeNodeStat ByteSwap() const {
  RTreeNodeStat out(*this);

  // Each field is handed to dmlc::ByteSwap as a single element of its own width.
  dmlc::ByteSwap(&out.loss_chg, sizeof(out.loss_chg), 1);
  dmlc::ByteSwap(&out.sum_hess, sizeof(out.sum_hess), 1);
  dmlc::ByteSwap(&out.base_weight, sizeof(out.base_weight), 1);
  dmlc::ByteSwap(&out.leaf_child_cnt, sizeof(out.leaf_child_cnt), 1);

  return out;
}
};
/*!
@@ -227,6 +252,16 @@ class RegTree : public Model {
info_.leaf_value == b.info_.leaf_value;
}
// Build a copy of this node on which dmlc::ByteSwap has been applied to each
// member below; *this is not modified.
inline Node ByteSwap() const {
  Node out(*this);

  // Link and split-index members, each swapped as one element of its own width.
  dmlc::ByteSwap(&out.parent_, sizeof(out.parent_), 1);
  dmlc::ByteSwap(&out.cleft_, sizeof(out.cleft_), 1);
  dmlc::ByteSwap(&out.cright_, sizeof(out.cright_), 1);
  dmlc::ByteSwap(&out.sindex_, sizeof(out.sindex_), 1);

  // info_ is passed whole: one element that is sizeof(info_) bytes wide.
  dmlc::ByteSwap(&out.info_, sizeof(out.info_), 1);

  return out;
}
private:
/*!
* \brief in leaf node, we have weights, in non-leaf nodes,