Swap byte-order in binary serializer to support big-endian arch (#5813)

* fixed some endian issues

* Use dmlc::ByteSwap() to simplify code

* Fix lint check

* [CI] Add test for s390x

* Download latest CMake on s390x

* Fix a bug in my code

* Save magic number in dmatrix with byteswap on big-endian machine

* Save version in binary with byteswap on big-endian machine

* Load scalar with byteswap in MetaInfo

* Add a debugging message

* Handle arrays correctly when byteswapping

* EOF can also be 255

* Handle magic number in MetaInfo carefully

* Skip Tree.Load test for big-endian, since the test manually builds little-endian binary model

* Handle missing packages in Python tests

* Don't use boto3 in model compatibility tests

* Add s390 Docker file for local testing

* Add model compatibility tests

* Add R compatibility test

* Revert "Add R compatibility test"

This reverts commit c2d2bdcb7dbae133cbb927fcd20f7e83ee2b18a8.

Co-authored-by: Qi Zhang <q.zhang@ibm.com>
Co-authored-by: Hyunsu Cho <chohyu01@cs.washington.edu>
This commit is contained in:
Qi Zhang
2020-08-18 17:47:17 -04:00
committed by GitHub
parent 4d99c58a5f
commit 989ddd036f
20 changed files with 266 additions and 67 deletions

View File

@@ -128,6 +128,19 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
std::string str = get<String const>(j_param.at("base_score"));
from_chars(str.c_str(), str.c_str() + str.size(), base_score);
}
inline LearnerModelParamLegacy ByteSwap() const {
LearnerModelParamLegacy x = *this;
dmlc::ByteSwap(&x.base_score, sizeof(x.base_score), 1);
dmlc::ByteSwap(&x.num_feature, sizeof(x.num_feature), 1);
dmlc::ByteSwap(&x.num_class, sizeof(x.num_class), 1);
dmlc::ByteSwap(&x.contain_extra_attrs, sizeof(x.contain_extra_attrs), 1);
dmlc::ByteSwap(&x.contain_eval_metrics, sizeof(x.contain_eval_metrics), 1);
dmlc::ByteSwap(&x.major_version, sizeof(x.major_version), 1);
dmlc::ByteSwap(&x.minor_version, sizeof(x.minor_version), 1);
dmlc::ByteSwap(x.reserved, sizeof(x.reserved[0]), sizeof(x.reserved) / sizeof(x.reserved[0]));
return x;
}
// declare parameters
DMLC_DECLARE_PARAMETER(LearnerModelParamLegacy) {
DMLC_DECLARE_FIELD(base_score)
@@ -694,7 +707,9 @@ class LearnerIO : public LearnerConfiguration {
// read parameter
CHECK_EQ(fi->Read(&mparam_, sizeof(mparam_)), sizeof(mparam_))
<< "BoostLearner: wrong model format";
if (!DMLC_IO_NO_ENDIAN_SWAP) {
mparam_ = mparam_.ByteSwap();
}
CHECK(fi->Read(&tparam_.objective)) << "BoostLearner: wrong model format";
CHECK(fi->Read(&tparam_.booster)) << "BoostLearner: wrong model format";
@@ -828,7 +843,12 @@ class LearnerIO : public LearnerConfiguration {
}
std::string header {"binf"};
fo->Write(header.data(), 4);
fo->Write(&mparam, sizeof(LearnerModelParamLegacy));
if (DMLC_IO_NO_ENDIAN_SWAP) {
fo->Write(&mparam, sizeof(LearnerModelParamLegacy));
} else {
LearnerModelParamLegacy x = mparam.ByteSwap();
fo->Write(&x, sizeof(LearnerModelParamLegacy));
}
fo->Write(tparam_.objective);
fo->Write(tparam_.booster);
gbm_->Save(fo);
@@ -867,7 +887,13 @@ class LearnerIO : public LearnerConfiguration {
// concatonate the model and config at final output, it's a temporary solution for
// continuing support for binary model format
fo->Write(&serialisation_header_[0], serialisation_header_.size());
fo->Write(&json_offset, sizeof(json_offset));
if (DMLC_IO_NO_ENDIAN_SWAP) {
fo->Write(&json_offset, sizeof(json_offset));
} else {
auto x = json_offset;
dmlc::ByteSwap(&x, sizeof(x), 1);
fo->Write(&x, sizeof(json_offset));
}
fo->Write(&binary_buf[0], binary_buf.size());
fo->Write(&config_str[0], config_str.size());
}
@@ -904,6 +930,9 @@ class LearnerIO : public LearnerConfiguration {
)doc";
int64_t sz {-1};
CHECK_EQ(fp.Read(&sz, sizeof(sz)), sizeof(sz));
if (!DMLC_IO_NO_ENDIAN_SWAP) {
dmlc::ByteSwap(&sz, sizeof(sz), 1);
}
CHECK_GT(sz, 0);
size_t json_offset = static_cast<size_t>(sz);
std::string buffer;