Swap byte-order in binary serializer to support big-endian arch (#5813)

* Fixed some endian issues
* Use dmlc::ByteSwap() to simplify code
* Fix lint check
* [CI] Add test for s390x
* Download latest CMake on s390x
* Fix a bug in my code
* Save magic number in dmatrix with byteswap on big-endian machine
* Save version in binary with byteswap on big-endian machine
* Load scalar with byteswap in MetaInfo
* Add a debugging message
* Handle arrays correctly when byteswapping
* EOF can also be 255
* Handle magic number in MetaInfo carefully
* Skip Tree.Load test for big-endian, since the test manually builds little-endian binary model
* Handle missing packages in Python tests
* Don't use boto3 in model compatibility tests
* Add s390 Docker file for local testing
* Add model compatibility tests
* Add R compatibility test
* Revert "Add R compatibility test"

  This reverts commit c2d2bdcb7dbae133cbb927fcd20f7e83ee2b18a8.

Co-authored-by: Qi Zhang <q.zhang@ibm.com>
Co-authored-by: Hyunsu Cho <chohyu01@cs.washington.edu>
2020-08-18 17:47:17 -04:00
parent 4d99c58a5f
commit 989ddd036f
20 changed files with 266 additions and 67 deletions
--- a/src/learner.cc
+++ b/src/learner.cc
@@ -128,6 +128,19 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
    std::string str = get<String const>(j_param.at("base_score"));
    from_chars(str.c_str(), str.c_str() + str.size(), base_score);
  }
+  inline LearnerModelParamLegacy ByteSwap() const {  // returns a byte-swapped copy of *this
+    LearnerModelParamLegacy x = *this;  // work on a copy; this method is const
+    dmlc::ByteSwap(&x.base_score, sizeof(x.base_score), 1);  // scalar field: element count is 1
+    dmlc::ByteSwap(&x.num_feature, sizeof(x.num_feature), 1);
+    dmlc::ByteSwap(&x.num_class, sizeof(x.num_class), 1);
+    dmlc::ByteSwap(&x.contain_extra_attrs, sizeof(x.contain_extra_attrs), 1);
+    dmlc::ByteSwap(&x.contain_eval_metrics, sizeof(x.contain_eval_metrics), 1);
+    dmlc::ByteSwap(&x.major_version, sizeof(x.major_version), 1);
+    dmlc::ByteSwap(&x.minor_version, sizeof(x.minor_version), 1);
+    dmlc::ByteSwap(x.reserved, sizeof(x.reserved[0]), sizeof(x.reserved) / sizeof(x.reserved[0]));
+    return x;  // reserved[] above is swapped element by element (count = array length)
+  }
+
  // declare parameters
  DMLC_DECLARE_PARAMETER(LearnerModelParamLegacy) {
    DMLC_DECLARE_FIELD(base_score)
@@ -694,7 +707,9 @@ class LearnerIO : public LearnerConfiguration {
    // read parameter
    CHECK_EQ(fi->Read(&mparam_, sizeof(mparam_)), sizeof(mparam_))
        << "BoostLearner: wrong model format";
-
+    if (!DMLC_IO_NO_ENDIAN_SWAP) {
+      mparam_ = mparam_.ByteSwap();
+    }
    CHECK(fi->Read(&tparam_.objective)) << "BoostLearner: wrong model format";
    CHECK(fi->Read(&tparam_.booster)) << "BoostLearner: wrong model format";

@@ -828,7 +843,12 @@ class LearnerIO : public LearnerConfiguration {
    }
    std::string header {"binf"};
    fo->Write(header.data(), 4);
-    fo->Write(&mparam, sizeof(LearnerModelParamLegacy));
+    if (DMLC_IO_NO_ENDIAN_SWAP) {
+      fo->Write(&mparam, sizeof(LearnerModelParamLegacy));
+    } else {
+      LearnerModelParamLegacy x = mparam.ByteSwap();
+      fo->Write(&x, sizeof(LearnerModelParamLegacy));
+    }
    fo->Write(tparam_.objective);
    fo->Write(tparam_.booster);
    gbm_->Save(fo);
@@ -867,7 +887,13 @@ class LearnerIO : public LearnerConfiguration {
      // concatenate the model and config at final output, it's a temporary solution for
      // continuing support for binary model format
      fo->Write(&serialisation_header_[0], serialisation_header_.size());
-      fo->Write(&json_offset, sizeof(json_offset));
+      if (DMLC_IO_NO_ENDIAN_SWAP) {
+        fo->Write(&json_offset, sizeof(json_offset));
+      } else {
+        auto x = json_offset;
+        dmlc::ByteSwap(&x, sizeof(x), 1);
+        fo->Write(&x, sizeof(json_offset));
+      }
      fo->Write(&binary_buf[0], binary_buf.size());
      fo->Write(&config_str[0], config_str.size());
    }
@@ -904,6 +930,9 @@ class LearnerIO : public LearnerConfiguration {
 )doc";
      int64_t sz {-1};
      CHECK_EQ(fp.Read(&sz, sizeof(sz)), sizeof(sz));
+      if (!DMLC_IO_NO_ENDIAN_SWAP) {
+        dmlc::ByteSwap(&sz, sizeof(sz), 1);
+      }
      CHECK_GT(sz, 0);
      size_t json_offset = static_cast<size_t>(sz);
      std::string buffer;