Swap byte-order in binary serializer to support big-endian arch (#5813)
* fixed some endian issues * Use dmlc::ByteSwap() to simplify code * Fix lint check * [CI] Add test for s390x * Download latest CMake on s390x * Fix a bug in my code * Save magic number in dmatrix with byteswap on big-endian machine * Save version in binary with byteswap on big-endian machine * Load scalar with byteswap in MetaInfo * Add a debugging message * Handle arrays correctly when byteswapping * EOF can also be 255 * Handle magic number in MetaInfo carefully * Skip Tree.Load test for big-endian, since the test manually builds little-endian binary model * Handle missing packages in Python tests * Don't use boto3 in model compatibility tests * Add s390 Docker file for local testing * Add model compatibility tests * Add R compatibility test * Revert "Add R compatibility test" This reverts commit c2d2bdcb7dbae133cbb927fcd20f7e83ee2b18a8. Co-authored-by: Qi Zhang <q.zhang@ibm.com> Co-authored-by: Hyunsu Cho <chohyu01@cs.washington.edu>
This commit is contained in:
@@ -83,7 +83,7 @@ void LoadScalarField(dmlc::Stream* strm, const std::string& expected_name,
|
||||
CHECK(strm->Read(&is_scalar)) << invalid;
|
||||
CHECK(is_scalar)
|
||||
<< invalid << "Expected field " << expected_name << " to be a scalar; got a vector";
|
||||
CHECK(strm->Read(field, sizeof(T))) << invalid;
|
||||
CHECK(strm->Read(field)) << invalid;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
@@ -653,14 +653,18 @@ DMatrix* DMatrix::Load(const std::string& uri,
|
||||
std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), "r", true));
|
||||
if (fi != nullptr) {
|
||||
common::PeekableInStream is(fi.get());
|
||||
if (is.PeekRead(&magic, sizeof(magic)) == sizeof(magic) &&
|
||||
magic == data::SimpleDMatrix::kMagic) {
|
||||
DMatrix* dmat = new data::SimpleDMatrix(&is);
|
||||
if (!silent) {
|
||||
LOG(CONSOLE) << dmat->Info().num_row_ << 'x' << dmat->Info().num_col_ << " matrix with "
|
||||
<< dmat->Info().num_nonzero_ << " entries loaded from " << uri;
|
||||
if (is.PeekRead(&magic, sizeof(magic)) == sizeof(magic)) {
|
||||
if (!DMLC_IO_NO_ENDIAN_SWAP) {
|
||||
dmlc::ByteSwap(&magic, sizeof(magic), 1);
|
||||
}
|
||||
if (magic == data::SimpleDMatrix::kMagic) {
|
||||
DMatrix* dmat = new data::SimpleDMatrix(&is);
|
||||
if (!silent) {
|
||||
LOG(CONSOLE) << dmat->Info().num_row_ << 'x' << dmat->Info().num_col_ << " matrix with "
|
||||
<< dmat->Info().num_nonzero_ << " entries loaded from " << uri;
|
||||
}
|
||||
return dmat;
|
||||
}
|
||||
return dmat;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -192,8 +192,7 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread) {
|
||||
|
||||
SimpleDMatrix::SimpleDMatrix(dmlc::Stream* in_stream) {
|
||||
int tmagic;
|
||||
CHECK(in_stream->Read(&tmagic, sizeof(tmagic)) == sizeof(tmagic))
|
||||
<< "invalid input file format";
|
||||
CHECK(in_stream->Read(&tmagic)) << "invalid input file format";
|
||||
CHECK_EQ(tmagic, kMagic) << "invalid format, magic number mismatch";
|
||||
info_.LoadBinary(in_stream);
|
||||
in_stream->Read(&sparse_page_.offset.HostVector());
|
||||
@@ -203,7 +202,7 @@ SimpleDMatrix::SimpleDMatrix(dmlc::Stream* in_stream) {
|
||||
void SimpleDMatrix::SaveToLocalFile(const std::string& fname) {
|
||||
std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(fname.c_str(), "w"));
|
||||
int tmagic = kMagic;
|
||||
fo->Write(&tmagic, sizeof(tmagic));
|
||||
fo->Write(tmagic);
|
||||
info_.SaveBinary(fo.get());
|
||||
fo->Write(sparse_page_.offset.HostVector());
|
||||
fo->Write(sparse_page_.data.HostVector());
|
||||
|
||||
@@ -144,7 +144,7 @@ class ExternalMemoryPrefetcher : dmlc::DataIter<PageT> {
|
||||
std::unique_ptr<dmlc::Stream> finfo(
|
||||
dmlc::Stream::Create(info.name_info.c_str(), "r"));
|
||||
int tmagic;
|
||||
CHECK_EQ(finfo->Read(&tmagic, sizeof(tmagic)), sizeof(tmagic));
|
||||
CHECK(finfo->Read(&tmagic));
|
||||
CHECK_EQ(tmagic, kMagic) << "invalid format, magic number mismatch";
|
||||
}
|
||||
files_.resize(info.name_shards.size());
|
||||
@@ -359,7 +359,7 @@ class SparsePageSource {
|
||||
std::unique_ptr<dmlc::Stream> fo(
|
||||
dmlc::Stream::Create(cache_info_.name_info.c_str(), "w"));
|
||||
int tmagic = kMagic;
|
||||
fo->Write(&tmagic, sizeof(tmagic));
|
||||
fo->Write(tmagic);
|
||||
// Either every row has query ID or none at all
|
||||
CHECK(qids.empty() || qids.size() == info.num_row_);
|
||||
info.SaveBinary(fo.get());
|
||||
|
||||
Reference in New Issue
Block a user