Swap byte-order in binary serializer to support big-endian arch (#5813)
* fixed some endian issues * Use dmlc::ByteSwap() to simplify code * Fix lint check * [CI] Add test for s390x * Download latest CMake on s390x * Fix a bug in my code * Save magic number in dmatrix with byteswap on big-endian machine * Save version in binary with byteswap on big-endian machine * Load scalar with byteswap in MetaInfo * Add a debugging message * Handle arrays correctly when byteswapping * EOF can also be 255 * Handle magic number in MetaInfo carefully * Skip Tree.Load test for big-endian, since the test manually builds little-endian binary model * Handle missing packages in Python tests * Don't use boto3 in model compatibility tests * Add s390 Docker file for local testing * Add model compatibility tests * Add R compatibility test * Revert "Add R compatibility test" This reverts commit c2d2bdcb7dbae133cbb927fcd20f7e83ee2b18a8. Co-authored-by: Qi Zhang <q.zhang@ibm.com> Co-authored-by: Hyunsu Cho <chohyu01@cs.washington.edu>
This commit is contained in:
parent
4d99c58a5f
commit
989ddd036f
40
.travis.yml
40
.travis.yml
@ -1,38 +1,33 @@
|
|||||||
# disable sudo for container build.
|
|
||||||
sudo: required
|
sudo: required
|
||||||
|
|
||||||
# Enabling test OS X
|
|
||||||
os:
|
|
||||||
- linux
|
|
||||||
- osx
|
|
||||||
|
|
||||||
osx_image: xcode10.1
|
osx_image: xcode10.1
|
||||||
dist: bionic
|
dist: bionic
|
||||||
|
|
||||||
# Use Build Matrix to do lint and build separately
|
|
||||||
env:
|
env:
|
||||||
matrix:
|
|
||||||
# python package test
|
|
||||||
- TASK=python_test
|
|
||||||
# test installation of Python source distribution
|
|
||||||
- TASK=python_sdist_test
|
|
||||||
# java package test
|
|
||||||
- TASK=java_test
|
|
||||||
# cmake test
|
|
||||||
- TASK=cmake_test
|
|
||||||
|
|
||||||
global:
|
global:
|
||||||
- secure: "PR16i9F8QtNwn99C5NDp8nptAS+97xwDtXEJJfEiEVhxPaaRkOp0MPWhogCaK0Eclxk1TqkgWbdXFknwGycX620AzZWa/A1K3gAs+GrpzqhnPMuoBJ0Z9qxXTbSJvCyvMbYwVrjaxc/zWqdMU8waWz8A7iqKGKs/SqbQ3rO6v7c="
|
- secure: "PR16i9F8QtNwn99C5NDp8nptAS+97xwDtXEJJfEiEVhxPaaRkOp0MPWhogCaK0Eclxk1TqkgWbdXFknwGycX620AzZWa/A1K3gAs+GrpzqhnPMuoBJ0Z9qxXTbSJvCyvMbYwVrjaxc/zWqdMU8waWz8A7iqKGKs/SqbQ3rO6v7c="
|
||||||
- secure: "dAGAjBokqm/0nVoLMofQni/fWIBcYSmdq4XvCBX1ZAMDsWnuOfz/4XCY6h2lEI1rVHZQ+UdZkc9PioOHGPZh5BnvE49/xVVWr9c4/61lrDOlkD01ZjSAeoV0fAZq+93V/wPl4QV+MM+Sem9hNNzFSbN5VsQLAiWCSapWsLdKzqA="
|
- secure: "dAGAjBokqm/0nVoLMofQni/fWIBcYSmdq4XvCBX1ZAMDsWnuOfz/4XCY6h2lEI1rVHZQ+UdZkc9PioOHGPZh5BnvE49/xVVWr9c4/61lrDOlkD01ZjSAeoV0fAZq+93V/wPl4QV+MM+Sem9hNNzFSbN5VsQLAiWCSapWsLdKzqA="
|
||||||
|
|
||||||
matrix:
|
jobs:
|
||||||
exclude:
|
include:
|
||||||
- os: linux
|
- os: linux
|
||||||
|
arch: amd64
|
||||||
|
env: TASK=python_sdist_test
|
||||||
|
- os: osx
|
||||||
|
arch: amd64
|
||||||
env: TASK=python_test
|
env: TASK=python_test
|
||||||
- os: linux
|
- os: osx
|
||||||
|
arch: amd64
|
||||||
|
env: TASK=python_sdist_test
|
||||||
|
- os: osx
|
||||||
|
arch: amd64
|
||||||
env: TASK=java_test
|
env: TASK=java_test
|
||||||
- os: linux
|
- os: osx
|
||||||
|
arch: amd64
|
||||||
env: TASK=cmake_test
|
env: TASK=cmake_test
|
||||||
|
- os: linux
|
||||||
|
arch: s390x
|
||||||
|
env: TASK=s390x_test
|
||||||
|
|
||||||
# dependent brew packages
|
# dependent brew packages
|
||||||
addons:
|
addons:
|
||||||
@ -47,6 +42,9 @@ addons:
|
|||||||
- wget
|
- wget
|
||||||
- r
|
- r
|
||||||
update: true
|
update: true
|
||||||
|
apt:
|
||||||
|
packages:
|
||||||
|
- snapd
|
||||||
|
|
||||||
before_install:
|
before_install:
|
||||||
- source tests/travis/travis_setup_env.sh
|
- source tests/travis/travis_setup_env.sh
|
||||||
|
|||||||
@ -59,6 +59,21 @@ struct TreeParam : public dmlc::Parameter<TreeParam> {
|
|||||||
num_nodes = 1;
|
num_nodes = 1;
|
||||||
deprecated_num_roots = 1;
|
deprecated_num_roots = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Swap byte order for all fields. Useful for transporting models between machines with different
|
||||||
|
// endianness (big endian vs little endian)
|
||||||
|
inline TreeParam ByteSwap() const {
|
||||||
|
TreeParam x = *this;
|
||||||
|
dmlc::ByteSwap(&x.deprecated_num_roots, sizeof(x.deprecated_num_roots), 1);
|
||||||
|
dmlc::ByteSwap(&x.num_nodes, sizeof(x.num_nodes), 1);
|
||||||
|
dmlc::ByteSwap(&x.num_deleted, sizeof(x.num_deleted), 1);
|
||||||
|
dmlc::ByteSwap(&x.deprecated_max_depth, sizeof(x.deprecated_max_depth), 1);
|
||||||
|
dmlc::ByteSwap(&x.num_feature, sizeof(x.num_feature), 1);
|
||||||
|
dmlc::ByteSwap(&x.size_leaf_vector, sizeof(x.size_leaf_vector), 1);
|
||||||
|
dmlc::ByteSwap(x.reserved, sizeof(x.reserved[0]), sizeof(x.reserved) / sizeof(x.reserved[0]));
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
|
||||||
// declare the parameters
|
// declare the parameters
|
||||||
DMLC_DECLARE_PARAMETER(TreeParam) {
|
DMLC_DECLARE_PARAMETER(TreeParam) {
|
||||||
// only declare the parameters that can be set by the user.
|
// only declare the parameters that can be set by the user.
|
||||||
@ -97,6 +112,16 @@ struct RTreeNodeStat {
|
|||||||
return loss_chg == b.loss_chg && sum_hess == b.sum_hess &&
|
return loss_chg == b.loss_chg && sum_hess == b.sum_hess &&
|
||||||
base_weight == b.base_weight && leaf_child_cnt == b.leaf_child_cnt;
|
base_weight == b.base_weight && leaf_child_cnt == b.leaf_child_cnt;
|
||||||
}
|
}
|
||||||
|
// Swap byte order for all fields. Useful for transporting models between machines with different
|
||||||
|
// endianness (big endian vs little endian)
|
||||||
|
inline RTreeNodeStat ByteSwap() const {
|
||||||
|
RTreeNodeStat x = *this;
|
||||||
|
dmlc::ByteSwap(&x.loss_chg, sizeof(x.loss_chg), 1);
|
||||||
|
dmlc::ByteSwap(&x.sum_hess, sizeof(x.sum_hess), 1);
|
||||||
|
dmlc::ByteSwap(&x.base_weight, sizeof(x.base_weight), 1);
|
||||||
|
dmlc::ByteSwap(&x.leaf_child_cnt, sizeof(x.leaf_child_cnt), 1);
|
||||||
|
return x;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
@ -227,6 +252,16 @@ class RegTree : public Model {
|
|||||||
info_.leaf_value == b.info_.leaf_value;
|
info_.leaf_value == b.info_.leaf_value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*!
 * \brief Build a copy of this node with the byte order of every field reversed.
 *
 * Used when the binary model is read or written on a machine whose endianness
 * differs from the on-disk layout. The node itself is not modified.
 * \return the byte-swapped copy
 */
inline Node ByteSwap() const {
  Node swapped = *this;
  dmlc::ByteSwap(&swapped.parent_, sizeof(swapped.parent_), 1);
  dmlc::ByteSwap(&swapped.cleft_, sizeof(swapped.cleft_), 1);
  dmlc::ByteSwap(&swapped.cright_, sizeof(swapped.cright_), 1);
  dmlc::ByteSwap(&swapped.sindex_, sizeof(swapped.sindex_), 1);
  // info_ is swapped as ONE element of sizeof(info_) bytes.
  // NOTE(review): this assumes info_ holds a single scalar-width value — confirm
  // against the definition of info_ if members are ever added to it.
  dmlc::ByteSwap(&swapped.info_, sizeof(swapped.info_), 1);
  return swapped;
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/*!
|
/*!
|
||||||
* \brief in leaf node, we have weights, in non-leaf nodes,
|
* \brief in leaf node, we have weights, in non-leaf nodes,
|
||||||
|
|||||||
@ -1465,8 +1465,12 @@ class Booster(object):
|
|||||||
ctypes.c_uint(iteration_range[1]))
|
ctypes.c_uint(iteration_range[1]))
|
||||||
|
|
||||||
# once caching is supported, we can pass id(data) as cache id.
|
# once caching is supported, we can pass id(data) as cache id.
|
||||||
if isinstance(data, DataFrame):
|
try:
|
||||||
data = data.values
|
import pandas as pd
|
||||||
|
if isinstance(data, pd.DataFrame):
|
||||||
|
data = data.values
|
||||||
|
except ImportError:
|
||||||
|
pass
|
||||||
if isinstance(data, np.ndarray):
|
if isinstance(data, np.ndarray):
|
||||||
assert data.flags.c_contiguous
|
assert data.flags.c_contiguous
|
||||||
arr = np.array(data.reshape(data.size), copy=False,
|
arr = np.array(data.reshape(data.size), copy=False,
|
||||||
|
|||||||
@ -49,9 +49,9 @@ Version::TripletT Version::Load(dmlc::Stream* fi) {
|
|||||||
LOG(FATAL) << msg;
|
LOG(FATAL) << msg;
|
||||||
}
|
}
|
||||||
|
|
||||||
CHECK_EQ(fi->Read(&major, sizeof(major)), sizeof(major)) << msg;
|
CHECK(fi->Read(&major)) << msg;
|
||||||
CHECK_EQ(fi->Read(&minor, sizeof(major)), sizeof(minor)) << msg;
|
CHECK(fi->Read(&minor)) << msg;
|
||||||
CHECK_EQ(fi->Read(&patch, sizeof(major)), sizeof(patch)) << msg;
|
CHECK(fi->Read(&patch)) << msg;
|
||||||
|
|
||||||
return std::make_tuple(major, minor, patch);
|
return std::make_tuple(major, minor, patch);
|
||||||
}
|
}
|
||||||
@ -69,9 +69,9 @@ void Version::Save(dmlc::Stream* fo) {
|
|||||||
std::tie(major, minor, patch) = Self();
|
std::tie(major, minor, patch) = Self();
|
||||||
std::string verstr { u8"version:" };
|
std::string verstr { u8"version:" };
|
||||||
fo->Write(&verstr[0], verstr.size());
|
fo->Write(&verstr[0], verstr.size());
|
||||||
fo->Write(&major, sizeof(major));
|
fo->Write(major);
|
||||||
fo->Write(&minor, sizeof(minor));
|
fo->Write(minor);
|
||||||
fo->Write(&patch, sizeof(patch));
|
fo->Write(patch);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string Version::String(TripletT const& version) {
|
std::string Version::String(TripletT const& version) {
|
||||||
|
|||||||
@ -83,7 +83,7 @@ void LoadScalarField(dmlc::Stream* strm, const std::string& expected_name,
|
|||||||
CHECK(strm->Read(&is_scalar)) << invalid;
|
CHECK(strm->Read(&is_scalar)) << invalid;
|
||||||
CHECK(is_scalar)
|
CHECK(is_scalar)
|
||||||
<< invalid << "Expected field " << expected_name << " to be a scalar; got a vector";
|
<< invalid << "Expected field " << expected_name << " to be a scalar; got a vector";
|
||||||
CHECK(strm->Read(field, sizeof(T))) << invalid;
|
CHECK(strm->Read(field)) << invalid;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
@ -653,14 +653,18 @@ DMatrix* DMatrix::Load(const std::string& uri,
|
|||||||
std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), "r", true));
|
std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), "r", true));
|
||||||
if (fi != nullptr) {
|
if (fi != nullptr) {
|
||||||
common::PeekableInStream is(fi.get());
|
common::PeekableInStream is(fi.get());
|
||||||
if (is.PeekRead(&magic, sizeof(magic)) == sizeof(magic) &&
|
if (is.PeekRead(&magic, sizeof(magic)) == sizeof(magic)) {
|
||||||
magic == data::SimpleDMatrix::kMagic) {
|
if (!DMLC_IO_NO_ENDIAN_SWAP) {
|
||||||
DMatrix* dmat = new data::SimpleDMatrix(&is);
|
dmlc::ByteSwap(&magic, sizeof(magic), 1);
|
||||||
if (!silent) {
|
}
|
||||||
LOG(CONSOLE) << dmat->Info().num_row_ << 'x' << dmat->Info().num_col_ << " matrix with "
|
if (magic == data::SimpleDMatrix::kMagic) {
|
||||||
<< dmat->Info().num_nonzero_ << " entries loaded from " << uri;
|
DMatrix* dmat = new data::SimpleDMatrix(&is);
|
||||||
|
if (!silent) {
|
||||||
|
LOG(CONSOLE) << dmat->Info().num_row_ << 'x' << dmat->Info().num_col_ << " matrix with "
|
||||||
|
<< dmat->Info().num_nonzero_ << " entries loaded from " << uri;
|
||||||
|
}
|
||||||
|
return dmat;
|
||||||
}
|
}
|
||||||
return dmat;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -192,8 +192,7 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread) {
|
|||||||
|
|
||||||
SimpleDMatrix::SimpleDMatrix(dmlc::Stream* in_stream) {
|
SimpleDMatrix::SimpleDMatrix(dmlc::Stream* in_stream) {
|
||||||
int tmagic;
|
int tmagic;
|
||||||
CHECK(in_stream->Read(&tmagic, sizeof(tmagic)) == sizeof(tmagic))
|
CHECK(in_stream->Read(&tmagic)) << "invalid input file format";
|
||||||
<< "invalid input file format";
|
|
||||||
CHECK_EQ(tmagic, kMagic) << "invalid format, magic number mismatch";
|
CHECK_EQ(tmagic, kMagic) << "invalid format, magic number mismatch";
|
||||||
info_.LoadBinary(in_stream);
|
info_.LoadBinary(in_stream);
|
||||||
in_stream->Read(&sparse_page_.offset.HostVector());
|
in_stream->Read(&sparse_page_.offset.HostVector());
|
||||||
@ -203,7 +202,7 @@ SimpleDMatrix::SimpleDMatrix(dmlc::Stream* in_stream) {
|
|||||||
void SimpleDMatrix::SaveToLocalFile(const std::string& fname) {
|
void SimpleDMatrix::SaveToLocalFile(const std::string& fname) {
|
||||||
std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(fname.c_str(), "w"));
|
std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(fname.c_str(), "w"));
|
||||||
int tmagic = kMagic;
|
int tmagic = kMagic;
|
||||||
fo->Write(&tmagic, sizeof(tmagic));
|
fo->Write(tmagic);
|
||||||
info_.SaveBinary(fo.get());
|
info_.SaveBinary(fo.get());
|
||||||
fo->Write(sparse_page_.offset.HostVector());
|
fo->Write(sparse_page_.offset.HostVector());
|
||||||
fo->Write(sparse_page_.data.HostVector());
|
fo->Write(sparse_page_.data.HostVector());
|
||||||
|
|||||||
@ -144,7 +144,7 @@ class ExternalMemoryPrefetcher : dmlc::DataIter<PageT> {
|
|||||||
std::unique_ptr<dmlc::Stream> finfo(
|
std::unique_ptr<dmlc::Stream> finfo(
|
||||||
dmlc::Stream::Create(info.name_info.c_str(), "r"));
|
dmlc::Stream::Create(info.name_info.c_str(), "r"));
|
||||||
int tmagic;
|
int tmagic;
|
||||||
CHECK_EQ(finfo->Read(&tmagic, sizeof(tmagic)), sizeof(tmagic));
|
CHECK(finfo->Read(&tmagic));
|
||||||
CHECK_EQ(tmagic, kMagic) << "invalid format, magic number mismatch";
|
CHECK_EQ(tmagic, kMagic) << "invalid format, magic number mismatch";
|
||||||
}
|
}
|
||||||
files_.resize(info.name_shards.size());
|
files_.resize(info.name_shards.size());
|
||||||
@ -359,7 +359,7 @@ class SparsePageSource {
|
|||||||
std::unique_ptr<dmlc::Stream> fo(
|
std::unique_ptr<dmlc::Stream> fo(
|
||||||
dmlc::Stream::Create(cache_info_.name_info.c_str(), "w"));
|
dmlc::Stream::Create(cache_info_.name_info.c_str(), "w"));
|
||||||
int tmagic = kMagic;
|
int tmagic = kMagic;
|
||||||
fo->Write(&tmagic, sizeof(tmagic));
|
fo->Write(tmagic);
|
||||||
// Either every row has query ID or none at all
|
// Either every row has query ID or none at all
|
||||||
CHECK(qids.empty() || qids.size() == info.num_row_);
|
CHECK(qids.empty() || qids.size() == info.num_row_);
|
||||||
info.SaveBinary(fo.get());
|
info.SaveBinary(fo.get());
|
||||||
|
|||||||
@ -12,18 +12,35 @@ namespace gbm {
|
|||||||
|
|
||||||
void GBTreeModel::Save(dmlc::Stream* fo) const {
|
void GBTreeModel::Save(dmlc::Stream* fo) const {
|
||||||
CHECK_EQ(param.num_trees, static_cast<int32_t>(trees.size()));
|
CHECK_EQ(param.num_trees, static_cast<int32_t>(trees.size()));
|
||||||
fo->Write(&param, sizeof(param));
|
|
||||||
|
if (DMLC_IO_NO_ENDIAN_SWAP) {
|
||||||
|
fo->Write(&param, sizeof(param));
|
||||||
|
} else {
|
||||||
|
auto x = param.ByteSwap();
|
||||||
|
fo->Write(&x, sizeof(x));
|
||||||
|
}
|
||||||
for (const auto & tree : trees) {
|
for (const auto & tree : trees) {
|
||||||
tree->Save(fo);
|
tree->Save(fo);
|
||||||
}
|
}
|
||||||
if (tree_info.size() != 0) {
|
if (tree_info.size() != 0) {
|
||||||
fo->Write(dmlc::BeginPtr(tree_info), sizeof(int32_t) * tree_info.size());
|
if (DMLC_IO_NO_ENDIAN_SWAP) {
|
||||||
|
fo->Write(dmlc::BeginPtr(tree_info), sizeof(int32_t) * tree_info.size());
|
||||||
|
} else {
|
||||||
|
for (const auto& e : tree_info) {
|
||||||
|
auto x = e;
|
||||||
|
dmlc::ByteSwap(&x, sizeof(x), 1);
|
||||||
|
fo->Write(&x, sizeof(x));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GBTreeModel::Load(dmlc::Stream* fi) {
|
void GBTreeModel::Load(dmlc::Stream* fi) {
|
||||||
CHECK_EQ(fi->Read(&param, sizeof(param)), sizeof(param))
|
CHECK_EQ(fi->Read(&param, sizeof(param)), sizeof(param))
|
||||||
<< "GBTree: invalid model file";
|
<< "GBTree: invalid model file";
|
||||||
|
if (!DMLC_IO_NO_ENDIAN_SWAP) {
|
||||||
|
param = param.ByteSwap();
|
||||||
|
}
|
||||||
trees.clear();
|
trees.clear();
|
||||||
trees_to_update.clear();
|
trees_to_update.clear();
|
||||||
for (int32_t i = 0; i < param.num_trees; ++i) {
|
for (int32_t i = 0; i < param.num_trees; ++i) {
|
||||||
@ -33,9 +50,16 @@ void GBTreeModel::Load(dmlc::Stream* fi) {
|
|||||||
}
|
}
|
||||||
tree_info.resize(param.num_trees);
|
tree_info.resize(param.num_trees);
|
||||||
if (param.num_trees != 0) {
|
if (param.num_trees != 0) {
|
||||||
CHECK_EQ(
|
if (DMLC_IO_NO_ENDIAN_SWAP) {
|
||||||
fi->Read(dmlc::BeginPtr(tree_info), sizeof(int32_t) * param.num_trees),
|
CHECK_EQ(
|
||||||
sizeof(int32_t) * param.num_trees);
|
fi->Read(dmlc::BeginPtr(tree_info), sizeof(int32_t) * param.num_trees),
|
||||||
|
sizeof(int32_t) * param.num_trees);
|
||||||
|
} else {
|
||||||
|
for (auto& info : tree_info) {
|
||||||
|
CHECK_EQ(fi->Read(&info, sizeof(int32_t)), sizeof(int32_t));
|
||||||
|
dmlc::ByteSwap(&info, sizeof(info), 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -61,6 +61,21 @@ struct GBTreeModelParam : public dmlc::Parameter<GBTreeModelParam> {
|
|||||||
.set_default(0)
|
.set_default(0)
|
||||||
.describe("Reserved option for vector tree.");
|
.describe("Reserved option for vector tree.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Swap byte order for all fields. Useful for transporting models between machines with different
|
||||||
|
// endianness (big endian vs little endian)
|
||||||
|
inline GBTreeModelParam ByteSwap() const {
|
||||||
|
GBTreeModelParam x = *this;
|
||||||
|
dmlc::ByteSwap(&x.num_trees, sizeof(x.num_trees), 1);
|
||||||
|
dmlc::ByteSwap(&x.deprecated_num_roots, sizeof(x.deprecated_num_roots), 1);
|
||||||
|
dmlc::ByteSwap(&x.deprecated_num_feature, sizeof(x.deprecated_num_feature), 1);
|
||||||
|
dmlc::ByteSwap(&x.pad_32bit, sizeof(x.pad_32bit), 1);
|
||||||
|
dmlc::ByteSwap(&x.deprecated_num_pbuffer, sizeof(x.deprecated_num_pbuffer), 1);
|
||||||
|
dmlc::ByteSwap(&x.deprecated_num_output_group, sizeof(x.deprecated_num_output_group), 1);
|
||||||
|
dmlc::ByteSwap(&x.size_leaf_vector, sizeof(x.size_leaf_vector), 1);
|
||||||
|
dmlc::ByteSwap(x.reserved, sizeof(x.reserved[0]), sizeof(x.reserved) / sizeof(x.reserved[0]));
|
||||||
|
return x;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct GBTreeModel : public Model {
|
struct GBTreeModel : public Model {
|
||||||
|
|||||||
@ -128,6 +128,19 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
|
|||||||
std::string str = get<String const>(j_param.at("base_score"));
|
std::string str = get<String const>(j_param.at("base_score"));
|
||||||
from_chars(str.c_str(), str.c_str() + str.size(), base_score);
|
from_chars(str.c_str(), str.c_str() + str.size(), base_score);
|
||||||
}
|
}
|
||||||
|
/*!
 * \brief Build a copy of this legacy learner parameter with the byte order of
 *        every field reversed.
 *
 * Used when the binary model header is read or written on a machine whose
 * endianness differs from the on-disk layout. The object itself is not modified.
 * \return the byte-swapped copy
 */
inline LearnerModelParamLegacy ByteSwap() const {
  LearnerModelParamLegacy swapped = *this;
  // Scalar fields: each one is swapped as a single element of its own width.
  dmlc::ByteSwap(&swapped.base_score, sizeof(swapped.base_score), 1);
  dmlc::ByteSwap(&swapped.num_feature, sizeof(swapped.num_feature), 1);
  dmlc::ByteSwap(&swapped.num_class, sizeof(swapped.num_class), 1);
  dmlc::ByteSwap(&swapped.contain_extra_attrs, sizeof(swapped.contain_extra_attrs), 1);
  dmlc::ByteSwap(&swapped.contain_eval_metrics, sizeof(swapped.contain_eval_metrics), 1);
  dmlc::ByteSwap(&swapped.major_version, sizeof(swapped.major_version), 1);
  dmlc::ByteSwap(&swapped.minor_version, sizeof(swapped.minor_version), 1);
  // `reserved` is an array: swap element by element, not the array as one big value.
  const auto reserved_len = sizeof(swapped.reserved) / sizeof(swapped.reserved[0]);
  dmlc::ByteSwap(swapped.reserved, sizeof(swapped.reserved[0]), reserved_len);
  return swapped;
}
|
||||||
|
|
||||||
// declare parameters
|
// declare parameters
|
||||||
DMLC_DECLARE_PARAMETER(LearnerModelParamLegacy) {
|
DMLC_DECLARE_PARAMETER(LearnerModelParamLegacy) {
|
||||||
DMLC_DECLARE_FIELD(base_score)
|
DMLC_DECLARE_FIELD(base_score)
|
||||||
@ -694,7 +707,9 @@ class LearnerIO : public LearnerConfiguration {
|
|||||||
// read parameter
|
// read parameter
|
||||||
CHECK_EQ(fi->Read(&mparam_, sizeof(mparam_)), sizeof(mparam_))
|
CHECK_EQ(fi->Read(&mparam_, sizeof(mparam_)), sizeof(mparam_))
|
||||||
<< "BoostLearner: wrong model format";
|
<< "BoostLearner: wrong model format";
|
||||||
|
if (!DMLC_IO_NO_ENDIAN_SWAP) {
|
||||||
|
mparam_ = mparam_.ByteSwap();
|
||||||
|
}
|
||||||
CHECK(fi->Read(&tparam_.objective)) << "BoostLearner: wrong model format";
|
CHECK(fi->Read(&tparam_.objective)) << "BoostLearner: wrong model format";
|
||||||
CHECK(fi->Read(&tparam_.booster)) << "BoostLearner: wrong model format";
|
CHECK(fi->Read(&tparam_.booster)) << "BoostLearner: wrong model format";
|
||||||
|
|
||||||
@ -828,7 +843,12 @@ class LearnerIO : public LearnerConfiguration {
|
|||||||
}
|
}
|
||||||
std::string header {"binf"};
|
std::string header {"binf"};
|
||||||
fo->Write(header.data(), 4);
|
fo->Write(header.data(), 4);
|
||||||
fo->Write(&mparam, sizeof(LearnerModelParamLegacy));
|
if (DMLC_IO_NO_ENDIAN_SWAP) {
|
||||||
|
fo->Write(&mparam, sizeof(LearnerModelParamLegacy));
|
||||||
|
} else {
|
||||||
|
LearnerModelParamLegacy x = mparam.ByteSwap();
|
||||||
|
fo->Write(&x, sizeof(LearnerModelParamLegacy));
|
||||||
|
}
|
||||||
fo->Write(tparam_.objective);
|
fo->Write(tparam_.objective);
|
||||||
fo->Write(tparam_.booster);
|
fo->Write(tparam_.booster);
|
||||||
gbm_->Save(fo);
|
gbm_->Save(fo);
|
||||||
@ -867,7 +887,13 @@ class LearnerIO : public LearnerConfiguration {
|
|||||||
// concatenate the model and config at final output, it's a temporary solution for
|
// concatenate the model and config at final output, it's a temporary solution for
|
||||||
// continuing support for binary model format
|
// continuing support for binary model format
|
||||||
fo->Write(&serialisation_header_[0], serialisation_header_.size());
|
fo->Write(&serialisation_header_[0], serialisation_header_.size());
|
||||||
fo->Write(&json_offset, sizeof(json_offset));
|
if (DMLC_IO_NO_ENDIAN_SWAP) {
|
||||||
|
fo->Write(&json_offset, sizeof(json_offset));
|
||||||
|
} else {
|
||||||
|
auto x = json_offset;
|
||||||
|
dmlc::ByteSwap(&x, sizeof(x), 1);
|
||||||
|
fo->Write(&x, sizeof(json_offset));
|
||||||
|
}
|
||||||
fo->Write(&binary_buf[0], binary_buf.size());
|
fo->Write(&binary_buf[0], binary_buf.size());
|
||||||
fo->Write(&config_str[0], config_str.size());
|
fo->Write(&config_str[0], config_str.size());
|
||||||
}
|
}
|
||||||
@ -904,6 +930,9 @@ class LearnerIO : public LearnerConfiguration {
|
|||||||
)doc";
|
)doc";
|
||||||
int64_t sz {-1};
|
int64_t sz {-1};
|
||||||
CHECK_EQ(fp.Read(&sz, sizeof(sz)), sizeof(sz));
|
CHECK_EQ(fp.Read(&sz, sizeof(sz)), sizeof(sz));
|
||||||
|
if (!DMLC_IO_NO_ENDIAN_SWAP) {
|
||||||
|
dmlc::ByteSwap(&sz, sizeof(sz), 1);
|
||||||
|
}
|
||||||
CHECK_GT(sz, 0);
|
CHECK_GT(sz, 0);
|
||||||
size_t json_offset = static_cast<size_t>(sz);
|
size_t json_offset = static_cast<size_t>(sz);
|
||||||
std::string buffer;
|
std::string buffer;
|
||||||
|
|||||||
@ -664,13 +664,26 @@ bst_node_t RegTree::GetNumSplitNodes() const {
|
|||||||
|
|
||||||
void RegTree::Load(dmlc::Stream* fi) {
|
void RegTree::Load(dmlc::Stream* fi) {
|
||||||
CHECK_EQ(fi->Read(&param, sizeof(TreeParam)), sizeof(TreeParam));
|
CHECK_EQ(fi->Read(&param, sizeof(TreeParam)), sizeof(TreeParam));
|
||||||
|
if (!DMLC_IO_NO_ENDIAN_SWAP) {
|
||||||
|
param = param.ByteSwap();
|
||||||
|
}
|
||||||
nodes_.resize(param.num_nodes);
|
nodes_.resize(param.num_nodes);
|
||||||
stats_.resize(param.num_nodes);
|
stats_.resize(param.num_nodes);
|
||||||
CHECK_NE(param.num_nodes, 0);
|
CHECK_NE(param.num_nodes, 0);
|
||||||
CHECK_EQ(fi->Read(dmlc::BeginPtr(nodes_), sizeof(Node) * nodes_.size()),
|
CHECK_EQ(fi->Read(dmlc::BeginPtr(nodes_), sizeof(Node) * nodes_.size()),
|
||||||
sizeof(Node) * nodes_.size());
|
sizeof(Node) * nodes_.size());
|
||||||
|
if (!DMLC_IO_NO_ENDIAN_SWAP) {
|
||||||
|
for (Node& node : nodes_) {
|
||||||
|
node = node.ByteSwap();
|
||||||
|
}
|
||||||
|
}
|
||||||
CHECK_EQ(fi->Read(dmlc::BeginPtr(stats_), sizeof(RTreeNodeStat) * stats_.size()),
|
CHECK_EQ(fi->Read(dmlc::BeginPtr(stats_), sizeof(RTreeNodeStat) * stats_.size()),
|
||||||
sizeof(RTreeNodeStat) * stats_.size());
|
sizeof(RTreeNodeStat) * stats_.size());
|
||||||
|
if (!DMLC_IO_NO_ENDIAN_SWAP) {
|
||||||
|
for (RTreeNodeStat& stat : stats_) {
|
||||||
|
stat = stat.ByteSwap();
|
||||||
|
}
|
||||||
|
}
|
||||||
// chg deleted nodes
|
// chg deleted nodes
|
||||||
deleted_nodes_.resize(0);
|
deleted_nodes_.resize(0);
|
||||||
for (int i = 1; i < param.num_nodes; ++i) {
|
for (int i = 1; i < param.num_nodes; ++i) {
|
||||||
@ -683,11 +696,32 @@ void RegTree::Load(dmlc::Stream* fi) {
|
|||||||
void RegTree::Save(dmlc::Stream* fo) const {
|
void RegTree::Save(dmlc::Stream* fo) const {
|
||||||
CHECK_EQ(param.num_nodes, static_cast<int>(nodes_.size()));
|
CHECK_EQ(param.num_nodes, static_cast<int>(nodes_.size()));
|
||||||
CHECK_EQ(param.num_nodes, static_cast<int>(stats_.size()));
|
CHECK_EQ(param.num_nodes, static_cast<int>(stats_.size()));
|
||||||
fo->Write(&param, sizeof(TreeParam));
|
|
||||||
CHECK_EQ(param.deprecated_num_roots, 1);
|
CHECK_EQ(param.deprecated_num_roots, 1);
|
||||||
CHECK_NE(param.num_nodes, 0);
|
CHECK_NE(param.num_nodes, 0);
|
||||||
fo->Write(dmlc::BeginPtr(nodes_), sizeof(Node) * nodes_.size());
|
|
||||||
fo->Write(dmlc::BeginPtr(stats_), sizeof(RTreeNodeStat) * nodes_.size());
|
if (DMLC_IO_NO_ENDIAN_SWAP) {
|
||||||
|
fo->Write(&param, sizeof(TreeParam));
|
||||||
|
} else {
|
||||||
|
TreeParam x = param.ByteSwap();
|
||||||
|
fo->Write(&x, sizeof(x));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (DMLC_IO_NO_ENDIAN_SWAP) {
|
||||||
|
fo->Write(dmlc::BeginPtr(nodes_), sizeof(Node) * nodes_.size());
|
||||||
|
} else {
|
||||||
|
for (const Node& node : nodes_) {
|
||||||
|
Node x = node.ByteSwap();
|
||||||
|
fo->Write(&x, sizeof(x));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (DMLC_IO_NO_ENDIAN_SWAP) {
|
||||||
|
fo->Write(dmlc::BeginPtr(stats_), sizeof(RTreeNodeStat) * nodes_.size());
|
||||||
|
} else {
|
||||||
|
for (const RTreeNodeStat& stat : stats_) {
|
||||||
|
RTreeNodeStat x = stat.ByteSwap();
|
||||||
|
fo->Write(&x, sizeof(x));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RegTree::LoadModel(Json const& in) {
|
void RegTree::LoadModel(Json const& in) {
|
||||||
|
|||||||
27
tests/ci_build/Dockerfile.s390x
Normal file
27
tests/ci_build/Dockerfile.s390x
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
FROM s390x/ubuntu:20.04
|
||||||
|
|
||||||
|
# Environment
|
||||||
|
ENV DEBIAN_FRONTEND noninteractive
|
||||||
|
SHELL ["/bin/bash", "-c"] # Use Bash as shell
|
||||||
|
|
||||||
|
# Install all basic requirements
|
||||||
|
RUN \
|
||||||
|
apt-get update && \
|
||||||
|
apt-get install -y --no-install-recommends tar unzip wget git build-essential ninja-build \
|
||||||
|
cmake time python3 python3-pip python3-numpy python3-scipy python3-sklearn r-base && \
|
||||||
|
python3 -m pip install pytest hypothesis
|
||||||
|
|
||||||
|
ENV GOSU_VERSION 1.10
|
||||||
|
|
||||||
|
# Install lightweight sudo (not bound to TTY)
|
||||||
|
# Fetch the gosu binary built for this image's architecture. The base image is
# s390x/ubuntu, so the amd64 build cannot execute here and the `gosu nobody true`
# self-check below would fail with an exec format error.
RUN set -ex; \
    wget -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-s390x" && \
    chmod +x /usr/local/bin/gosu && \
    gosu nobody true
|
||||||
|
|
||||||
|
# Default entry-point to use if running locally
|
||||||
|
# It will preserve attributes of created files
|
||||||
|
COPY entrypoint.sh /scripts/
|
||||||
|
|
||||||
|
WORKDIR /workspace
|
||||||
|
ENTRYPOINT ["/scripts/entrypoint.sh"]
|
||||||
@ -453,7 +453,8 @@ TEST(Json, Invalid) {
|
|||||||
Json load{Json::Load(StringView(str.c_str(), str.size()))};
|
Json load{Json::Load(StringView(str.c_str(), str.size()))};
|
||||||
} catch (dmlc::Error const &e) {
|
} catch (dmlc::Error const &e) {
|
||||||
std::string msg = e.what();
|
std::string msg = e.what();
|
||||||
ASSERT_NE(msg.find("EOF"), std::string::npos);
|
ASSERT_TRUE(msg.find("EOF") != std::string::npos
|
||||||
|
|| msg.find("255") != std::string::npos); // EOF is printed as 255 on s390x
|
||||||
has_thrown = true;
|
has_thrown = true;
|
||||||
};
|
};
|
||||||
ASSERT_TRUE(has_thrown);
|
ASSERT_TRUE(has_thrown);
|
||||||
|
|||||||
@ -6,6 +6,7 @@
|
|||||||
#include "xgboost/json_io.h"
|
#include "xgboost/json_io.h"
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
|
#if DMLC_IO_NO_ENDIAN_SWAP // skip on big-endian machines
|
||||||
// Manually construct tree in binary format
|
// Manually construct tree in binary format
|
||||||
// Do not use structs in case they change
|
// Do not use structs in case they change
|
||||||
// We want to preserve backwards compatibility
|
// We want to preserve backwards compatibility
|
||||||
@ -85,6 +86,7 @@ TEST(Tree, Load) {
|
|||||||
EXPECT_EQ(tree[1].LeafValue(), 0.1f);
|
EXPECT_EQ(tree[1].LeafValue(), 0.1f);
|
||||||
EXPECT_TRUE(tree[1].IsLeaf());
|
EXPECT_TRUE(tree[1].IsLeaf());
|
||||||
}
|
}
|
||||||
|
#endif // DMLC_IO_NO_ENDIAN_SWAP
|
||||||
|
|
||||||
TEST(Tree, AllocateNode) {
|
TEST(Tree, AllocateNode) {
|
||||||
RegTree tree;
|
RegTree tree;
|
||||||
|
|||||||
@ -109,6 +109,8 @@ def test_evals_result_demo():
|
|||||||
subprocess.check_call(cmd)
|
subprocess.check_call(cmd)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(**tm.no_sklearn())
|
||||||
|
@pytest.mark.skipif(**tm.no_pandas())
|
||||||
def test_aft_demo():
|
def test_aft_demo():
|
||||||
script = os.path.join(DEMO_DIR, 'aft_survival', 'aft_survival_demo.py')
|
script = os.path.join(DEMO_DIR, 'aft_survival', 'aft_survival_demo.py')
|
||||||
cmd = ['python', script]
|
cmd = ['python', script]
|
||||||
|
|||||||
@ -82,6 +82,7 @@ class TestEarlyStopping(unittest.TestCase):
|
|||||||
self.assert_metrics_length(cv, 1)
|
self.assert_metrics_length(cv, 1)
|
||||||
|
|
||||||
@pytest.mark.skipif(**tm.no_sklearn())
|
@pytest.mark.skipif(**tm.no_sklearn())
|
||||||
|
@pytest.mark.skipif(**tm.no_pandas())
|
||||||
def test_cv_early_stopping_with_multiple_eval_sets_and_metrics(self):
|
def test_cv_early_stopping_with_multiple_eval_sets_and_metrics(self):
|
||||||
from sklearn.datasets import load_breast_cancer
|
from sklearn.datasets import load_breast_cancer
|
||||||
|
|
||||||
|
|||||||
@ -1,10 +1,12 @@
|
|||||||
import xgboost
|
import xgboost
|
||||||
import os
|
import os
|
||||||
import generate_models as gm
|
import generate_models as gm
|
||||||
|
import testing as tm
|
||||||
import json
|
import json
|
||||||
import zipfile
|
import zipfile
|
||||||
import pytest
|
import pytest
|
||||||
import copy
|
import copy
|
||||||
|
import urllib.request
|
||||||
|
|
||||||
|
|
||||||
def run_model_param_check(config):
|
def run_model_param_check(config):
|
||||||
@ -87,6 +89,7 @@ def run_scikit_model_check(name, path):
|
|||||||
assert False
|
assert False
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(**tm.no_sklearn())
|
||||||
def test_model_compatibility():
|
def test_model_compatibility():
|
||||||
'''Test model compatibility, can only be run on CI as others don't
|
'''Test model compatibility, can only be run on CI as others don't
|
||||||
have the credentials.
|
have the credentials.
|
||||||
@ -94,17 +97,9 @@ def test_model_compatibility():
|
|||||||
'''
|
'''
|
||||||
path = os.path.dirname(os.path.abspath(__file__))
|
path = os.path.dirname(os.path.abspath(__file__))
|
||||||
path = os.path.join(path, 'models')
|
path = os.path.join(path, 'models')
|
||||||
try:
|
|
||||||
import boto3
|
|
||||||
import botocore
|
|
||||||
except ImportError:
|
|
||||||
pytest.skip(
|
|
||||||
'Skiping compatibility tests as boto3 is not installed.')
|
|
||||||
|
|
||||||
s3_bucket = boto3.resource('s3').Bucket('xgboost-ci-jenkins-artifacts')
|
|
||||||
zip_path = 'xgboost_model_compatibility_test.zip'
|
|
||||||
s3_bucket.download_file(zip_path, zip_path)
|
|
||||||
|
|
||||||
|
zip_path, _ = urllib.request.urlretrieve('https://xgboost-ci-jenkins-artifacts.s3-us-west-2' +
|
||||||
|
'.amazonaws.com/xgboost_model_compatibility_test.zip')
|
||||||
with zipfile.ZipFile(zip_path, 'r') as z:
|
with zipfile.ZipFile(zip_path, 'r') as z:
|
||||||
z.extractall(path)
|
z.extractall(path)
|
||||||
|
|
||||||
|
|||||||
@ -2,13 +2,17 @@
|
|||||||
import os
|
import os
|
||||||
from xgboost.compat import SKLEARN_INSTALLED, PANDAS_INSTALLED
|
from xgboost.compat import SKLEARN_INSTALLED, PANDAS_INSTALLED
|
||||||
from xgboost.compat import DASK_INSTALLED
|
from xgboost.compat import DASK_INSTALLED
|
||||||
|
import pytest
|
||||||
|
import tempfile
|
||||||
|
import xgboost as xgb
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
hypothesis = pytest.importorskip('hypothesis')
|
||||||
|
sklearn = pytest.importorskip('sklearn')
|
||||||
from hypothesis import strategies
|
from hypothesis import strategies
|
||||||
from hypothesis.extra.numpy import arrays
|
from hypothesis.extra.numpy import arrays
|
||||||
from joblib import Memory
|
from joblib import Memory
|
||||||
from sklearn import datasets
|
from sklearn import datasets
|
||||||
import tempfile
|
|
||||||
import xgboost as xgb
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import cupy as cp
|
import cupy as cp
|
||||||
|
|||||||
@ -88,3 +88,19 @@ if [ ${TASK} == "cmake_test" ]; then
|
|||||||
cd ..
|
cd ..
|
||||||
rm -rf build
|
rm -rf build
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [ ${TASK} == "s390x_test" ]; then
|
||||||
|
set -e
|
||||||
|
|
||||||
|
# Build and run C++ tests
|
||||||
|
rm -rf build
|
||||||
|
mkdir build && cd build
|
||||||
|
cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON -DGOOGLE_TEST=ON -DUSE_OPENMP=ON -DUSE_DMLC_GTEST=ON -GNinja
|
||||||
|
time ninja -v
|
||||||
|
./testxgboost
|
||||||
|
|
||||||
|
# Run model compatibility tests
|
||||||
|
cd ..
|
||||||
|
python3 -m pip install --user pytest hypothesis
|
||||||
|
PYTHONPATH=./python-package python3 -m pytest --fulltrace -v -rxXs tests/python/ -k 'test_model'
|
||||||
|
fi
|
||||||
|
|||||||
@ -20,6 +20,15 @@ if [ ${TASK} == "cmake_test" ] && [ ${TRAVIS_OS_NAME} == "osx" ]; then
|
|||||||
sudo softwareupdate -i "Command Line Tools (macOS High Sierra version 10.13) for Xcode-9.3"
|
sudo softwareupdate -i "Command Line Tools (macOS High Sierra version 10.13) for Xcode-9.3"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [ ${TASK} == "s390x_test" ] && [ ${TRAVIS_CPU_ARCH} == "s390x" ]; then
|
||||||
|
sudo snap install cmake --channel=3.17/beta --classic
|
||||||
|
export PATH=/snap/bin:${PATH}
|
||||||
|
cmake --version
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install -y --no-install-recommends tar unzip wget git build-essential ninja-build \
|
||||||
|
time python3 python3-pip python3-numpy python3-scipy python3-sklearn r-base
|
||||||
|
fi
|
||||||
|
|
||||||
if [ ${TASK} == "python_sdist_test" ] && [ ${TRAVIS_OS_NAME} == "linux" ]; then
|
if [ ${TASK} == "python_sdist_test" ] && [ ${TRAVIS_OS_NAME} == "linux" ]; then
|
||||||
wget https://github.com/Kitware/CMake/releases/download/v3.17.1/cmake-3.17.1-Linux-x86_64.sh
|
wget https://github.com/Kitware/CMake/releases/download/v3.17.1/cmake-3.17.1-Linux-x86_64.sh
|
||||||
sudo bash cmake-3.17.1-Linux-x86_64.sh --prefix=/usr/local --skip-license
|
sudo bash cmake-3.17.1-Linux-x86_64.sh --prefix=/usr/local --skip-license
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user