From 5620322a48a410adcfa97b25a79425422ea7da45 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 22 Oct 2019 23:27:26 -0400 Subject: [PATCH] [Breaking] Add global versioning. (#4936) * Use CMake config file for representing version. * Generate c and Python version file with CMake. The generated file is written into source tree. But unless XGBoost upgrades its version, there will be no actual modification. This retains compatibility with Makefiles for R. * Add XGBoost version the DMatrix binaries. * Simplify prefetch detection in CMakeLists.txt --- CMakeLists.txt | 10 ++-- amalgamation/xgboost-all0.cc | 1 + cmake/FindPrefetchIntrinsics.cmake | 22 ++++++++ cmake/Python_version.in | 1 + cmake/Version.cmake | 10 ++++ cmake/build_config.h.in | 17 ++++++ include/xgboost/base.h | 2 + include/xgboost/build_config.h | 6 ++ include/xgboost/c_api.h | 10 ++++ include/xgboost/data.h | 4 -- src/CMakeLists.txt | 29 ---------- src/c_api/c_api.cc | 12 ++++ src/common/version.cc | 90 ++++++++++++++++++++++++++++++ src/common/version.h | 35 ++++++++++++ src/data/data.cc | 21 ++++--- tests/cpp/c_api/test_c_api.cc | 6 ++ tests/cpp/common/test_version.cc | 61 ++++++++++++++++++++ tests/cpp/data/test_metainfo.cc | 20 ++++--- 18 files changed, 301 insertions(+), 56 deletions(-) create mode 100644 cmake/FindPrefetchIntrinsics.cmake create mode 100644 cmake/Python_version.in create mode 100644 cmake/Version.cmake create mode 100644 cmake/build_config.h.in create mode 100644 src/common/version.cc create mode 100644 src/common/version.h create mode 100644 tests/cpp/common/test_version.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index ceb44a597..c70212f6b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,12 +13,10 @@ if (CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.0) message(FATAL_ERROR "GCC version must be at least 5.0!") endif() -message(STATUS "xgboost VERSION: ${xgboost_VERSION}") -set(XGBOOST_DEFINITIONS - ${XGBOOST_DEFINITIONS} - -DXGBOOST_VER_MAJOR=${xgboost_VERSION_MAJOR} - -DXGBOOST_VER_MINOR=${xgboost_VERSION_MINOR} - -DXGBOOST_VER_PATCH=${xgboost_VERSION_PATCH}) +include(${xgboost_SOURCE_DIR}/cmake/FindPrefetchIntrinsics.cmake) +find_prefetch_intrinsics() +include(${xgboost_SOURCE_DIR}/cmake/Version.cmake) +write_version() set_default_configuration_release() #-- Options diff --git a/amalgamation/xgboost-all0.cc b/amalgamation/xgboost-all0.cc index b7712bd39..2c528d13a 100644 --- a/amalgamation/xgboost-all0.cc +++ b/amalgamation/xgboost-all0.cc @@ -69,6 +69,7 @@ #include "../src/common/hist_util.cc" #include "../src/common/json.cc" #include "../src/common/io.cc" +#include "../src/common/version.cc" // c_api #include "../src/c_api/c_api.cc" diff --git a/cmake/FindPrefetchIntrinsics.cmake b/cmake/FindPrefetchIntrinsics.cmake new file mode 100644 index 000000000..b00ff57d7 --- /dev/null +++ b/cmake/FindPrefetchIntrinsics.cmake @@ -0,0 +1,22 @@ +function (find_prefetch_intrinsics) + include(CheckCXXSourceCompiles) + check_cxx_source_compiles(" + #include + int main() { + char data = 0; + const char* address = &data; + _mm_prefetch(address, _MM_HINT_NTA); + return 0; + } + " XGBOOST_MM_PREFETCH_PRESENT) + check_cxx_source_compiles(" + int main() { + char data = 0; + const char* address = &data; + __builtin_prefetch(address, 0, 0); + return 0; + } + " XGBOOST_BUILTIN_PREFETCH_PRESENT) + set(XGBOOST_MM_PREFETCH_PRESENT ${XGBOOST_MM_PREFETCH_PRESENT} PARENT_SCOPE) + set(XGBOOST_BUILTIN_PREFETCH_PRESENT ${XGBOOST_BUILTIN_PREFETCH_PRESENT} PARENT_SCOPE) +endfunction (find_prefetch_intrinsics) diff --git a/cmake/Python_version.in b/cmake/Python_version.in new file mode 100644 index 000000000..219b1d74a --- /dev/null +++ b/cmake/Python_version.in @@ -0,0 +1 @@ +@xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@-SNAPSHOT \ No newline at end of file diff --git a/cmake/Version.cmake b/cmake/Version.cmake new file mode 100644 index 000000000..c8eafc8f1 --- /dev/null +++ b/cmake/Version.cmake @@ -0,0 +1,10 @@ +function (write_version) + message(STATUS "xgboost VERSION: ${xgboost_VERSION}") + configure_file( + ${xgboost_SOURCE_DIR}/cmake/build_config.h.in + ${xgboost_SOURCE_DIR}/include/xgboost/build_config.h @ONLY) + configure_file( + ${xgboost_SOURCE_DIR}/cmake/Python_version.in + ${xgboost_SOURCE_DIR}/python-package/xgboost/VERSION + ) +endfunction (write_version) diff --git a/cmake/build_config.h.in b/cmake/build_config.h.in new file mode 100644 index 000000000..74d980cdf --- /dev/null +++ b/cmake/build_config.h.in @@ -0,0 +1,17 @@ +/*! + * Copyright 2019 by Contributors + * \file build_config.h + * + * Generated from `cmake/build_config.h.in` by cmake. + */ +#ifndef XGBOOST_BUILD_CONFIG_H_ +#define XGBOOST_BUILD_CONFIG_H_ + +#cmakedefine XGBOOST_MM_PREFETCH_PRESENT +#cmakedefine XGBOOST_BUILTIN_PREFETCH_PRESENT + +#define XGBOOST_VER_MAJOR @xgboost_VERSION_MAJOR@ +#define XGBOOST_VER_MINOR @xgboost_VERSION_MINOR@ +#define XGBOOST_VER_PATCH @xgboost_VERSION_PATCH@ + +#endif // XGBOOST_BUILD_CONFIG_H_ diff --git a/include/xgboost/base.h b/include/xgboost/base.h index 0922cb22e..673bd633f 100644 --- a/include/xgboost/base.h +++ b/include/xgboost/base.h @@ -217,6 +217,8 @@ const bst_float kRtEps = 1e-6f; using omp_ulong = dmlc::omp_ulong; // NOLINT /*! \brief define unsigned int for openmp loop */ using bst_omp_uint = dmlc::omp_uint; // NOLINT +/*! \brief Type used for representing version number in binary form.*/ +using XGBoostVersionT = int32_t; /*! * \brief define compatible keywords in g++ diff --git a/include/xgboost/build_config.h b/include/xgboost/build_config.h index 6d364a6ff..f626f390a 100644 --- a/include/xgboost/build_config.h +++ b/include/xgboost/build_config.h @@ -1,6 +1,8 @@ /*! * Copyright 2019 by Contributors * \file build_config.h + * + * Generated from `cmake/build_config.h.in` by cmake. */ #ifndef XGBOOST_BUILD_CONFIG_H_ #define XGBOOST_BUILD_CONFIG_H_ @@ -19,4 +21,8 @@ #endif // !defined(XGBOOST_MM_PREFETCH_PRESENT) && !defined() +#define XGBOOST_VER_MAJOR 1 +#define XGBOOST_VER_MINOR 0 +#define XGBOOST_VER_PATCH 0 + #endif // XGBOOST_BUILD_CONFIG_H_ diff --git a/include/xgboost/c_api.h b/include/xgboost/c_api.h index 39f6811d8..2a9d492ac 100644 --- a/include/xgboost/c_api.h +++ b/include/xgboost/c_api.h @@ -58,6 +58,16 @@ typedef struct { // NOLINT(*) float* value; } XGBoostBatchCSR; +/*! + * \brief Return the version of the XGBoost library being currently used. + * + * The output variable is only written if it's not NULL. + * + * \param major Store the major version number + * \param minor Store the minor version number + * \param patch Store the patch (revision) number + */ +XGB_DLL void XGBoostVersion(int* major, int* minor, int* patch); /*! * \brief Callback to set the data to handle, diff --git a/include/xgboost/data.h b/include/xgboost/data.h index b8bce91de..44d9ea841 100644 --- a/include/xgboost/data.h +++ b/include/xgboost/data.h @@ -66,10 +66,6 @@ class MetaInfo { * can be used to specify initial prediction to boost from. */ HostDeviceVector base_margin_; - /*! \brief version flag, used to check version of this info */ - static const int kVersion = 3; - /*! \brief version that contains qid field */ - static const int kVersionWithQid = 2; /*! \brief default constructor */ MetaInfo() = default; /*! diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f2b29897b..ad3af9556 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,25 +1,6 @@ file(GLOB_RECURSE CPU_SOURCES *.cc *.h) list(REMOVE_ITEM CPU_SOURCES ${PROJECT_SOURCE_DIR}/src/cli_main.cc) -include(CheckCXXSourceCompiles) -check_cxx_source_compiles(" -#include -int main() { - char data = 0; - const char* address = &data; - _mm_prefetch(address, _MM_HINT_NTA); - return 0; -} -" XGBOOST_MM_PREFETCH_PRESENT) -check_cxx_source_compiles(" -int main() { - char data = 0; - const char* address = &data; - __builtin_prefetch(address, 0, 0); - return 0; -} -" XGBOOST_BUILTIN_PREFETCH_PRESENT) - #-- Object library # Object library is necessary for jvm-package, which creates its own shared # library. @@ -82,16 +63,6 @@ target_compile_definitions(objxgboost -DDMLC_LOG_CUSTOMIZE=1 # enable custom logging $<$>:_MWAITXINTRIN_H_INCLUDED> ${XGBOOST_DEFINITIONS}) -if (XGBOOST_MM_PREFETCH_PRESENT) - target_compile_definitions(objxgboost - PRIVATE - -DXGBOOST_MM_PREFETCH_PRESENT=1) -endif(XGBOOST_MM_PREFETCH_PRESENT) -if (XGBOOST_BUILTIN_PREFETCH_PRESENT) - target_compile_definitions(objxgboost - PRIVATE - -DXGBOOST_BUILTIN_PREFETCH_PRESENT=1) -endif (XGBOOST_BUILTIN_PREFETCH_PRESENT) if (USE_OPENMP) find_package(OpenMP REQUIRED) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 75190adc8..c6369f9f0 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -149,6 +149,18 @@ struct XGBAPIThreadLocalEntry { std::vector tmp_gpair; }; +XGB_DLL void XGBoostVersion(int* major, int* minor, int* patch) { + if (major) { + *major = XGBOOST_VER_MAJOR; + } + if (minor) { + *minor = XGBOOST_VER_MINOR; + } + if (patch) { + *patch = XGBOOST_VER_PATCH; + } +} + // define the threadlocal store. using XGBAPIThreadLocalStore = dmlc::ThreadLocalStore; diff --git a/src/common/version.cc b/src/common/version.cc new file mode 100644 index 000000000..5f25d1057 --- /dev/null +++ b/src/common/version.cc @@ -0,0 +1,90 @@ +/*! + * Copyright 2019 XGBoost contributors + */ +#include + +#include +#include +#include + +#include "xgboost/logging.h" +#include "xgboost/json.h" +#include "version.h" + +namespace xgboost { + +const Version::TripletT Version::kInvalid {-1, -1, -1}; + +Version::TripletT Version::Load(Json const& in, bool check) { + if (get(in).find("version") == get(in).cend()) { + return kInvalid; + } + Integer::Int major {0}, minor {0}, patch {0}; + try { + auto const& j_version = get(in["version"]); + std::tie(major, minor, patch) = std::make_tuple( + get(j_version.at(0)), + get(j_version.at(1)), + get(j_version.at(2))); + } catch (dmlc::Error const& e) { + LOG(FATAL) << "Invaid version format in loaded JSON object: " << in; + } + + return std::make_tuple(major, minor, patch); +} + +Version::TripletT Version::Load(dmlc::Stream* fi) { + XGBoostVersionT major{0}, minor{0}, patch{0}; + // This is only used in DMatrix serialization, so doesn't break model compability. + std::string msg { "Incorrect version format found in binary file. " + "Binary file from XGBoost < 1.0.0 is no longer supported. " + "Please generate it again." }; + std::string verstr { u8"version:" }, read; + read.resize(verstr.size(), 0); + + CHECK_EQ(fi->Read(&read[0], verstr.size()), verstr.size()) << msg; + if (verstr != read) { + // read might contain `\0` that terminates the string. + LOG(FATAL) << msg; + } + + CHECK_EQ(fi->Read(&major, sizeof(major)), sizeof(major)) << msg; + CHECK_EQ(fi->Read(&minor, sizeof(major)), sizeof(minor)) << msg; + CHECK_EQ(fi->Read(&patch, sizeof(major)), sizeof(patch)) << msg; + + return std::make_tuple(major, minor, patch); +} + +void Version::Save(Json* out) { + Integer::Int major, minor, patch; + std::tie(major, minor, patch)= Self(); + (*out)["version"] = std::vector{Json(Integer{major}), + Json(Integer{minor}), + Json(Integer{patch})}; +} + +void Version::Save(dmlc::Stream* fo) { + XGBoostVersionT major, minor, patch; + std::tie(major, minor, patch) = Self(); + std::string verstr { u8"version:" }; + fo->Write(&verstr[0], verstr.size()); + fo->Write(&major, sizeof(major)); + fo->Write(&minor, sizeof(minor)); + fo->Write(&patch, sizeof(patch)); +} + +std::string Version::String(TripletT const& version) { + std::stringstream ss; + ss << std::get<0>(version) << "." << get<1>(version) << "." << get<2>(version); + return ss.str(); +} + +Version::TripletT Version::Self() { + return std::make_tuple(XGBOOST_VER_MAJOR, XGBOOST_VER_MINOR, XGBOOST_VER_PATCH); +} + +bool Version::Same(TripletT const& triplet) { + return triplet == Self(); +} + +} // namespace xgboost diff --git a/src/common/version.h b/src/common/version.h new file mode 100644 index 000000000..96885f6a8 --- /dev/null +++ b/src/common/version.h @@ -0,0 +1,35 @@ +/*! + * Copyright 2019 XGBoost contributors + */ +#ifndef XGBOOST_COMMON_VERSION_H_ +#define XGBOOST_COMMON_VERSION_H_ + +#include +#include +#include + +#include "xgboost/base.h" + +namespace xgboost { +class Json; +// a static class for handling version info +struct Version { + using TripletT = std::tuple; + static const TripletT kInvalid; + + // Save/Load version info to Json document + static TripletT Load(Json const& in, bool check = false); + static void Save(Json* out); + + // Save/Load version info to dmlc::Stream + static Version::TripletT Load(dmlc::Stream* fi); + static void Save(dmlc::Stream* fo); + + static std::string String(TripletT const& version); + static TripletT Self(); + + static bool Same(TripletT const& triplet); +}; + +} // namespace xgboost +#endif // XGBOOST_COMMON_VERSION_H_ diff --git a/src/data/data.cc b/src/data/data.cc index 026285150..72cc67383 100644 --- a/src/data/data.cc +++ b/src/data/data.cc @@ -4,6 +4,7 @@ */ #include #include +#include #include #include @@ -11,6 +12,7 @@ #include "./simple_dmatrix.h" #include "./simple_csr_source.h" #include "../common/io.h" +#include "../common/version.h" #include "../common/group_data.h" #if DMLC_ENABLE_STD_THREAD @@ -34,8 +36,7 @@ void MetaInfo::Clear() { } void MetaInfo::SaveBinary(dmlc::Stream *fo) const { - int32_t version = kVersion; - fo->Write(&version, sizeof(version)); + Version::Save(fo); fo->Write(&num_row_, sizeof(num_row_)); fo->Write(&num_col_, sizeof(num_col_)); fo->Write(&num_nonzero_, sizeof(num_nonzero_)); @@ -47,19 +48,21 @@ void MetaInfo::SaveBinary(dmlc::Stream *fo) const { } void MetaInfo::LoadBinary(dmlc::Stream *fi) { - int version; - CHECK(fi->Read(&version, sizeof(version)) == sizeof(version)) << "MetaInfo: invalid version"; - CHECK(version >= 1 && version <= kVersion) << "MetaInfo: unsupported file version"; + auto version = Version::Load(fi); + auto major = std::get<0>(version); + // MetaInfo is saved in `SparsePageSource'. So the version in MetaInfo represents the + // version of DMatrix. + CHECK_EQ(major, 1) << "Binary DMatrix generated by XGBoost: " + << Version::String(version) << " is no longer supported. " + << "Please process and save your data in current version: " + << Version::String(Version::Self()) << " again."; CHECK(fi->Read(&num_row_, sizeof(num_row_)) == sizeof(num_row_)) << "MetaInfo: invalid format"; CHECK(fi->Read(&num_col_, sizeof(num_col_)) == sizeof(num_col_)) << "MetaInfo: invalid format"; CHECK(fi->Read(&num_nonzero_, sizeof(num_nonzero_)) == sizeof(num_nonzero_)) << "MetaInfo: invalid format"; CHECK(fi->Read(&labels_.HostVector())) << "MetaInfo: invalid format"; CHECK(fi->Read(&group_ptr_)) << "MetaInfo: invalid format"; - if (version == kVersionWithQid) { - std::vector qids; - CHECK(fi->Read(&qids)) << "MetaInfo: invalid format"; - } + CHECK(fi->Read(&weights_.HostVector())) << "MetaInfo: invalid format"; CHECK(fi->Read(&root_index_)) << "MetaInfo: invalid format"; CHECK(fi->Read(&base_margin_.HostVector())) << "MetaInfo: invalid format"; diff --git a/tests/cpp/c_api/test_c_api.cc b/tests/cpp/c_api/test_c_api.cc index d61d1dced..256068d55 100644 --- a/tests/cpp/c_api/test_c_api.cc +++ b/tests/cpp/c_api/test_c_api.cc @@ -63,3 +63,9 @@ TEST(c_api, XGDMatrixCreateFromMat_omp) { delete dmat; } } + +TEST(c_api, Version) { + int patch {0}; + XGBoostVersion(NULL, NULL, &patch); // NOLINT + ASSERT_EQ(patch, XGBOOST_VER_PATCH); +} diff --git a/tests/cpp/common/test_version.cc b/tests/cpp/common/test_version.cc new file mode 100644 index 000000000..fbbef5992 --- /dev/null +++ b/tests/cpp/common/test_version.cc @@ -0,0 +1,61 @@ +/*! + * Copyright 2019 XGBoost contributors + */ +#include + +#include +#include + +#include +#include + +#include + +#include "../../../src/common/version.h" + +namespace xgboost { +TEST(Version, Basic) { + Json j_ver { Object() }; + Version::Save(&j_ver); + auto triplet { Version::Load(j_ver) }; + ASSERT_TRUE(Version::Same(triplet)); + + dmlc::TemporaryDirectory tempdir; + const std::string fname = tempdir.path + "/version"; + + { + std::unique_ptr fo(dmlc::Stream::Create(fname.c_str(), "w")); + Version::Save(fo.get()); + } + + { + std::unique_ptr fi(dmlc::Stream::Create(fname.c_str(), "r")); + auto triplet { Version::Load(fi.get())};; + ASSERT_TRUE(Version::Same(triplet)); + } + + std::string str { Version::String(triplet) }; + + size_t ptr {0}; + XGBoostVersionT v {0}; + v = std::stoi(str, &ptr); + ASSERT_EQ(str.at(ptr), '.'); + ASSERT_EQ(v, XGBOOST_VER_MAJOR) << "major: " << v; + + str = str.substr(ptr+1); + + ptr = 0; + v = std::stoi(str, &ptr); + ASSERT_EQ(str.at(ptr), '.'); + ASSERT_EQ(v, XGBOOST_VER_MINOR) << "minor: " << v;; + + str = str.substr(ptr+1); + + ptr = 0; + v = std::stoi(str, &ptr); + ASSERT_EQ(v, XGBOOST_VER_MINOR) << "patch: " << v;; + + str = str.substr(ptr); + ASSERT_EQ(str.size(), 0); +} +} // namespace xgboost \ No newline at end of file diff --git a/tests/cpp/data/test_metainfo.cc b/tests/cpp/data/test_metainfo.cc index 38e157fbb..c031dd9ed 100644 --- a/tests/cpp/data/test_metainfo.cc +++ b/tests/cpp/data/test_metainfo.cc @@ -51,20 +51,24 @@ TEST(MetaInfo, SaveLoadBinary) { dmlc::TemporaryDirectory tempdir; const std::string tmp_file = tempdir.path + "/metainfo.binary"; - dmlc::Stream* fs = dmlc::Stream::Create(tmp_file.c_str(), "w"); - info.SaveBinary(fs); - delete fs; + { + std::unique_ptr fs { + dmlc::Stream::Create(tmp_file.c_str(), "w") + }; + info.SaveBinary(fs.get()); + } - ASSERT_EQ(GetFileSize(tmp_file), 76) - << "Expected saved binary file size to be same as object size"; + ASSERT_EQ(GetFileSize(tmp_file), 92) + << "Expected saved binary file size to be same as object size"; - fs = dmlc::Stream::Create(tmp_file.c_str(), "r"); + std::unique_ptr fs { + dmlc::Stream::Create(tmp_file.c_str(), "r") + }; xgboost::MetaInfo inforead; - inforead.LoadBinary(fs); + inforead.LoadBinary(fs.get()); EXPECT_EQ(inforead.labels_.HostVector(), info.labels_.HostVector()); EXPECT_EQ(inforead.num_col_, info.num_col_); EXPECT_EQ(inforead.num_row_, info.num_row_); - delete fs; } TEST(MetaInfo, LoadQid) {