Handle UTF-8 paths correctly on Windows platform (#9443)
* Fix round-trip serialization with UTF-8 paths * Add compiler version check * Add comment to C API functions * Add Python tests * [CI] Update MacOS deployment target * Use std::filesystem instead of dmlc::TemporaryDirectory
This commit is contained in:
parent
97fd5207dd
commit
7ce090e775
@ -14,8 +14,24 @@ endif ((${CMAKE_VERSION} VERSION_GREATER 3.13) OR (${CMAKE_VERSION} VERSION_EQUA
|
||||
|
||||
message(STATUS "CMake version ${CMAKE_VERSION}")
|
||||
|
||||
if (CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.0)
|
||||
message(FATAL_ERROR "GCC version must be at least 5.0!")
|
||||
# Check compiler versions
|
||||
# Use recent compilers to ensure that std::filesystem is available
|
||||
if(MSVC)
|
||||
if(MSVC_VERSION LESS 1920)
|
||||
message(FATAL_ERROR "Need Visual Studio 2019 or newer to build XGBoost")
|
||||
endif()
|
||||
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "8.1")
|
||||
message(FATAL_ERROR "Need GCC 8.1 or newer to build XGBoost")
|
||||
endif()
|
||||
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
|
||||
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "11.0")
|
||||
message(FATAL_ERROR "Need Xcode 11.0 (AppleClang 11.0) or newer to build XGBoost")
|
||||
endif()
|
||||
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
|
||||
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "9.0")
|
||||
message(FATAL_ERROR "Need Clang 9.0 or newer to build XGBoost")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
include(${xgboost_SOURCE_DIR}/cmake/FindPrefetchIntrinsics.cmake)
|
||||
|
||||
@ -1221,7 +1221,7 @@ XGB_DLL int XGBoosterPredictFromCudaColumnar(BoosterHandle handle, char const *v
|
||||
* \brief Load model from existing file
|
||||
*
|
||||
* \param handle handle
|
||||
* \param fname File URI or file name.
|
||||
* \param fname File URI or file name. The string must be UTF-8 encoded.
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGBoosterLoadModel(BoosterHandle handle,
|
||||
@ -1230,7 +1230,7 @@ XGB_DLL int XGBoosterLoadModel(BoosterHandle handle,
|
||||
* \brief Save model into existing file
|
||||
*
|
||||
* \param handle handle
|
||||
* \param fname File URI or file name.
|
||||
* \param fname File URI or file name. The string must be UTF-8 encoded.
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGBoosterSaveModel(BoosterHandle handle,
|
||||
|
||||
@ -28,6 +28,7 @@
|
||||
#include <cstddef> // for size_t
|
||||
#include <cstdint> // for int32_t, uint32_t
|
||||
#include <cstring> // for memcpy
|
||||
#include <filesystem> // for filesystem
|
||||
#include <fstream> // for ifstream
|
||||
#include <iterator> // for distance
|
||||
#include <limits> // for numeric_limits
|
||||
@ -153,7 +154,7 @@ std::string LoadSequentialFile(std::string uri, bool stream) {
|
||||
// Open in binary mode so that correct file size can be computed with
|
||||
// seekg(). This accommodates Windows platform:
|
||||
// https://docs.microsoft.com/en-us/cpp/standard-library/basic-istream-class?view=vs-2019#seekg
|
||||
std::ifstream ifs(uri, std::ios_base::binary | std::ios_base::in);
|
||||
std::ifstream ifs(std::filesystem::u8path(uri), std::ios_base::binary | std::ios_base::in);
|
||||
if (!ifs) {
|
||||
// https://stackoverflow.com/a/17338934
|
||||
OpenErr();
|
||||
|
||||
@ -35,7 +35,7 @@ if [[ "$platform_id" == macosx_* ]]; then
|
||||
# MacOS, Intel
|
||||
wheel_tag=macosx_10_15_x86_64.macosx_11_0_x86_64.macosx_12_0_x86_64
|
||||
cpython_ver=38
|
||||
export MACOSX_DEPLOYMENT_TARGET=10.13
|
||||
export MACOSX_DEPLOYMENT_TARGET=10.15
|
||||
#OPENMP_URL="https://anaconda.org/conda-forge/llvm-openmp/11.1.0/download/osx-64/llvm-openmp-11.1.0-hda6cdc1_1.tar.bz2"
|
||||
OPENMP_URL="https://xgboost-ci-jenkins-artifacts.s3.us-west-2.amazonaws.com/llvm-openmp-11.1.0-hda6cdc1_1-osx-64.tar.bz2"
|
||||
else
|
||||
|
||||
@ -10,6 +10,7 @@
|
||||
|
||||
#include <array> // for array
|
||||
#include <cstddef> // std::size_t
|
||||
#include <filesystem> // std::filesystem
|
||||
#include <limits> // std::numeric_limits
|
||||
#include <string> // std::string
|
||||
#include <vector>
|
||||
@ -162,7 +163,7 @@ TEST(CAPI, ConfigIO) {
|
||||
TEST(CAPI, JsonModelIO) {
|
||||
size_t constexpr kRows = 10;
|
||||
size_t constexpr kCols = 10;
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
auto tempdir = std::filesystem::temp_directory_path();
|
||||
|
||||
auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
|
||||
std::vector<std::shared_ptr<DMatrix>> mat {p_dmat};
|
||||
@ -178,19 +179,19 @@ TEST(CAPI, JsonModelIO) {
|
||||
learner->UpdateOneIter(0, p_dmat);
|
||||
BoosterHandle handle = learner.get();
|
||||
|
||||
std::string modelfile_0 = tempdir.path + "/model_0.json";
|
||||
XGBoosterSaveModel(handle, modelfile_0.c_str());
|
||||
XGBoosterLoadModel(handle, modelfile_0.c_str());
|
||||
auto modelfile_0 = tempdir / std::filesystem::u8path(u8"모델_0.json");
|
||||
XGBoosterSaveModel(handle, modelfile_0.u8string().c_str());
|
||||
XGBoosterLoadModel(handle, modelfile_0.u8string().c_str());
|
||||
|
||||
bst_ulong num_feature {0};
|
||||
ASSERT_EQ(XGBoosterGetNumFeature(handle, &num_feature), 0);
|
||||
ASSERT_EQ(num_feature, kCols);
|
||||
|
||||
std::string modelfile_1 = tempdir.path + "/model_1.json";
|
||||
XGBoosterSaveModel(handle, modelfile_1.c_str());
|
||||
auto modelfile_1 = tempdir / "model_1.json";
|
||||
XGBoosterSaveModel(handle, modelfile_1.u8string().c_str());
|
||||
|
||||
auto model_str_0 = common::LoadSequentialFile(modelfile_0);
|
||||
auto model_str_1 = common::LoadSequentialFile(modelfile_1);
|
||||
auto model_str_0 = common::LoadSequentialFile(modelfile_0.u8string());
|
||||
auto model_str_1 = common::LoadSequentialFile(modelfile_1.u8string());
|
||||
|
||||
ASSERT_EQ(model_str_0.front(), '{');
|
||||
ASSERT_EQ(model_str_0, model_str_1);
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
import json
|
||||
import os
|
||||
import pathlib
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
@ -167,6 +168,17 @@ class TestBasic:
|
||||
with pytest.raises(xgb.core.XGBoostError):
|
||||
xgb.Booster(model_file=u'不正なパス')
|
||||
|
||||
@pytest.mark.parametrize("path", ["모델.ubj", "がうる・ぐら.json"], ids=["path-0", "path-1"])
|
||||
def test_unicode_path(self, tmpdir, path):
|
||||
model_path = pathlib.Path(tmpdir) / path
|
||||
dtrain, _ = tm.load_agaricus(__file__)
|
||||
param = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"}
|
||||
bst = xgb.train(param, dtrain, num_boost_round=2)
|
||||
bst.save_model(model_path)
|
||||
|
||||
bst2 = xgb.Booster(model_file=model_path)
|
||||
assert bst.get_dump(dump_format="text") == bst2.get_dump(dump_format="text")
|
||||
|
||||
def test_dmatrix_numpy_init_omp(self):
|
||||
|
||||
rows = [1000, 11326, 15000]
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user