Handle UTF-8 paths correctly on Windows platform (#9443)
* Fix round-trip serialization with UTF-8 paths * Add compiler version check * Add comment to C API functions * Add Python tests * [CI] Update MacOS deployment target * Use std::filesystem instead of dmlc::TemporaryDirectory
This commit is contained in:
committed by
GitHub
parent
97fd5207dd
commit
7ce090e775
@@ -35,7 +35,7 @@ if [[ "$platform_id" == macosx_* ]]; then
|
||||
# MacOS, Intel
|
||||
wheel_tag=macosx_10_15_x86_64.macosx_11_0_x86_64.macosx_12_0_x86_64
|
||||
cpython_ver=38
|
||||
export MACOSX_DEPLOYMENT_TARGET=10.13
|
||||
export MACOSX_DEPLOYMENT_TARGET=10.15
|
||||
#OPENMP_URL="https://anaconda.org/conda-forge/llvm-openmp/11.1.0/download/osx-64/llvm-openmp-11.1.0-hda6cdc1_1.tar.bz2"
|
||||
OPENMP_URL="https://xgboost-ci-jenkins-artifacts.s3.us-west-2.amazonaws.com/llvm-openmp-11.1.0-hda6cdc1_1-osx-64.tar.bz2"
|
||||
else
|
||||
|
||||
@@ -8,10 +8,11 @@
|
||||
#include <xgboost/learner.h>
|
||||
#include <xgboost/version_config.h>
|
||||
|
||||
#include <array> // for array
|
||||
#include <cstddef> // std::size_t
|
||||
#include <limits> // std::numeric_limits
|
||||
#include <string> // std::string
|
||||
#include <array> // for array
|
||||
#include <cstddef> // std::size_t
|
||||
#include <filesystem> // std::filesystem
|
||||
#include <limits> // std::numeric_limits
|
||||
#include <string> // std::string
|
||||
#include <vector>
|
||||
|
||||
#include "../../../src/c_api/c_api_error.h"
|
||||
@@ -162,7 +163,7 @@ TEST(CAPI, ConfigIO) {
|
||||
TEST(CAPI, JsonModelIO) {
|
||||
size_t constexpr kRows = 10;
|
||||
size_t constexpr kCols = 10;
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
auto tempdir = std::filesystem::temp_directory_path();
|
||||
|
||||
auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
|
||||
std::vector<std::shared_ptr<DMatrix>> mat {p_dmat};
|
||||
@@ -178,19 +179,19 @@ TEST(CAPI, JsonModelIO) {
|
||||
learner->UpdateOneIter(0, p_dmat);
|
||||
BoosterHandle handle = learner.get();
|
||||
|
||||
std::string modelfile_0 = tempdir.path + "/model_0.json";
|
||||
XGBoosterSaveModel(handle, modelfile_0.c_str());
|
||||
XGBoosterLoadModel(handle, modelfile_0.c_str());
|
||||
auto modelfile_0 = tempdir / std::filesystem::u8path(u8"모델_0.json");
|
||||
XGBoosterSaveModel(handle, modelfile_0.u8string().c_str());
|
||||
XGBoosterLoadModel(handle, modelfile_0.u8string().c_str());
|
||||
|
||||
bst_ulong num_feature {0};
|
||||
ASSERT_EQ(XGBoosterGetNumFeature(handle, &num_feature), 0);
|
||||
ASSERT_EQ(num_feature, kCols);
|
||||
|
||||
std::string modelfile_1 = tempdir.path + "/model_1.json";
|
||||
XGBoosterSaveModel(handle, modelfile_1.c_str());
|
||||
auto modelfile_1 = tempdir / "model_1.json";
|
||||
XGBoosterSaveModel(handle, modelfile_1.u8string().c_str());
|
||||
|
||||
auto model_str_0 = common::LoadSequentialFile(modelfile_0);
|
||||
auto model_str_1 = common::LoadSequentialFile(modelfile_1);
|
||||
auto model_str_0 = common::LoadSequentialFile(modelfile_0.u8string());
|
||||
auto model_str_1 = common::LoadSequentialFile(modelfile_1.u8string());
|
||||
|
||||
ASSERT_EQ(model_str_0.front(), '{');
|
||||
ASSERT_EQ(model_str_0, model_str_1);
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import json
|
||||
import os
|
||||
import pathlib
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
@@ -167,6 +168,17 @@ class TestBasic:
|
||||
with pytest.raises(xgb.core.XGBoostError):
|
||||
xgb.Booster(model_file=u'不正なパス')
|
||||
|
||||
@pytest.mark.parametrize("path", ["모델.ubj", "がうる・ぐら.json"], ids=["path-0", "path-1"])
|
||||
def test_unicode_path(self, tmpdir, path):
|
||||
model_path = pathlib.Path(tmpdir) / path
|
||||
dtrain, _ = tm.load_agaricus(__file__)
|
||||
param = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"}
|
||||
bst = xgb.train(param, dtrain, num_boost_round=2)
|
||||
bst.save_model(model_path)
|
||||
|
||||
bst2 = xgb.Booster(model_file=model_path)
|
||||
assert bst.get_dump(dump_format="text") == bst2.get_dump(dump_format="text")
|
||||
|
||||
def test_dmatrix_numpy_init_omp(self):
|
||||
|
||||
rows = [1000, 11326, 15000]
|
||||
|
||||
Reference in New Issue
Block a user