Handle UTF-8 paths correctly on Windows platform (#9443)

* Fix round-trip serialization with UTF-8 paths

* Add compiler version check

* Add comment to C API functions

* Add Python tests

* [CI] Update MacOS deployment target

* Use std::filesystem instead of dmlc::TemporaryDirectory
This commit is contained in:
Philip Hyunsu Cho
2023-08-07 23:27:25 -07:00
committed by GitHub
parent 97fd5207dd
commit 7ce090e775
6 changed files with 48 additions and 18 deletions

View File

@@ -35,7 +35,7 @@ if [[ "$platform_id" == macosx_* ]]; then
# MacOS, Intel
wheel_tag=macosx_10_15_x86_64.macosx_11_0_x86_64.macosx_12_0_x86_64
cpython_ver=38
export MACOSX_DEPLOYMENT_TARGET=10.13
export MACOSX_DEPLOYMENT_TARGET=10.15
#OPENMP_URL="https://anaconda.org/conda-forge/llvm-openmp/11.1.0/download/osx-64/llvm-openmp-11.1.0-hda6cdc1_1.tar.bz2"
OPENMP_URL="https://xgboost-ci-jenkins-artifacts.s3.us-west-2.amazonaws.com/llvm-openmp-11.1.0-hda6cdc1_1-osx-64.tar.bz2"
else

View File

@@ -8,10 +8,11 @@
#include <xgboost/learner.h>
#include <xgboost/version_config.h>
#include <array> // for array
#include <cstddef> // std::size_t
#include <limits> // std::numeric_limits
#include <string> // std::string
#include <array> // for array
#include <cstddef> // std::size_t
#include <filesystem> // std::filesystem
#include <limits> // std::numeric_limits
#include <string> // std::string
#include <vector>
#include "../../../src/c_api/c_api_error.h"
@@ -162,7 +163,7 @@ TEST(CAPI, ConfigIO) {
TEST(CAPI, JsonModelIO) {
size_t constexpr kRows = 10;
size_t constexpr kCols = 10;
dmlc::TemporaryDirectory tempdir;
auto tempdir = std::filesystem::temp_directory_path();
auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
std::vector<std::shared_ptr<DMatrix>> mat {p_dmat};
@@ -178,19 +179,19 @@ TEST(CAPI, JsonModelIO) {
learner->UpdateOneIter(0, p_dmat);
BoosterHandle handle = learner.get();
std::string modelfile_0 = tempdir.path + "/model_0.json";
XGBoosterSaveModel(handle, modelfile_0.c_str());
XGBoosterLoadModel(handle, modelfile_0.c_str());
auto modelfile_0 = tempdir / std::filesystem::u8path(u8"모델_0.json");
XGBoosterSaveModel(handle, modelfile_0.u8string().c_str());
XGBoosterLoadModel(handle, modelfile_0.u8string().c_str());
bst_ulong num_feature {0};
ASSERT_EQ(XGBoosterGetNumFeature(handle, &num_feature), 0);
ASSERT_EQ(num_feature, kCols);
std::string modelfile_1 = tempdir.path + "/model_1.json";
XGBoosterSaveModel(handle, modelfile_1.c_str());
auto modelfile_1 = tempdir / "model_1.json";
XGBoosterSaveModel(handle, modelfile_1.u8string().c_str());
auto model_str_0 = common::LoadSequentialFile(modelfile_0);
auto model_str_1 = common::LoadSequentialFile(modelfile_1);
auto model_str_0 = common::LoadSequentialFile(modelfile_0.u8string());
auto model_str_1 = common::LoadSequentialFile(modelfile_1.u8string());
ASSERT_EQ(model_str_0.front(), '{');
ASSERT_EQ(model_str_0, model_str_1);

View File

@@ -1,5 +1,6 @@
import json
import os
import pathlib
import tempfile
from pathlib import Path
@@ -167,6 +168,17 @@ class TestBasic:
with pytest.raises(xgb.core.XGBoostError):
xgb.Booster(model_file=u'不正なパス')
@pytest.mark.parametrize("path", ["모델.ubj", "がうる・ぐら.json"], ids=["path-0", "path-1"])
def test_unicode_path(self, tmpdir, path):
model_path = pathlib.Path(tmpdir) / path
dtrain, _ = tm.load_agaricus(__file__)
param = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"}
bst = xgb.train(param, dtrain, num_boost_round=2)
bst.save_model(model_path)
bst2 = xgb.Booster(model_file=model_path)
assert bst.get_dump(dump_format="text") == bst2.get_dump(dump_format="text")
def test_dmatrix_numpy_init_omp(self):
rows = [1000, 11326, 15000]