Fix CLI model IO. (#5535)

* Add test for comparing Python and CLI training result.

commit 468b1594d3
parent 0676a19e70
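
For context, the round trip this fix (and the new test at the bottom of the diff) covers: the CLI trainer now goes through Learner::LoadModel / Learner::SaveModel, so a model written by the `xgboost` binary can be read back by the Python package. A minimal sketch of that round trip, assuming the CLI binary has been built at the repository root; `train.conf` and `cli_model.bin` are placeholder names for this sketch and are not part of the commit:

    import subprocess

    import xgboost

    # Train through the CLI with a plain-text config file; the new
    # tests/python/test_cli.py generates such a config from a template.
    # 'train.conf' and 'cli_model.bin' are placeholders and must match the
    # model_out entry in the config.
    subprocess.run(['./xgboost', 'train.conf'], check=True)

    # With the fix, the model written by the CLI trainer loads cleanly in
    # the Python package.
    booster = xgboost.Booster(model_file='cli_model.bin')
    dtrain = xgboost.DMatrix('demo/data/agaricus.txt.train?format=libsvm')
    print(booster.predict(dtrain)[:5])
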
Jenkinsfile (vendored): 2 lines changed
@@ -201,6 +201,7 @@ def BuildCPU() {
     ${docker_extra_params} ${dockerRun} ${container_type} ${docker_binary} build/testxgboost
     """
 
+    stash name: 'xgboost_cli', includes: 'xgboost'
     deleteDir()
   }
 }
@@ -282,6 +283,7 @@ def TestPythonCPU() {
   node('linux && cpu') {
     unstash name: 'xgboost_whl_cuda9'
     unstash name: 'srcs'
+    unstash name: 'xgboost_cli'
     echo "Test Python CPU"
     def container_type = "cpu"
     def docker_binary = "docker"
@@ -96,6 +96,7 @@ def BuildWin64() {
     s3Upload bucket: 'xgboost-nightly-builds', path: path, acl: 'PublicRead', workingDir: 'python-package/dist', includePathPattern:'**/*.whl'
     echo 'Stashing C++ test executable (testxgboost)...'
     stash name: 'xgboost_cpp_tests', includes: 'build/testxgboost.exe'
+    stash name: 'xgboost_cli', includes: 'xgboost.exe'
     deleteDir()
   }
 }
@@ -104,6 +105,7 @@ def TestWin64CPU() {
   node('win64 && cpu') {
     unstash name: 'srcs'
     unstash name: 'xgboost_whl'
+    unstash name: 'xgboost_cli'
     echo "Test Win64 CPU"
     echo "Installing Python wheel..."
     bat "conda activate && (python -m pip uninstall -y xgboost || cd .)"
src/cli_main.cc

@@ -138,14 +138,10 @@ struct CLIParam : public XGBoostParameter<CLIParam> {
     // constraint.
     if (name_pred == "stdout") {
       save_period = 0;
-      this->cfg.emplace_back(std::make_pair("silent", "0"));
     }
     if (dsplit == 0 && rabit::IsDistributed()) {
       dsplit = 2;
     }
-    if (rabit::GetRank() != 0) {
-      this->cfg.emplace_back(std::make_pair("silent", "1"));
-    }
   }
 };
 
@@ -189,7 +185,7 @@ void CLITrain(const CLIParam& param) {
   if (param.model_in != "NULL") {
     std::unique_ptr<dmlc::Stream> fi(
         dmlc::Stream::Create(param.model_in.c_str(), "r"));
-    learner->Load(fi.get());
+    learner->LoadModel(fi.get());
     learner->SetParams(param.cfg);
   } else {
     learner->SetParams(param.cfg);
@@ -229,7 +225,7 @@ void CLITrain(const CLIParam& param) {
            << i + 1 << ".model";
        std::unique_ptr<dmlc::Stream> fo(
            dmlc::Stream::Create(os.str().c_str(), "w"));
-       learner->Save(fo.get());
+       learner->SaveModel(fo.get());
     }
 
     if (learner->AllowLazyCheckPoint()) {
@@ -255,7 +251,7 @@ void CLITrain(const CLIParam& param) {
     }
     std::unique_ptr<dmlc::Stream> fo(
         dmlc::Stream::Create(os.str().c_str(), "w"));
-    learner->Save(fo.get());
+    learner->SaveModel(fo.get());
   }
 
   double elapsed = dmlc::GetTime() - start;
@@ -277,7 +273,7 @@ void CLIDumpModel(const CLIParam& param) {
   std::unique_ptr<dmlc::Stream> fi(
       dmlc::Stream::Create(param.model_in.c_str(), "r"));
   learner->SetParams(param.cfg);
-  learner->Load(fi.get());
+  learner->LoadModel(fi.get());
   // dump data
   std::vector<std::string> dump = learner->DumpModel(
       fmap, param.dump_stats, param.dump_format);
@@ -316,7 +312,7 @@ void CLIPredict(const CLIParam& param) {
   std::unique_ptr<Learner> learner(Learner::Create({}));
   std::unique_ptr<dmlc::Stream> fi(
       dmlc::Stream::Create(param.model_in.c_str(), "r"));
-  learner->Load(fi.get());
+  learner->LoadModel(fi.get());
   learner->SetParams(param.cfg);
 
   LOG(INFO) << "start prediction...";
tests/python/test_cli.py (new file, 91 lines)

@@ -0,0 +1,91 @@
+import os
+import tempfile
+import unittest
+import platform
+import xgboost
+import subprocess
+import numpy
+
+
+class TestCLI(unittest.TestCase):
+    template = '''
+booster = gbtree
+objective = reg:squarederror
+eta = 1.0
+gamma = 1.0
+seed = 0
+min_child_weight = 0
+max_depth = 3
+task = {task}
+model_in = {model_in}
+model_out = {model_out}
+test_path = {test_path}
+name_pred = {name_pred}
+
+num_round = 10
+data = {data_path}
+eval[test] = {data_path}
+'''
+
+    def test_cli_model(self):
+        curdir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
+        project_root = os.path.normpath(
+            os.path.join(curdir, os.path.pardir, os.path.pardir))
+        data_path = "{root}/demo/data/agaricus.txt.train?format=libsvm".format(
+            root=project_root)
+
+        if platform.system() == 'Windows':
+            exe = 'xgboost.exe'
+        else:
+            exe = 'xgboost'
+        exe = os.path.join(project_root, exe)
+        assert os.path.exists(exe)
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            model_out = os.path.join(tmpdir, 'test_load_cli_model')
+            config_path = os.path.join(tmpdir, 'test_load_cli_model.conf')
+
+            train_conf = self.template.format(data_path=data_path,
+                                              task='train',
+                                              model_in='NULL',
+                                              model_out=model_out,
+                                              test_path='NULL',
+                                              name_pred='NULL')
+            with open(config_path, 'w') as fd:
+                fd.write(train_conf)
+
+            subprocess.run([exe, config_path])
+
+            predict_out = os.path.join(tmpdir,
+                                       'test_load_cli_model-prediction')
+            predict_conf = self.template.format(task='pred',
+                                                data_path=data_path,
+                                                model_in=model_out,
+                                                model_out='NULL',
+                                                test_path=data_path,
+                                                name_pred=predict_out)
+            with open(config_path, 'w') as fd:
+                fd.write(predict_conf)
+
+            subprocess.run([exe, config_path])
+
+            cli_predt = numpy.loadtxt(predict_out)
+
+            parameters = {
+                'booster': 'gbtree',
+                'objective': 'reg:squarederror',
+                'eta': 1.0,
+                'gamma': 1.0,
+                'seed': 0,
+                'min_child_weight': 0,
+                'max_depth': 3
+            }
+            data = xgboost.DMatrix(data_path)
+            booster = xgboost.train(parameters, data, num_boost_round=10)
+            py_predt = booster.predict(data)
+
+            numpy.testing.assert_allclose(cli_predt, py_predt)
+
+            cli_model = xgboost.Booster(model_file=model_out)
+            cli_predt = cli_model.predict(data)
+            numpy.testing.assert_allclose(cli_predt, py_predt)