Fix CLI model IO. (#5535)

* Add test for comparing Python and CLI training result.
This commit is contained in:
Jiaming Yuan 2020-04-16 07:48:47 +08:00 committed by GitHub
parent 0676a19e70
commit 468b1594d3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 100 additions and 9 deletions

2
Jenkinsfile vendored
View File

@@ -201,6 +201,7 @@ def BuildCPU() {
${docker_extra_params} ${dockerRun} ${container_type} ${docker_binary} build/testxgboost
"""
stash name: 'xgboost_cli', includes: 'xgboost'
deleteDir()
}
}
@@ -282,6 +283,7 @@ def TestPythonCPU() {
node('linux && cpu') {
unstash name: 'xgboost_whl_cuda9'
unstash name: 'srcs'
unstash name: 'xgboost_cli'
echo "Test Python CPU"
def container_type = "cpu"
def docker_binary = "docker"

View File

@@ -96,6 +96,7 @@ def BuildWin64() {
s3Upload bucket: 'xgboost-nightly-builds', path: path, acl: 'PublicRead', workingDir: 'python-package/dist', includePathPattern:'**/*.whl'
echo 'Stashing C++ test executable (testxgboost)...'
stash name: 'xgboost_cpp_tests', includes: 'build/testxgboost.exe'
stash name: 'xgboost_cli', includes: 'xgboost.exe'
deleteDir()
}
}
@@ -104,6 +105,7 @@ def TestWin64CPU() {
node('win64 && cpu') {
unstash name: 'srcs'
unstash name: 'xgboost_whl'
unstash name: 'xgboost_cli'
echo "Test Win64 CPU"
echo "Installing Python wheel..."
bat "conda activate && (python -m pip uninstall -y xgboost || cd .)"

View File

@@ -138,14 +138,10 @@ struct CLIParam : public XGBoostParameter<CLIParam> {
// constraint.
if (name_pred == "stdout") {
save_period = 0;
this->cfg.emplace_back(std::make_pair("silent", "0"));
}
if (dsplit == 0 && rabit::IsDistributed()) {
dsplit = 2;
}
if (rabit::GetRank() != 0) {
this->cfg.emplace_back(std::make_pair("silent", "1"));
}
}
};
@@ -189,7 +185,7 @@ void CLITrain(const CLIParam& param) {
if (param.model_in != "NULL") {
std::unique_ptr<dmlc::Stream> fi(
dmlc::Stream::Create(param.model_in.c_str(), "r"));
learner->Load(fi.get());
learner->LoadModel(fi.get());
learner->SetParams(param.cfg);
} else {
learner->SetParams(param.cfg);
@@ -229,7 +225,7 @@ void CLITrain(const CLIParam& param) {
<< i + 1 << ".model";
std::unique_ptr<dmlc::Stream> fo(
dmlc::Stream::Create(os.str().c_str(), "w"));
learner->Save(fo.get());
learner->SaveModel(fo.get());
}
if (learner->AllowLazyCheckPoint()) {
@@ -255,7 +251,7 @@ void CLITrain(const CLIParam& param) {
}
std::unique_ptr<dmlc::Stream> fo(
dmlc::Stream::Create(os.str().c_str(), "w"));
learner->Save(fo.get());
learner->SaveModel(fo.get());
}
double elapsed = dmlc::GetTime() - start;
@@ -277,7 +273,7 @@ void CLIDumpModel(const CLIParam& param) {
std::unique_ptr<dmlc::Stream> fi(
dmlc::Stream::Create(param.model_in.c_str(), "r"));
learner->SetParams(param.cfg);
learner->Load(fi.get());
learner->LoadModel(fi.get());
// dump data
std::vector<std::string> dump = learner->DumpModel(
fmap, param.dump_stats, param.dump_format);
@@ -316,7 +312,7 @@ void CLIPredict(const CLIParam& param) {
std::unique_ptr<Learner> learner(Learner::Create({}));
std::unique_ptr<dmlc::Stream> fi(
dmlc::Stream::Create(param.model_in.c_str(), "r"));
learner->Load(fi.get());
learner->LoadModel(fi.get());
learner->SetParams(param.cfg);
LOG(INFO) << "start prediction...";

91
tests/python/test_cli.py Normal file
View File

@@ -0,0 +1,91 @@
import os
import tempfile
import unittest
import platform
import xgboost
import subprocess
import numpy
class TestCLI(unittest.TestCase):
    """Verify that a model trained by the `xgboost` CLI binary produces the
    same predictions as a model trained through the Python API with the same
    parameters, and that the CLI-saved model can be loaded back by Python."""

    # CLI configuration template; fields are filled per task (train vs pred).
    template = '''
booster = gbtree
objective = reg:squarederror
eta = 1.0
gamma = 1.0
seed = 0
min_child_weight = 0
max_depth = 3
task = {task}
model_in = {model_in}
model_out = {model_out}
test_path = {test_path}
name_pred = {name_pred}
num_round = 10
data = {data_path}
eval[test] = {data_path}
'''

    def test_cli_model(self):
        """Train and predict via the CLI, then compare against Python training."""
        curdir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
        project_root = os.path.normpath(
            os.path.join(curdir, os.path.pardir, os.path.pardir))
        data_path = "{root}/demo/data/agaricus.txt.train?format=libsvm".format(
            root=project_root)
        # The CLI executable is expected at the project root (stashed by CI).
        if platform.system() == 'Windows':
            exe = 'xgboost.exe'
        else:
            exe = 'xgboost'
        exe = os.path.join(project_root, exe)
        assert os.path.exists(exe)

        with tempfile.TemporaryDirectory() as tmpdir:
            model_out = os.path.join(tmpdir, 'test_load_cli_model')
            config_path = os.path.join(tmpdir, 'test_load_cli_model.conf')

            # Train with the CLI and save the model to `model_out`.
            train_conf = self.template.format(data_path=data_path,
                                              task='train',
                                              model_in='NULL',
                                              model_out=model_out,
                                              test_path='NULL',
                                              name_pred='NULL')
            with open(config_path, 'w') as fd:
                fd.write(train_conf)
            # check=True: fail the test loudly if the CLI binary errors out,
            # instead of silently continuing with a missing/stale model file.
            subprocess.run([exe, config_path], check=True)
            assert os.path.exists(model_out), 'CLI training did not write a model'

            # Predict with the CLI using the model it just saved.
            predict_out = os.path.join(tmpdir,
                                       'test_load_cli_model-prediction')
            predict_conf = self.template.format(task='pred',
                                                data_path=data_path,
                                                model_in=model_out,
                                                model_out='NULL',
                                                test_path=data_path,
                                                name_pred=predict_out)
            with open(config_path, 'w') as fd:
                fd.write(predict_conf)
            subprocess.run([exe, config_path], check=True)

            cli_predt = numpy.loadtxt(predict_out)

            # Train the equivalent model through the Python API.
            parameters = {
                'booster': 'gbtree',
                'objective': 'reg:squarederror',
                'eta': 1.0,
                'gamma': 1.0,
                'seed': 0,
                'min_child_weight': 0,
                'max_depth': 3
            }
            data = xgboost.DMatrix(data_path)
            booster = xgboost.train(parameters, data, num_boost_round=10)
            py_predt = booster.predict(data)

            # CLI predictions must match Python predictions.
            numpy.testing.assert_allclose(cli_predt, py_predt)

            # The CLI-saved model must be loadable by the Python Booster and
            # yield the same predictions.
            cli_model = xgboost.Booster(model_file=model_out)
            cli_predt = cli_model.predict(data)
            numpy.testing.assert_allclose(cli_predt, py_predt)