JSON configuration IO. (#5111)

* Add saving/loading JSON configuration.
* Implement Python pickle interface with new IO routines.
* Basic tests for training continuation.
2019-12-15 17:31:53 +08:00
parent 5aa007d7b2
commit 3136185bc5
24 changed files with 761 additions and 390 deletions
--- a/tests/python-gpu/load_pickle.py
+++ b/tests/python-gpu/load_pickle.py
@@ -1,20 +1,39 @@
-'''Loading a pickled model generated by test_pickling.py'''
-import pickle
+'''Loading a pickled model generated by test_gpu_pickling.py; run as pytest
+subprocesses by `test_gpu_pickling.py`'''
 import unittest
 import os
 import xgboost as xgb
-import sys
+import json

-sys.path.append("tests/python")
-from test_pickling import build_dataset, model_path
+from test_gpu_pickling import build_dataset, model_path, load_pickle


 class TestLoadPickle(unittest.TestCase):
    def test_load_pkl(self):
-        assert os.environ['CUDA_VISIBLE_DEVICES'] == ''
-        with open(model_path, 'rb') as fd:
-            bst = pickle.load(fd)
+        '''Load the pickle with GPUs hidden; expect 10 predictions.'''
+        assert os.environ['CUDA_VISIBLE_DEVICES'] == '-1'
+        bst = load_pickle(model_path)
        x, y = build_dataset()
        test_x = xgb.DMatrix(x)
        res = bst.predict(test_x)
        assert len(res) == 10
+
+    def test_predictor_type_is_auto(self):
+        '''With CUDA_VISIBLE_DEVICES set to the invalid value -1, the loaded
+        model's saved config should report `auto` as its predictor.'''
+        assert os.environ['CUDA_VISIBLE_DEVICES'] == '-1'
+        bst = load_pickle(model_path)
+        config = bst.save_config()
+        config = json.loads(config)
+        assert config['learner']['gradient_booster']['gbtree_train_param'][
+            'predictor'] == 'auto'
+
+    def test_predictor_type_is_gpu(self):
+        '''With CUDA_VISIBLE_DEVICES unset, the loaded model's saved config
+        should still report `gpu_predictor` as its predictor.'''
+        assert 'CUDA_VISIBLE_DEVICES' not in os.environ.keys()
+        bst = load_pickle(model_path)
+        config = bst.save_config()
+        config = json.loads(config)
+        assert config['learner']['gradient_booster']['gbtree_train_param'][
+            'predictor'] == 'gpu_predictor'
--- a/tests/python-gpu/test_gpu_pickling.py
+++ b/tests/python-gpu/test_gpu_pickling.py
@@ -4,7 +4,7 @@ import unittest
 import numpy as np
 import subprocess
 import os
-import sys
+import json
 import xgboost as xgb
 from xgboost import XGBClassifier

@@ -39,18 +39,17 @@ class TestPickling(unittest.TestCase):
        bst = xgb.train(param, train_x)

        save_pickle(bst, model_path)
-        args = ["pytest",
-                "--verbose",
-                "-s",
-                "--fulltrace",
-                "./tests/python-gpu/load_pickle.py"]
+        args = [
+            "pytest", "--verbose", "-s", "--fulltrace",
+            "./tests/python-gpu/load_pickle.py::TestLoadPickle::test_load_pkl"
+        ]
        command = ''
        for arg in args:
            command += arg
            command += ' '

-        cuda_environment = {'CUDA_VISIBLE_DEVICES': ''}
-        env = os.environ
+        cuda_environment = {'CUDA_VISIBLE_DEVICES': '-1'}
+        env = os.environ.copy()
        # Passing new_environment directly to `env' argument results
        # in failure on Windows:
        #    Fatal Python error: _Py_HashRandomization_Init: failed to
@@ -62,12 +61,55 @@ class TestPickling(unittest.TestCase):
        assert status == 0
        os.remove(model_path)

+    def test_pickled_predictor(self):
+        '''Pickle a `gpu_predictor` model and reload it in subprocesses.
+
+        The predictor should become `auto` when GPUs are hidden and stay
+        `gpu_predictor` when they are visible.'''
+        args_template = ["pytest", "--verbose", "-s", "--fulltrace"]
+
+        x, y = build_dataset()
+        train_x = xgb.DMatrix(x, label=y)
+
+        param = {'tree_method': 'gpu_hist',
+                 'verbosity': 1, 'predictor': 'gpu_predictor'}
+        bst = xgb.train(param, train_x)
+        # Sanity check: the trained booster records the requested predictor.
+        config = json.loads(bst.save_config())
+        assert config['learner']['gradient_booster']['gbtree_train_param'][
+            'predictor'] == 'gpu_predictor'
+
+        save_pickle(bst, model_path)
+
+        # Load model in a CPU only environment.
+        args = args_template + [
+            "./tests/python-gpu/"
+            "load_pickle.py::TestLoadPickle::test_predictor_type_is_auto"]
+        env = os.environ.copy()
+        env.update({'CUDA_VISIBLE_DEVICES': '-1'})
+        status = subprocess.call(args, env=env)
+        assert status == 0
+
+        # Load in environment that has GPU.
+        args = args_template + [
+            "./tests/python-gpu/"
+            "load_pickle.py::TestLoadPickle::test_predictor_type_is_gpu"]
+        env = os.environ.copy()
+        # The child test asserts the variable is unset; fail early if it is.
+        assert 'CUDA_VISIBLE_DEVICES' not in env
+        status = subprocess.call(args, env=env)
+        assert status == 0
+
+        # Remove the pickle once both subprocess checks have passed, so no
+        # stale model file is left behind for later tests.
+        os.remove(model_path)
+
    def test_predict_sklearn_pickle(self):
        x, y = build_dataset()

        kwargs = {'tree_method': 'gpu_hist',
                  'predictor': 'gpu_predictor',
-                  'verbosity': 2,
+                  'verbosity': 1,
                  'objective': 'binary:logistic',
                  'n_estimators': 10}

--- a/tests/python-gpu/test_gpu_training_continuation.py
+++ b/tests/python-gpu/test_gpu_training_continuation.py
@@ -7,23 +7,25 @@ rng = np.random.RandomState(1994)


 class TestGPUTrainingContinuation(unittest.TestCase):
-    def test_training_continuation_binary(self):
-        kRows = 32
-        kCols = 16
+    def run_training_continuation(self, use_json):
+        kRows = 64
+        kCols = 32
        X = np.random.randn(kRows, kCols)
        y = np.random.randn(kRows)
        dtrain = xgb.DMatrix(X, y)
-        params = {'tree_method': 'gpu_hist', 'max_depth': '2'}
-        bst_0 = xgb.train(params, dtrain, num_boost_round=4)
+        params = {'tree_method': 'gpu_hist', 'max_depth': '2',
+                  'gamma': '0.1', 'alpha': '0.01',
+                  'enable_experimental_json_serialization': use_json}
+        bst_0 = xgb.train(params, dtrain, num_boost_round=64)
        dump_0 = bst_0.get_dump(dump_format='json')

-        bst_1 = xgb.train(params, dtrain, num_boost_round=2)
-        bst_1 = xgb.train(params, dtrain, num_boost_round=2, xgb_model=bst_1)
+        bst_1 = xgb.train(params, dtrain, num_boost_round=32)
+        bst_1 = xgb.train(params, dtrain, num_boost_round=32, xgb_model=bst_1)
        dump_1 = bst_1.get_dump(dump_format='json')

        def recursive_compare(obj_0, obj_1):
            if isinstance(obj_0, float):
-                assert np.isclose(obj_0, obj_1)
+                assert np.isclose(obj_0, obj_1, atol=1e-6)
            elif isinstance(obj_0, str):
                assert obj_0 == obj_1
            elif isinstance(obj_0, int):
@@ -42,7 +44,14 @@ class TestGPUTrainingContinuation(unittest.TestCase):
                for i in range(len(obj_0)):
                    recursive_compare(obj_0[i], obj_1[i])

+        assert len(dump_0) == len(dump_1)
        for i in range(len(dump_0)):
            obj_0 = json.loads(dump_0[i])
            obj_1 = json.loads(dump_1[i])
            recursive_compare(obj_0, obj_1)
+
+    def test_gpu_training_continuation_binary(self):
+        self.run_training_continuation(False)  # use_json=False
+
+    def test_gpu_training_continuation_json(self):
+        self.run_training_continuation(True)  # use_json=True