JSON configuration IO. (#5111)

* Add saving/loading JSON configuration.
* Implement Python pickle interface with new IO routines.
* Basic tests for training continuation.
This commit is contained in:
Jiaming Yuan
2019-12-15 17:31:53 +08:00
committed by GitHub
parent 5aa007d7b2
commit 3136185bc5
24 changed files with 761 additions and 390 deletions

View File

@@ -1,20 +1,39 @@
'''Loading a pickled model generated by test_pickling.py'''
import pickle
'''Loading a pickled model generated by test_pickling.py, only used by
`test_gpu_with_dask.py`'''
import unittest
import os
import xgboost as xgb
import sys
import json
sys.path.append("tests/python")
from test_pickling import build_dataset, model_path
from test_gpu_pickling import build_dataset, model_path, load_pickle
class TestLoadPickle(unittest.TestCase):
def test_load_pkl(self):
assert os.environ['CUDA_VISIBLE_DEVICES'] == ''
with open(model_path, 'rb') as fd:
bst = pickle.load(fd)
'''Test whether prediction is correct.'''
assert os.environ['CUDA_VISIBLE_DEVICES'] == '-1'
bst = load_pickle(model_path)
x, y = build_dataset()
test_x = xgb.DMatrix(x)
res = bst.predict(test_x)
assert len(res) == 10
def test_predictor_type_is_auto(self):
    '''With CUDA_VISIBLE_DEVICES set to '-1', the booster loaded from the
    pickle must report `auto` as its predictor.'''
    # The parent test process is expected to launch this test with the
    # variable set; a missing variable raises KeyError here.
    visible_devices = os.environ['CUDA_VISIBLE_DEVICES']
    assert visible_devices == '-1'

    booster = load_pickle(model_path)
    learner = json.loads(booster.save_config())['learner']
    train_param = learner['gradient_booster']['gbtree_train_param']
    assert train_param['predictor'] == 'auto'
def test_predictor_type_is_gpu(self):
    '''Without CUDA_VISIBLE_DEVICES in the environment, the booster loaded
    from the pickle must keep reporting `gpu_predictor`.'''
    assert 'CUDA_VISIBLE_DEVICES' not in os.environ

    booster = load_pickle(model_path)
    learner = json.loads(booster.save_config())['learner']
    train_param = learner['gradient_booster']['gbtree_train_param']
    assert train_param['predictor'] == 'gpu_predictor'

View File

@@ -4,7 +4,7 @@ import unittest
import numpy as np
import subprocess
import os
import sys
import json
import xgboost as xgb
from xgboost import XGBClassifier
@@ -39,18 +39,17 @@ class TestPickling(unittest.TestCase):
bst = xgb.train(param, train_x)
save_pickle(bst, model_path)
args = ["pytest",
"--verbose",
"-s",
"--fulltrace",
"./tests/python-gpu/load_pickle.py"]
args = [
"pytest", "--verbose", "-s", "--fulltrace",
"./tests/python-gpu/load_pickle.py::TestLoadPickle::test_load_pkl"
]
command = ''
for arg in args:
command += arg
command += ' '
cuda_environment = {'CUDA_VISIBLE_DEVICES': ''}
env = os.environ
cuda_environment = {'CUDA_VISIBLE_DEVICES': '-1'}
env = os.environ.copy()
# Passing cuda_environment directly to the `env` argument results
# in failure on Windows:
# Fatal Python error: _Py_HashRandomization_Init: failed to
@@ -62,12 +61,55 @@ class TestPickling(unittest.TestCase):
assert status == 0
os.remove(model_path)
def test_pickled_predictor(self):
    '''Check which predictor a pickled GPU booster reports after loading.

    A booster trained with `gpu_predictor` is pickled to `model_path`, then
    two tests from `load_pickle.py` are run in separate pytest processes:
    one with CUDA_VISIBLE_DEVICES set to '-1', one with the variable unset.
    The pickle file is removed afterwards, whether or not the checks pass.
    '''
    args_template = [
        "pytest",
        "--verbose",
        "-s",
        "--fulltrace"]

    x, y = build_dataset()
    train_x = xgb.DMatrix(x, label=y)

    param = {'tree_method': 'gpu_hist',
             'verbosity': 1, 'predictor': 'gpu_predictor'}
    bst = xgb.train(param, train_x)
    # Sanity check before pickling: the configuration must record the
    # predictor that was requested.
    config = json.loads(bst.save_config())
    assert config['learner']['gradient_booster']['gbtree_train_param'][
        'predictor'] == 'gpu_predictor'

    save_pickle(bst, model_path)
    try:
        args = args_template.copy()
        args.append(
            "./tests/python-gpu/"
            "load_pickle.py::TestLoadPickle::test_predictor_type_is_auto")

        # Merge into a copy of the current environment instead of passing
        # the one-entry dict on its own.
        cuda_environment = {'CUDA_VISIBLE_DEVICES': '-1'}
        env = os.environ.copy()
        env.update(cuda_environment)

        # Load model in a CPU only environment.
        status = subprocess.call(args, env=env)
        assert status == 0

        args = args_template.copy()
        args.append(
            "./tests/python-gpu/"
            "load_pickle.py::TestLoadPickle::test_predictor_type_is_gpu")

        # Load in environment that has GPU.
        env = os.environ.copy()
        assert 'CUDA_VISIBLE_DEVICES' not in env
        status = subprocess.call(args, env=env)
        assert status == 0
    finally:
        # Only the two child processes need the pickle; do not leave it
        # behind for later tests, even when an assertion above fails.
        if os.path.exists(model_path):
            os.remove(model_path)
def test_predict_sklearn_pickle(self):
x, y = build_dataset()
kwargs = {'tree_method': 'gpu_hist',
'predictor': 'gpu_predictor',
'verbosity': 2,
'verbosity': 1,
'objective': 'binary:logistic',
'n_estimators': 10}

View File

@@ -7,23 +7,25 @@ rng = np.random.RandomState(1994)
class TestGPUTrainingContinuation(unittest.TestCase):
def test_training_continuation_binary(self):
kRows = 32
kCols = 16
def run_training_continuation(self, use_json):
kRows = 64
kCols = 32
X = np.random.randn(kRows, kCols)
y = np.random.randn(kRows)
dtrain = xgb.DMatrix(X, y)
params = {'tree_method': 'gpu_hist', 'max_depth': '2'}
bst_0 = xgb.train(params, dtrain, num_boost_round=4)
params = {'tree_method': 'gpu_hist', 'max_depth': '2',
'gamma': '0.1', 'alpha': '0.01',
'enable_experimental_json_serialization': use_json}
bst_0 = xgb.train(params, dtrain, num_boost_round=64)
dump_0 = bst_0.get_dump(dump_format='json')
bst_1 = xgb.train(params, dtrain, num_boost_round=2)
bst_1 = xgb.train(params, dtrain, num_boost_round=2, xgb_model=bst_1)
bst_1 = xgb.train(params, dtrain, num_boost_round=32)
bst_1 = xgb.train(params, dtrain, num_boost_round=32, xgb_model=bst_1)
dump_1 = bst_1.get_dump(dump_format='json')
def recursive_compare(obj_0, obj_1):
if isinstance(obj_0, float):
assert np.isclose(obj_0, obj_1)
assert np.isclose(obj_0, obj_1, atol=1e-6)
elif isinstance(obj_0, str):
assert obj_0 == obj_1
elif isinstance(obj_0, int):
@@ -42,7 +44,14 @@ class TestGPUTrainingContinuation(unittest.TestCase):
for i in range(len(obj_0)):
recursive_compare(obj_0[i], obj_1[i])
assert len(dump_0) == len(dump_1)
for i in range(len(dump_0)):
obj_0 = json.loads(dump_0[i])
obj_1 = json.loads(dump_1[i])
recursive_compare(obj_0, obj_1)
def test_gpu_training_continuation_binary(self):
self.run_training_continuation(False)
def test_gpu_training_continuation_json(self):
self.run_training_continuation(True)