JSON configuration IO. (#5111)
* Add saving/loading JSON configuration.
* Implement Python pickle interface with new IO routines.
* Basic tests for training continuation.
This commit is contained in:
@@ -1,20 +1,39 @@
|
||||
'''Loading a pickled model generated by test_pickling.py'''
|
||||
import pickle
|
||||
'''Loading a pickled model generated by test_pickling.py, only used by
|
||||
`test_gpu_with_dask.py`'''
|
||||
import unittest
|
||||
import os
|
||||
import xgboost as xgb
|
||||
import sys
|
||||
import json
|
||||
|
||||
sys.path.append("tests/python")
|
||||
from test_pickling import build_dataset, model_path
|
||||
from test_gpu_pickling import build_dataset, model_path, load_pickle
|
||||
|
||||
|
||||
class TestLoadPickle(unittest.TestCase):
|
||||
def test_load_pkl(self):
|
||||
assert os.environ['CUDA_VISIBLE_DEVICES'] == ''
|
||||
with open(model_path, 'rb') as fd:
|
||||
bst = pickle.load(fd)
|
||||
'''Test whether prediction is correct.'''
|
||||
assert os.environ['CUDA_VISIBLE_DEVICES'] == '-1'
|
||||
bst = load_pickle(model_path)
|
||||
x, y = build_dataset()
|
||||
test_x = xgb.DMatrix(x)
|
||||
res = bst.predict(test_x)
|
||||
assert len(res) == 10
|
||||
|
||||
def test_predictor_type_is_auto(self):
    '''Check that a model unpickled while CUDA_VISIBLE_DEVICES is '-1'
    reports `auto` as its predictor in the saved JSON configuration.'''
    # This test is only meaningful when the caller has hidden every device.
    assert os.environ['CUDA_VISIBLE_DEVICES'] == '-1'

    booster = load_pickle(model_path)
    learner_config = json.loads(booster.save_config())['learner']
    gbtree_param = learner_config['gradient_booster']['gbtree_train_param']

    assert gbtree_param['predictor'] == 'auto'
|
||||
|
||||
def test_predictor_type_is_gpu(self):
    '''Check that a model unpickled while CUDA_VISIBLE_DEVICES is unset
    still reports `gpu_predictor` in the saved JSON configuration.'''
    # The variable must be absent altogether, not merely empty.
    assert 'CUDA_VISIBLE_DEVICES' not in os.environ

    booster = load_pickle(model_path)
    learner_config = json.loads(booster.save_config())['learner']
    gbtree_param = learner_config['gradient_booster']['gbtree_train_param']

    assert gbtree_param['predictor'] == 'gpu_predictor'
|
||||
|
||||
@@ -4,7 +4,7 @@ import unittest
|
||||
import numpy as np
|
||||
import subprocess
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import xgboost as xgb
|
||||
from xgboost import XGBClassifier
|
||||
|
||||
@@ -39,18 +39,17 @@ class TestPickling(unittest.TestCase):
|
||||
bst = xgb.train(param, train_x)
|
||||
|
||||
save_pickle(bst, model_path)
|
||||
args = ["pytest",
|
||||
"--verbose",
|
||||
"-s",
|
||||
"--fulltrace",
|
||||
"./tests/python-gpu/load_pickle.py"]
|
||||
args = [
|
||||
"pytest", "--verbose", "-s", "--fulltrace",
|
||||
"./tests/python-gpu/load_pickle.py::TestLoadPickle::test_load_pkl"
|
||||
]
|
||||
command = ''
|
||||
for arg in args:
|
||||
command += arg
|
||||
command += ' '
|
||||
|
||||
cuda_environment = {'CUDA_VISIBLE_DEVICES': ''}
|
||||
env = os.environ
|
||||
cuda_environment = {'CUDA_VISIBLE_DEVICES': '-1'}
|
||||
env = os.environ.copy()
|
||||
# Passing new_environment directly to `env' argument results
|
||||
# in failure on Windows:
|
||||
# Fatal Python error: _Py_HashRandomization_Init: failed to
|
||||
@@ -62,12 +61,55 @@ class TestPickling(unittest.TestCase):
|
||||
assert status == 0
|
||||
os.remove(model_path)
|
||||
|
||||
def test_pickled_predictor(self):
    '''Train with `gpu_predictor`, pickle the booster, then run the
    `load_pickle.py` checks in child pytest processes.

    The first child runs with CUDA_VISIBLE_DEVICES set to '-1' and executes
    `test_predictor_type_is_auto`; the second inherits the current
    environment (which must not define CUDA_VISIBLE_DEVICES) and executes
    `test_predictor_type_is_gpu`.  Each child must exit with status 0.
    '''
    args_template = ["pytest", "--verbose", "-s", "--fulltrace"]
    load_pickle_tests = "./tests/python-gpu/load_pickle.py::TestLoadPickle::"

    x, y = build_dataset()
    train_x = xgb.DMatrix(x, label=y)

    param = {'tree_method': 'gpu_hist',
             'verbosity': 1, 'predictor': 'gpu_predictor'}
    bst = xgb.train(param, train_x)
    config = json.loads(bst.save_config())
    assert config['learner']['gradient_booster']['gbtree_train_param'][
        'predictor'] == 'gpu_predictor'

    save_pickle(bst, model_path)
    try:
        # Load model in a CPU only environment.
        args = args_template + [
            load_pickle_tests + "test_predictor_type_is_auto"]
        env = os.environ.copy()
        env.update({'CUDA_VISIBLE_DEVICES': '-1'})
        status = subprocess.call(args, env=env)
        assert status == 0

        # Load in environment that has GPU.
        args = args_template + [
            load_pickle_tests + "test_predictor_type_is_gpu"]
        env = os.environ.copy()
        assert 'CUDA_VISIBLE_DEVICES' not in env
        status = subprocess.call(args, env=env)
        assert status == 0
    finally:
        # Do not leave the pickled model on disk, whether or not the
        # child processes succeeded.
        os.remove(model_path)
|
||||
|
||||
def test_predict_sklearn_pickle(self):
|
||||
x, y = build_dataset()
|
||||
|
||||
kwargs = {'tree_method': 'gpu_hist',
|
||||
'predictor': 'gpu_predictor',
|
||||
'verbosity': 2,
|
||||
'verbosity': 1,
|
||||
'objective': 'binary:logistic',
|
||||
'n_estimators': 10}
|
||||
|
||||
@@ -7,23 +7,25 @@ rng = np.random.RandomState(1994)
|
||||
|
||||
|
||||
class TestGPUTrainingContinuation(unittest.TestCase):
|
||||
def test_training_continuation_binary(self):
|
||||
kRows = 32
|
||||
kCols = 16
|
||||
def run_training_continuation(self, use_json):
|
||||
kRows = 64
|
||||
kCols = 32
|
||||
X = np.random.randn(kRows, kCols)
|
||||
y = np.random.randn(kRows)
|
||||
dtrain = xgb.DMatrix(X, y)
|
||||
params = {'tree_method': 'gpu_hist', 'max_depth': '2'}
|
||||
bst_0 = xgb.train(params, dtrain, num_boost_round=4)
|
||||
params = {'tree_method': 'gpu_hist', 'max_depth': '2',
|
||||
'gamma': '0.1', 'alpha': '0.01',
|
||||
'enable_experimental_json_serialization': use_json}
|
||||
bst_0 = xgb.train(params, dtrain, num_boost_round=64)
|
||||
dump_0 = bst_0.get_dump(dump_format='json')
|
||||
|
||||
bst_1 = xgb.train(params, dtrain, num_boost_round=2)
|
||||
bst_1 = xgb.train(params, dtrain, num_boost_round=2, xgb_model=bst_1)
|
||||
bst_1 = xgb.train(params, dtrain, num_boost_round=32)
|
||||
bst_1 = xgb.train(params, dtrain, num_boost_round=32, xgb_model=bst_1)
|
||||
dump_1 = bst_1.get_dump(dump_format='json')
|
||||
|
||||
def recursive_compare(obj_0, obj_1):
|
||||
if isinstance(obj_0, float):
|
||||
assert np.isclose(obj_0, obj_1)
|
||||
assert np.isclose(obj_0, obj_1, atol=1e-6)
|
||||
elif isinstance(obj_0, str):
|
||||
assert obj_0 == obj_1
|
||||
elif isinstance(obj_0, int):
|
||||
@@ -42,7 +44,14 @@ class TestGPUTrainingContinuation(unittest.TestCase):
|
||||
for i in range(len(obj_0)):
|
||||
recursive_compare(obj_0[i], obj_1[i])
|
||||
|
||||
assert len(dump_0) == len(dump_1)
|
||||
for i in range(len(dump_0)):
|
||||
obj_0 = json.loads(dump_0[i])
|
||||
obj_1 = json.loads(dump_1[i])
|
||||
recursive_compare(obj_0, obj_1)
|
||||
|
||||
def test_gpu_training_continuation_binary(self):
    '''Run the shared training continuation check with `use_json` off.'''
    self.run_training_continuation(use_json=False)
|
||||
|
||||
def test_gpu_training_continuation_json(self):
    '''Run the shared training continuation check with `use_json` on.'''
    self.run_training_continuation(use_json=True)
|
||||
|
||||
Reference in New Issue
Block a user