* Use typed array for models. * Change the memory snapshot format. * Add new C API for saving to raw format.
66 lines
1.7 KiB
Python
66 lines
1.7 KiB
Python
import json
import os
import pickle
import tempfile

import numpy as np
import xgboost as xgb
|
|
|
|
|
|
# Shape (rows, columns) of the random feature matrix built by generate_data().
kRows, kCols = 100, 10
|
|
|
|
|
|
def generate_data():
    """Return a random ``(X, y)`` pair for training.

    ``X`` is a ``kRows`` x ``kCols`` array and ``y`` a length-``kRows`` vector,
    both drawn with ``np.random.randn`` from NumPy's global random state.
    """
    # The tuple is evaluated left to right, so X is sampled before y.
    return np.random.randn(kRows, kCols), np.random.randn(kRows)
|
|
|
|
|
|
class TestPickling:
    """Check that a trained booster survives pickle round trips unchanged."""

    def run_model_pickling(self, xgb_params) -> dict:
        """Train a booster, pickle it twice, and verify nothing was lost.

        Parameters
        ----------
        xgb_params
            Training parameters forwarded to ``xgb.train``.

        Returns
        -------
        dict
            The booster configuration captured before pickling, parsed from
            its JSON string, so callers can inspect individual parameters.
        """
        X, y = generate_data()
        dtrain = xgb.DMatrix(X, y)
        bst = xgb.train(xgb_params, dtrain)

        dump_0 = bst.get_dump(dump_format='json')
        assert dump_0
        config_0 = bst.save_config()

        # Write inside a private temporary directory so the pickle file is
        # removed even when an assertion fails, and so concurrent test runs
        # cannot overwrite each other's file.
        with tempfile.TemporaryDirectory() as tmpdir:
            filename = os.path.join(tmpdir, 'model.pkl')

            # Two round trips: the second one pickles a booster that was
            # itself restored from a pickle.
            for _ in range(2):
                with open(filename, 'wb') as fd:
                    pickle.dump(bst, fd)

                with open(filename, 'rb') as fd:
                    bst = pickle.load(fd)

        assert bst.get_dump(dump_format='json') == dump_0

        config_1 = bst.save_config()
        assert config_0 == config_1
        return json.loads(config_0)

    def test_model_pickling_json(self):
        """Verify ``subsample`` is present in the config for hist and exact."""
        # Config key of the updater whose ``train_param`` is inspected for
        # each tree method.
        updater_names = {
            "hist": "grow_quantile_histmaker",
            "exact": "grow_colmaker",
        }

        def check(config, tree_method):
            # The tree method is passed in explicitly rather than read from an
            # enclosing variable that is rebound between calls.
            updater = config["learner"]["gradient_booster"]["updater"]
            train_param = updater[updater_names[tree_method]]["train_param"]
            assert float(train_param["subsample"]) == 0.5

        for tree_method in ("hist", "exact"):
            params = {"nthread": 8, "tree_method": tree_method, "subsample": 0.5}
            config = self.run_model_pickling(params)
            check(config, tree_method)
|