[breaking] Change internal model serialization to UBJSON. (#7556)

* Use typed array for models.
* Change the memory snapshot format.
* Add new C API for saving to raw format.
This commit is contained in:
Jiaming Yuan
2022-01-16 02:11:53 +08:00
committed by GitHub
parent 13b0fa4b97
commit a1bcd33a3b
24 changed files with 566 additions and 255 deletions

View File

@@ -2,6 +2,7 @@ import pickle
import numpy as np
import xgboost as xgb
import os
import json
kRows = 100
@@ -15,13 +16,14 @@ def generate_data():
class TestPickling:
def run_model_pickling(self, xgb_params):
def run_model_pickling(self, xgb_params) -> str:
X, y = generate_data()
dtrain = xgb.DMatrix(X, y)
bst = xgb.train(xgb_params, dtrain)
dump_0 = bst.get_dump(dump_format='json')
assert dump_0
config_0 = bst.save_config()
filename = 'model.pkl'
@@ -42,9 +44,22 @@ class TestPickling:
if os.path.exists(filename):
os.remove(filename)
config_1 = bst.save_config()
assert config_0 == config_1
return json.loads(config_0)
def test_model_pickling_json(self):
# Runs the pickling round-trip helper and then inspects the configuration it
# returns, once with tree_method "hist" and once with "exact".
#
# NOTE(review): this text is a rendered diff whose +/- markers and indentation
# were stripped. The enclosing hunk header (-42,9 +44,22) suggests that the
# next five lines (the 'nthread': 1 params and the bare run_model_pickling
# call) are the removed, pre-change side of the diff -- confirm before
# treating them as live code.
params = {
'nthread': 1,
'tree_method': 'hist',
}
self.run_model_pickling(params)
# check() looks up the "subsample" entry under the updater section of the
# config and asserts that it equals 0.5, the value passed in the params below.
def check(config):
updater = config["learner"]["gradient_booster"]["updater"]
# `params` is not an argument of check(); it is read from the enclosing scope
# at call time, so it is whichever dict was assigned most recently.
if params["tree_method"] == "exact":
# For "exact" the value is read from the "grow_colmaker" entry.
subsample = updater["grow_colmaker"]["train_param"]["subsample"]
else:
# Otherwise (here: "hist") it is read from "grow_quantile_histmaker".
subsample = updater["grow_quantile_histmaker"]["train_param"]["subsample"]
# float() is applied before comparing -- presumably the config stores the
# value as a string; TODO confirm.
assert float(subsample) == 0.5
# Case 1: "hist" with subsample=0.5. `config` is the value returned by
# run_model_pickling (the json.loads of the saved config string).
params = {"nthread": 8, "tree_method": "hist", "subsample": 0.5}
config = self.run_model_pickling(params)
check(config)
# Case 2: same settings with "exact"; `params` is rebound first so that
# check() takes the "grow_colmaker" branch.
params = {"nthread": 8, "tree_method": "exact", "subsample": 0.5}
config = self.run_model_pickling(params)
check(config)