JSON configuration IO. (#5111)
* Add saving/loading JSON configuration. * Implement Python pickle interface with new IO routines. * Basic tests for training continuation.
This commit is contained in:
@@ -8,6 +8,7 @@
|
||||
#include "../helpers.h"
|
||||
#include "../../../src/common/io.h"
|
||||
|
||||
|
||||
TEST(c_api, XGDMatrixCreateFromMatDT) {
|
||||
std::vector<int> col0 = {0, -1, 3};
|
||||
std::vector<float> col1 = {-4.0f, 2.0f, 0.0f};
|
||||
@@ -77,7 +78,41 @@ TEST(c_api, Version) {
|
||||
ASSERT_EQ(patch, XGBOOST_VER_PATCH);
|
||||
}
|
||||
|
||||
TEST(c_api, Json_ModelIO){
|
||||
// Round-trips the learner configuration through the JSON config C API:
// save -> load -> save again, then checks that both saved configurations
// parse to equal JSON documents.
TEST(c_api, ConfigIO) {
  size_t constexpr kRows = 10;
  auto pp_dmat = CreateDMatrix(kRows, 10, 0);
  auto p_dmat = *pp_dmat;
  std::vector<std::shared_ptr<DMatrix>> mat {p_dmat};
  std::vector<bst_float> labels(kRows);
  for (size_t i = 0; i < labels.size(); ++i) {
    labels[i] = i;
  }
  p_dmat->Info().labels_.HostVector() = labels;

  std::shared_ptr<Learner> learner { Learner::Create(mat) };

  BoosterHandle handle = learner.get();
  learner->UpdateOneIter(0, p_dmat.get());

  char const* out[1];
  bst_ulong len {0};
  ASSERT_EQ(XGBoosterSaveJsonConfig(handle, &len, out), 0) << XGBGetLastError();

  // Copy the first result immediately; `out[0]` is overwritten by the second
  // save call below.
  std::string config_str_0 { out[0] };
  auto config_0 = Json::Load({config_str_0.c_str(), config_str_0.size()});
  // Load from our own copy rather than the pointer handed back by the save
  // call, so the test does not depend on how long that pointer stays valid.
  ASSERT_EQ(XGBoosterLoadJsonConfig(handle, config_str_0.c_str()), 0)
      << XGBGetLastError();

  bst_ulong len_1 {0};
  ASSERT_EQ(XGBoosterSaveJsonConfig(handle, &len_1, out), 0) << XGBGetLastError();
  // Must be captured AFTER the second save; capturing it before would simply
  // re-read the first configuration and make the comparison vacuous.
  std::string config_str_1 { out[0] };
  auto config_1 = Json::Load({config_str_1.c_str(), config_str_1.size()});

  ASSERT_EQ(config_0, config_1);

  delete pp_dmat;
}
|
||||
|
||||
TEST(c_api, Json_ModelIO) {
|
||||
size_t constexpr kRows = 10;
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
|
||||
|
||||
@@ -117,15 +117,28 @@ TEST(GBTree, Json_IO) {
|
||||
CreateTrainedGBM("gbtree", Args{}, kRows, kCols, &mparam, &gparam) };
|
||||
|
||||
Json model {Object()};
|
||||
model["model"] = Object();
|
||||
auto& j_model = model["model"];
|
||||
|
||||
gbm->SaveModel(&model);
|
||||
model["config"] = Object();
|
||||
auto& j_param = model["config"];
|
||||
|
||||
gbm->SaveModel(&j_model);
|
||||
gbm->SaveConfig(&j_param);
|
||||
|
||||
std::string model_str;
|
||||
Json::Dump(model, &model_str);
|
||||
|
||||
auto loaded_model = Json::Load(StringView{model_str.c_str(), model_str.size()});
|
||||
ASSERT_EQ(get<String>(loaded_model["name"]), "gbtree");
|
||||
ASSERT_TRUE(IsA<Object>(loaded_model["model"]["gbtree_model_param"]));
|
||||
model = Json::Load({model_str.c_str(), model_str.size()});
|
||||
ASSERT_EQ(get<String>(model["model"]["name"]), "gbtree");
|
||||
|
||||
auto const& gbtree_model = model["model"]["model"];
|
||||
ASSERT_EQ(get<Array>(gbtree_model["trees"]).size(), 1);
|
||||
ASSERT_EQ(get<Integer>(get<Object>(get<Array>(gbtree_model["trees"]).front()).at("id")), 0);
|
||||
ASSERT_EQ(get<Array>(gbtree_model["tree_info"]).size(), 1);
|
||||
|
||||
auto j_train_param = model["config"]["gbtree_train_param"];
|
||||
ASSERT_EQ(get<String>(j_train_param["num_parallel_tree"]), "1");
|
||||
}
|
||||
|
||||
TEST(Dart, Json_IO) {
|
||||
@@ -145,20 +158,21 @@ TEST(Dart, Json_IO) {
|
||||
Json model {Object()};
|
||||
model["model"] = Object();
|
||||
auto& j_model = model["model"];
|
||||
model["parameters"] = Object();
|
||||
model["config"] = Object();
|
||||
|
||||
auto& j_param = model["config"];
|
||||
|
||||
gbm->SaveModel(&j_model);
|
||||
gbm->SaveConfig(&j_param);
|
||||
|
||||
std::string model_str;
|
||||
Json::Dump(model, &model_str);
|
||||
|
||||
model = Json::Load({model_str.c_str(), model_str.size()});
|
||||
|
||||
{
|
||||
auto const& gbtree = model["model"]["gbtree"];
|
||||
ASSERT_TRUE(IsA<Object>(gbtree));
|
||||
ASSERT_EQ(get<String>(model["model"]["name"]), "dart");
|
||||
ASSERT_NE(get<Array>(model["model"]["weight_drop"]).size(), 0);
|
||||
}
|
||||
ASSERT_EQ(get<String>(model["model"]["name"]), "dart") << model;
|
||||
ASSERT_EQ(get<String>(model["config"]["name"]), "dart");
|
||||
ASSERT_TRUE(IsA<Object>(model["model"]["gbtree"]));
|
||||
ASSERT_NE(get<Array>(model["model"]["weight_drop"]).size(), 0);
|
||||
}
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -13,23 +13,6 @@
|
||||
#include "../helpers.h"
|
||||
#include "../../../src/gbm/gbtree_model.h"
|
||||
|
||||
namespace {
|
||||
|
||||
// Asserts (via ASSERT_EQ) that `ret`, the status returned by a C API call,
// equals 0. On mismatch the text from XGBGetLastError() is streamed into the
// assertion failure message.
inline void CheckCAPICall(int ret) {
|
||||
ASSERT_EQ(ret, 0) << XGBGetLastError();
|
||||
}
|
||||
|
||||
} // namespace anonymous
|
||||
|
||||
const std::map<std::string, std::string>&
|
||||
QueryBoosterConfigurationArguments(BoosterHandle handle) {
|
||||
CHECK_NE(handle, static_cast<void*>(nullptr));
|
||||
auto* bst = static_cast<xgboost::Learner*>(handle);
|
||||
bst->Configure();
|
||||
return bst->GetConfigurationArguments();
|
||||
}
|
||||
|
||||
|
||||
namespace xgboost {
|
||||
namespace predictor {
|
||||
|
||||
@@ -110,77 +93,5 @@ TEST(gpu_predictor, ExternalMemoryTest) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Test whether pickling preserves predictor parameters
|
||||
TEST(gpu_predictor, PicklingTest) {
|
||||
int const gpuid = 0;
|
||||
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
const std::string tmp_file = tempdir.path + "/simple.libsvm";
|
||||
CreateBigTestData(tmp_file, 600);
|
||||
|
||||
DMatrixHandle dmat[1];
|
||||
BoosterHandle bst, bst2;
|
||||
std::vector<bst_float> label;
|
||||
for (int i = 0; i < 200; ++i) {
|
||||
label.push_back((i % 2 ? 1 : 0));
|
||||
}
|
||||
|
||||
// Load data matrix
|
||||
ASSERT_EQ(XGDMatrixCreateFromFile(
|
||||
tmp_file.c_str(), 0, &dmat[0]), 0) << XGBGetLastError();
|
||||
ASSERT_EQ(XGDMatrixSetFloatInfo(
|
||||
dmat[0], "label", label.data(), 200), 0) << XGBGetLastError();
|
||||
// Create booster
|
||||
ASSERT_EQ(XGBoosterCreate(dmat, 1, &bst), 0) << XGBGetLastError();
|
||||
// Set parameters
|
||||
ASSERT_EQ(XGBoosterSetParam(bst, "seed", "0"), 0) << XGBGetLastError();
|
||||
ASSERT_EQ(XGBoosterSetParam(bst, "base_score", "0.5"), 0) << XGBGetLastError();
|
||||
ASSERT_EQ(XGBoosterSetParam(bst, "booster", "gbtree"), 0) << XGBGetLastError();
|
||||
ASSERT_EQ(XGBoosterSetParam(bst, "learning_rate", "0.01"), 0) << XGBGetLastError();
|
||||
ASSERT_EQ(XGBoosterSetParam(bst, "max_depth", "8"), 0) << XGBGetLastError();
|
||||
ASSERT_EQ(XGBoosterSetParam(
|
||||
bst, "objective", "binary:logistic"), 0) << XGBGetLastError();
|
||||
ASSERT_EQ(XGBoosterSetParam(bst, "seed", "123"), 0) << XGBGetLastError();
|
||||
ASSERT_EQ(XGBoosterSetParam(
|
||||
bst, "tree_method", "gpu_hist"), 0) << XGBGetLastError();
|
||||
ASSERT_EQ(XGBoosterSetParam(
|
||||
bst, "gpu_id", std::to_string(gpuid).c_str()), 0) << XGBGetLastError();
|
||||
ASSERT_EQ(XGBoosterSetParam(bst, "predictor", "gpu_predictor"), 0) << XGBGetLastError();
|
||||
|
||||
// Run boosting iterations
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
ASSERT_EQ(XGBoosterUpdateOneIter(bst, i, dmat[0]), 0) << XGBGetLastError();
|
||||
}
|
||||
|
||||
// Delete matrix
|
||||
CheckCAPICall(XGDMatrixFree(dmat[0]));
|
||||
|
||||
// Pickle
|
||||
const char* dptr;
|
||||
bst_ulong len;
|
||||
std::string buf;
|
||||
CheckCAPICall(XGBoosterGetModelRaw(bst, &len, &dptr));
|
||||
buf = std::string(dptr, len);
|
||||
CheckCAPICall(XGBoosterFree(bst));
|
||||
|
||||
// Unpickle
|
||||
CheckCAPICall(XGBoosterCreate(nullptr, 0, &bst2));
|
||||
CheckCAPICall(XGBoosterLoadModelFromBuffer(bst2, buf.c_str(), len));
|
||||
|
||||
{ // Query predictor
|
||||
const auto& kwargs = QueryBoosterConfigurationArguments(bst2);
|
||||
ASSERT_EQ(kwargs.at("predictor"), "gpu_predictor");
|
||||
ASSERT_EQ(kwargs.at("gpu_id"), std::to_string(gpuid).c_str());
|
||||
}
|
||||
|
||||
{ // Change predictor and query again
|
||||
CheckCAPICall(XGBoosterSetParam(bst2, "predictor", "cpu_predictor"));
|
||||
const auto& kwargs = QueryBoosterConfigurationArguments(bst2);
|
||||
ASSERT_EQ(kwargs.at("predictor"), "cpu_predictor");
|
||||
}
|
||||
|
||||
CheckCAPICall(XGBoosterFree(bst2));
|
||||
}
|
||||
} // namespace predictor
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -1,20 +1,39 @@
|
||||
'''Loading a pickled model generated by test_pickling.py'''
|
||||
import pickle
|
||||
'''Loading a pickled model generated by test_pickling.py, only used by
|
||||
`test_gpu_with_dask.py`'''
|
||||
import unittest
|
||||
import os
|
||||
import xgboost as xgb
|
||||
import sys
|
||||
import json
|
||||
|
||||
sys.path.append("tests/python")
|
||||
from test_pickling import build_dataset, model_path
|
||||
from test_gpu_pickling import build_dataset, model_path, load_pickle
|
||||
|
||||
|
||||
class TestLoadPickle(unittest.TestCase):
|
||||
def test_load_pkl(self):
|
||||
assert os.environ['CUDA_VISIBLE_DEVICES'] == ''
|
||||
with open(model_path, 'rb') as fd:
|
||||
bst = pickle.load(fd)
|
||||
'''Test whether prediction is correct.'''
|
||||
assert os.environ['CUDA_VISIBLE_DEVICES'] == '-1'
|
||||
bst = load_pickle(model_path)
|
||||
x, y = build_dataset()
|
||||
test_x = xgb.DMatrix(x)
|
||||
res = bst.predict(test_x)
|
||||
assert len(res) == 10
|
||||
|
||||
def test_predictor_type_is_auto(self):
    '''A booster unpickled while CUDA_VISIBLE_DEVICES is '-1' must report
    `auto` as its predictor in the saved JSON configuration.'''
    assert os.environ['CUDA_VISIBLE_DEVICES'] == '-1'
    booster = load_pickle(model_path)
    parsed = json.loads(booster.save_config())
    train_param = parsed['learner']['gradient_booster']['gbtree_train_param']
    assert train_param['predictor'] == 'auto'
|
||||
|
||||
def test_predictor_type_is_gpu(self):
    '''A booster unpickled while CUDA_VISIBLE_DEVICES is unset must still
    report `gpu_predictor` in the saved JSON configuration.'''
    assert 'CUDA_VISIBLE_DEVICES' not in os.environ.keys()
    booster = load_pickle(model_path)
    parsed = json.loads(booster.save_config())
    train_param = parsed['learner']['gradient_booster']['gbtree_train_param']
    assert train_param['predictor'] == 'gpu_predictor'
|
||||
|
||||
@@ -4,7 +4,7 @@ import unittest
|
||||
import numpy as np
|
||||
import subprocess
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import xgboost as xgb
|
||||
from xgboost import XGBClassifier
|
||||
|
||||
@@ -39,18 +39,17 @@ class TestPickling(unittest.TestCase):
|
||||
bst = xgb.train(param, train_x)
|
||||
|
||||
save_pickle(bst, model_path)
|
||||
args = ["pytest",
|
||||
"--verbose",
|
||||
"-s",
|
||||
"--fulltrace",
|
||||
"./tests/python-gpu/load_pickle.py"]
|
||||
args = [
|
||||
"pytest", "--verbose", "-s", "--fulltrace",
|
||||
"./tests/python-gpu/load_pickle.py::TestLoadPickle::test_load_pkl"
|
||||
]
|
||||
command = ''
|
||||
for arg in args:
|
||||
command += arg
|
||||
command += ' '
|
||||
|
||||
cuda_environment = {'CUDA_VISIBLE_DEVICES': ''}
|
||||
env = os.environ
|
||||
cuda_environment = {'CUDA_VISIBLE_DEVICES': '-1'}
|
||||
env = os.environ.copy()
|
||||
# Passing new_environment directly to `env' argument results
|
||||
# in failure on Windows:
|
||||
# Fatal Python error: _Py_HashRandomization_Init: failed to
|
||||
@@ -62,12 +61,55 @@ class TestPickling(unittest.TestCase):
|
||||
assert status == 0
|
||||
os.remove(model_path)
|
||||
|
||||
def test_pickled_predictor(self):
    '''Train with `gpu_predictor`, pickle the booster, then run the
    `load_pickle.py` checks in subprocesses: once with
    CUDA_VISIBLE_DEVICES='-1' and once with the current environment, which
    must not define CUDA_VISIBLE_DEVICES.

    The pickle file is removed afterwards, also when an assertion fails.
    '''
    args_template = [
        "pytest",
        "--verbose",
        "-s",
        "--fulltrace"]
    load_pickle_tests = "./tests/python-gpu/load_pickle.py::TestLoadPickle::"

    x, y = build_dataset()
    train_x = xgb.DMatrix(x, label=y)

    param = {'tree_method': 'gpu_hist',
             'verbosity': 1, 'predictor': 'gpu_predictor'}
    bst = xgb.train(param, train_x)
    config = json.loads(bst.save_config())
    assert config['learner']['gradient_booster']['gbtree_train_param'][
        'predictor'] == 'gpu_predictor'

    save_pickle(bst, model_path)
    try:
        args = args_template.copy()
        args.append(load_pickle_tests + "test_predictor_type_is_auto")

        cuda_environment = {'CUDA_VISIBLE_DEVICES': '-1'}
        env = os.environ.copy()
        env.update(cuda_environment)

        # Load model in a CPU only environment.
        status = subprocess.call(args, env=env)
        assert status == 0

        args = args_template.copy()
        args.append(load_pickle_tests + "test_predictor_type_is_gpu")

        # Load in environment that has GPU.
        env = os.environ.copy()
        assert 'CUDA_VISIBLE_DEVICES' not in env.keys()
        status = subprocess.call(args, env=env)
        assert status == 0
    finally:
        # Do not leave the pickle behind for later tests or later runs.
        if os.path.exists(model_path):
            os.remove(model_path)
|
||||
|
||||
def test_predict_sklearn_pickle(self):
|
||||
x, y = build_dataset()
|
||||
|
||||
kwargs = {'tree_method': 'gpu_hist',
|
||||
'predictor': 'gpu_predictor',
|
||||
'verbosity': 2,
|
||||
'verbosity': 1,
|
||||
'objective': 'binary:logistic',
|
||||
'n_estimators': 10}
|
||||
|
||||
@@ -7,23 +7,25 @@ rng = np.random.RandomState(1994)
|
||||
|
||||
|
||||
class TestGPUTrainingContinuation(unittest.TestCase):
|
||||
def test_training_continuation_binary(self):
|
||||
kRows = 32
|
||||
kCols = 16
|
||||
def run_training_continuation(self, use_json):
|
||||
kRows = 64
|
||||
kCols = 32
|
||||
X = np.random.randn(kRows, kCols)
|
||||
y = np.random.randn(kRows)
|
||||
dtrain = xgb.DMatrix(X, y)
|
||||
params = {'tree_method': 'gpu_hist', 'max_depth': '2'}
|
||||
bst_0 = xgb.train(params, dtrain, num_boost_round=4)
|
||||
params = {'tree_method': 'gpu_hist', 'max_depth': '2',
|
||||
'gamma': '0.1', 'alpha': '0.01',
|
||||
'enable_experimental_json_serialization': use_json}
|
||||
bst_0 = xgb.train(params, dtrain, num_boost_round=64)
|
||||
dump_0 = bst_0.get_dump(dump_format='json')
|
||||
|
||||
bst_1 = xgb.train(params, dtrain, num_boost_round=2)
|
||||
bst_1 = xgb.train(params, dtrain, num_boost_round=2, xgb_model=bst_1)
|
||||
bst_1 = xgb.train(params, dtrain, num_boost_round=32)
|
||||
bst_1 = xgb.train(params, dtrain, num_boost_round=32, xgb_model=bst_1)
|
||||
dump_1 = bst_1.get_dump(dump_format='json')
|
||||
|
||||
def recursive_compare(obj_0, obj_1):
|
||||
if isinstance(obj_0, float):
|
||||
assert np.isclose(obj_0, obj_1)
|
||||
assert np.isclose(obj_0, obj_1, atol=1e-6)
|
||||
elif isinstance(obj_0, str):
|
||||
assert obj_0 == obj_1
|
||||
elif isinstance(obj_0, int):
|
||||
@@ -42,7 +44,14 @@ class TestGPUTrainingContinuation(unittest.TestCase):
|
||||
for i in range(len(obj_0)):
|
||||
recursive_compare(obj_0[i], obj_1[i])
|
||||
|
||||
assert len(dump_0) == len(dump_1)
|
||||
for i in range(len(dump_0)):
|
||||
obj_0 = json.loads(dump_0[i])
|
||||
obj_1 = json.loads(dump_1[i])
|
||||
recursive_compare(obj_0, obj_1)
|
||||
|
||||
def test_gpu_training_continuation_binary(self):
|
||||
self.run_training_continuation(False)
|
||||
|
||||
def test_gpu_training_continuation_json(self):
|
||||
self.run_training_continuation(True)
|
||||
|
||||
@@ -203,7 +203,7 @@ class TestModels(unittest.TestCase):
|
||||
self.assertRaises(ValueError, bst.predict, dm1)
|
||||
bst.predict(dm2) # success
|
||||
|
||||
def test_json_model_io(self):
|
||||
def test_model_json_io(self):
|
||||
X = np.random.random((10, 3))
|
||||
y = np.random.randint(2, size=(10,))
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@ import pickle
|
||||
import numpy as np
|
||||
import xgboost as xgb
|
||||
import os
|
||||
import unittest
|
||||
|
||||
|
||||
kRows = 100
|
||||
@@ -14,35 +15,45 @@ def generate_data():
|
||||
return X, y
|
||||
|
||||
|
||||
def test_model_pickling():
|
||||
xgb_params = {
|
||||
'verbosity': 0,
|
||||
'nthread': 1,
|
||||
'tree_method': 'hist'
|
||||
}
|
||||
class TestPickling(unittest.TestCase):
|
||||
def run_model_pickling(self, xgb_params):
|
||||
X, y = generate_data()
|
||||
dtrain = xgb.DMatrix(X, y)
|
||||
bst = xgb.train(xgb_params, dtrain)
|
||||
|
||||
X, y = generate_data()
|
||||
dtrain = xgb.DMatrix(X, y)
|
||||
bst = xgb.train(xgb_params, dtrain)
|
||||
dump_0 = bst.get_dump(dump_format='json')
|
||||
assert dump_0
|
||||
|
||||
dump_0 = bst.get_dump(dump_format='json')
|
||||
assert dump_0
|
||||
filename = 'model.pkl'
|
||||
|
||||
filename = 'model.pkl'
|
||||
with open(filename, 'wb') as fd:
|
||||
pickle.dump(bst, fd)
|
||||
|
||||
with open(filename, 'wb') as fd:
|
||||
pickle.dump(bst, fd)
|
||||
with open(filename, 'rb') as fd:
|
||||
bst = pickle.load(fd)
|
||||
|
||||
with open(filename, 'rb') as fd:
|
||||
bst = pickle.load(fd)
|
||||
with open(filename, 'wb') as fd:
|
||||
pickle.dump(bst, fd)
|
||||
|
||||
with open(filename, 'wb') as fd:
|
||||
pickle.dump(bst, fd)
|
||||
with open(filename, 'rb') as fd:
|
||||
bst = pickle.load(fd)
|
||||
|
||||
with open(filename, 'rb') as fd:
|
||||
bst = pickle.load(fd)
|
||||
assert bst.get_dump(dump_format='json') == dump_0
|
||||
|
||||
assert bst.get_dump(dump_format='json') == dump_0
|
||||
if os.path.exists(filename):
|
||||
os.remove(filename)
|
||||
|
||||
if os.path.exists(filename):
|
||||
os.remove(filename)
|
||||
def test_model_pickling_binary(self):
|
||||
params = {
|
||||
'nthread': 1,
|
||||
'tree_method': 'hist'
|
||||
}
|
||||
self.run_model_pickling(params)
|
||||
|
||||
def test_model_pickling_json(self):
|
||||
params = {
|
||||
'nthread': 1,
|
||||
'tree_method': 'hist',
|
||||
'enable_experimental_json_serialization': True
|
||||
}
|
||||
self.run_model_pickling(params)
|
||||
|
||||
@@ -10,26 +10,35 @@ rng = np.random.RandomState(1337)
|
||||
class TestTrainingContinuation(unittest.TestCase):
|
||||
num_parallel_tree = 3
|
||||
|
||||
xgb_params_01 = {
|
||||
'verbosity': 0,
|
||||
'nthread': 1,
|
||||
}
|
||||
def generate_parameters(self, use_json):
|
||||
xgb_params_01_binary = {
|
||||
'nthread': 1,
|
||||
}
|
||||
|
||||
xgb_params_02 = {
|
||||
'verbosity': 0,
|
||||
'nthread': 1,
|
||||
'num_parallel_tree': num_parallel_tree
|
||||
}
|
||||
xgb_params_02_binary = {
|
||||
'nthread': 1,
|
||||
'num_parallel_tree': self.num_parallel_tree
|
||||
}
|
||||
|
||||
xgb_params_03 = {
|
||||
'verbosity': 0,
|
||||
'nthread': 1,
|
||||
'num_class': 5,
|
||||
'num_parallel_tree': num_parallel_tree
|
||||
}
|
||||
xgb_params_03_binary = {
|
||||
'nthread': 1,
|
||||
'num_class': 5,
|
||||
'num_parallel_tree': self.num_parallel_tree
|
||||
}
|
||||
if use_json:
|
||||
xgb_params_01_binary[
|
||||
'enable_experimental_json_serialization'] = True
|
||||
xgb_params_02_binary[
|
||||
'enable_experimental_json_serialization'] = True
|
||||
xgb_params_03_binary[
|
||||
'enable_experimental_json_serialization'] = True
|
||||
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
def test_training_continuation(self):
|
||||
return [
|
||||
xgb_params_01_binary, xgb_params_02_binary, xgb_params_03_binary
|
||||
]
|
||||
|
||||
def run_training_continuation(self, xgb_params_01, xgb_params_02,
|
||||
xgb_params_03):
|
||||
from sklearn.datasets import load_digits
|
||||
from sklearn.metrics import mean_squared_error
|
||||
|
||||
@@ -45,18 +54,18 @@ class TestTrainingContinuation(unittest.TestCase):
|
||||
dtrain_2class = xgb.DMatrix(X_2class, label=y_2class)
|
||||
dtrain_5class = xgb.DMatrix(X_5class, label=y_5class)
|
||||
|
||||
gbdt_01 = xgb.train(self.xgb_params_01, dtrain_2class,
|
||||
gbdt_01 = xgb.train(xgb_params_01, dtrain_2class,
|
||||
num_boost_round=10)
|
||||
ntrees_01 = len(gbdt_01.get_dump())
|
||||
assert ntrees_01 == 10
|
||||
|
||||
gbdt_02 = xgb.train(self.xgb_params_01, dtrain_2class,
|
||||
gbdt_02 = xgb.train(xgb_params_01, dtrain_2class,
|
||||
num_boost_round=0)
|
||||
gbdt_02.save_model('xgb_tc.model')
|
||||
|
||||
gbdt_02a = xgb.train(self.xgb_params_01, dtrain_2class,
|
||||
gbdt_02a = xgb.train(xgb_params_01, dtrain_2class,
|
||||
num_boost_round=10, xgb_model=gbdt_02)
|
||||
gbdt_02b = xgb.train(self.xgb_params_01, dtrain_2class,
|
||||
gbdt_02b = xgb.train(xgb_params_01, dtrain_2class,
|
||||
num_boost_round=10, xgb_model="xgb_tc.model")
|
||||
ntrees_02a = len(gbdt_02a.get_dump())
|
||||
ntrees_02b = len(gbdt_02b.get_dump())
|
||||
@@ -71,13 +80,13 @@ class TestTrainingContinuation(unittest.TestCase):
|
||||
res2 = mean_squared_error(y_2class, gbdt_02b.predict(dtrain_2class))
|
||||
assert res1 == res2
|
||||
|
||||
gbdt_03 = xgb.train(self.xgb_params_01, dtrain_2class,
|
||||
gbdt_03 = xgb.train(xgb_params_01, dtrain_2class,
|
||||
num_boost_round=3)
|
||||
gbdt_03.save_model('xgb_tc.model')
|
||||
|
||||
gbdt_03a = xgb.train(self.xgb_params_01, dtrain_2class,
|
||||
gbdt_03a = xgb.train(xgb_params_01, dtrain_2class,
|
||||
num_boost_round=7, xgb_model=gbdt_03)
|
||||
gbdt_03b = xgb.train(self.xgb_params_01, dtrain_2class,
|
||||
gbdt_03b = xgb.train(xgb_params_01, dtrain_2class,
|
||||
num_boost_round=7, xgb_model="xgb_tc.model")
|
||||
ntrees_03a = len(gbdt_03a.get_dump())
|
||||
ntrees_03b = len(gbdt_03b.get_dump())
|
||||
@@ -88,7 +97,7 @@ class TestTrainingContinuation(unittest.TestCase):
|
||||
res2 = mean_squared_error(y_2class, gbdt_03b.predict(dtrain_2class))
|
||||
assert res1 == res2
|
||||
|
||||
gbdt_04 = xgb.train(self.xgb_params_02, dtrain_2class,
|
||||
gbdt_04 = xgb.train(xgb_params_02, dtrain_2class,
|
||||
num_boost_round=3)
|
||||
assert gbdt_04.best_ntree_limit == (gbdt_04.best_iteration +
|
||||
1) * self.num_parallel_tree
|
||||
@@ -100,7 +109,7 @@ class TestTrainingContinuation(unittest.TestCase):
|
||||
ntree_limit=gbdt_04.best_ntree_limit))
|
||||
assert res1 == res2
|
||||
|
||||
gbdt_04 = xgb.train(self.xgb_params_02, dtrain_2class,
|
||||
gbdt_04 = xgb.train(xgb_params_02, dtrain_2class,
|
||||
num_boost_round=7, xgb_model=gbdt_04)
|
||||
assert gbdt_04.best_ntree_limit == (
|
||||
gbdt_04.best_iteration + 1) * self.num_parallel_tree
|
||||
@@ -112,11 +121,11 @@ class TestTrainingContinuation(unittest.TestCase):
|
||||
ntree_limit=gbdt_04.best_ntree_limit))
|
||||
assert res1 == res2
|
||||
|
||||
gbdt_05 = xgb.train(self.xgb_params_03, dtrain_5class,
|
||||
gbdt_05 = xgb.train(xgb_params_03, dtrain_5class,
|
||||
num_boost_round=7)
|
||||
assert gbdt_05.best_ntree_limit == (
|
||||
gbdt_05.best_iteration + 1) * self.num_parallel_tree
|
||||
gbdt_05 = xgb.train(self.xgb_params_03,
|
||||
gbdt_05 = xgb.train(xgb_params_03,
|
||||
dtrain_5class,
|
||||
num_boost_round=3,
|
||||
xgb_model=gbdt_05)
|
||||
@@ -127,3 +136,32 @@ class TestTrainingContinuation(unittest.TestCase):
|
||||
res2 = gbdt_05.predict(dtrain_5class,
|
||||
ntree_limit=gbdt_05.best_ntree_limit)
|
||||
np.testing.assert_almost_equal(res1, res2)
|
||||
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
def test_training_continuation_binary(self):
|
||||
params = self.generate_parameters(False)
|
||||
self.run_training_continuation(params[0], params[1], params[2])
|
||||
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
def test_training_continuation_json(self):
|
||||
params = self.generate_parameters(True)
|
||||
for p in params:
|
||||
p['enable_experimental_json_serialization'] = True
|
||||
self.run_training_continuation(params[0], params[1], params[2])
|
||||
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
def test_training_continuation_updaters_binary(self):
|
||||
updaters = 'grow_colmaker,prune,refresh'
|
||||
params = self.generate_parameters(False)
|
||||
for p in params:
|
||||
p['updater'] = updaters
|
||||
self.run_training_continuation(params[0], params[1], params[2])
|
||||
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
def test_training_continuation_updaters_json(self):
|
||||
# Picked up from R tests.
|
||||
updaters = 'grow_colmaker,prune,refresh'
|
||||
params = self.generate_parameters(True)
|
||||
for p in params:
|
||||
p['updater'] = updaters
|
||||
self.run_training_continuation(params[0], params[1], params[2])
|
||||
|
||||
Reference in New Issue
Block a user