Serialise booster after training to reset state (#5484)

* Serialise booster after training to reset state

* Prevent process_type being set on load

* Check for correct updater sequence
Rory Mitchell 2020-04-11 16:27:12 +12:00 committed by GitHub
parent 4a0c8ef237
commit 093e2227e3
5 changed files with 27 additions and 7 deletions

python-package/xgboost/training.py

@@ -109,7 +109,9 @@ def _train_internal(params, dtrain,
     else:
         bst.best_iteration = nboost - 1
     bst.best_ntree_limit = (bst.best_iteration + 1) * num_parallel_tree
-    return bst
+
+    # Copy to serialise and unserialise booster to reset state and free training memory
+    return bst.copy()

 def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
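For context, Booster.copy() performs a full serialise/unserialise round trip, so the returned handle keeps the trees but no longer holds references to the training DMatrix or its caches. A minimal sketch of the same effect, assuming only that the booster pickles through its raw serialised form, as the Python Booster does:

    import pickle

    def reset_state(bst):
        # Dump the booster to its raw serialised form and rebuild it;
        # the reconstructed booster keeps the trees but drops cached
        # training state, freeing that memory.
        return pickle.loads(pickle.dumps(bst))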

src/gbm/gbtree.cc

@@ -267,6 +267,11 @@ void GBTree::BoostNewTrees(HostDeviceVector<GradientPair>* gpair,
   // create the trees
   for (int i = 0; i < tparam_.num_parallel_tree; ++i) {
     if (tparam_.process_type == TreeProcessType::kDefault) {
+      CHECK(!updaters_.front()->CanModifyTree())
+          << "Updater: `" << updaters_.front()->Name() << "` "
+          << "can not be used to create new trees. "
+          << "Set `process_type` to `update` if you want to update existing "
+             "trees.";
       // create new tree
       std::unique_ptr<RegTree> ptr(new RegTree());
       ptr->param.UpdateAllowUnknown(this->cfg_);
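The new check rejects tree-modifying updaters such as prune or refresh while new trees are being grown. From the Python API, the supported way to run such an updater is to opt in explicitly; a hedged sketch, assuming dtrain and a previously trained booster bst already exist:

    import xgboost as xgb

    # process_type="update" replays the existing trees through the updater
    # instead of creating new ones; num_boost_round should not exceed the
    # number of rounds already in bst.
    params = {"process_type": "update", "updater": "refresh", "refresh_leaf": True}
    refreshed = xgb.train(params, dtrain, num_boost_round=2, xgb_model=bst)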
@@ -319,6 +324,10 @@ void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees,
 void GBTree::LoadConfig(Json const& in) {
   CHECK_EQ(get<String>(in["name"]), "gbtree");
   FromJson(in["gbtree_train_param"], &tparam_);
+  // Process type cannot be kUpdate from loaded model
+  // This would cause all trees to be pushed to trees_to_update
+  // e.g. updating a model, then saving and loading it would result in an empty model
+  tparam_.process_type = TreeProcessType::kDefault;
   int32_t const n_gpus = xgboost::common::AllVisibleGPUs();
   if (n_gpus == 0 && tparam_.predictor == PredictorType::kGPUPredictor) {
     LOG(WARNING)
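Forcing kDefault on load closes the loop described in the comment. A sketch of the scenario the guard prevents; dtrain, bst (a trained booster) and the file name are illustrative assumptions:

    import xgboost as xgb

    updated = xgb.train({"process_type": "update", "updater": "refresh"},
                        dtrain, num_boost_round=2, xgb_model=bst)
    updated.save_model("updated.model")
    # Before this fix, the reloaded booster could keep process_type="update",
    # queue all its trees into trees_to_update, and save out an empty model.
    # With the fix, it loads with process_type reset to "default".
    reloaded = xgb.Booster(model_file="updated.model")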
@@ -348,6 +357,13 @@ void GBTree::SaveConfig(Json* p_out) const {
   auto& out = *p_out;
   out["name"] = String("gbtree");
   out["gbtree_train_param"] = ToJson(tparam_);
+  // Process type cannot be kUpdate from loaded model
+  // This would cause all trees to be pushed to trees_to_update
+  // e.g. updating a model, then saving and loading it would result in an empty
+  // model
+  out["gbtree_train_param"]["process_type"] = String("default");
+
   out["updater"] = Object();
   auto& j_updaters = out["updater"];
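The same normalisation on the save path can be observed from Python through Booster.save_config(); the learner/gradient_booster layout below is the usual structure of that config document:

    import json

    conf = json.loads(bst.save_config())  # bst: any trained gbtree booster
    # Prints "default" regardless of the process_type used during training.
    print(conf["learner"]["gradient_booster"]["gbtree_train_param"]["process_type"])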

tests/cpp/gbm/test_gbtree.cc

@@ -63,6 +63,10 @@ TEST(GBTree, WrongUpdater) {
   // Hist can not be used for updating tree.
   learner->SetParams(Args{{"tree_method", "hist"}, {"process_type", "update"}});
   ASSERT_THROW(learner->UpdateOneIter(0, p_dmat), dmlc::Error);
+  // Prune can not be used for learning new tree.
+  learner->SetParams(
+      Args{{"tree_method", "prune"}, {"process_type", "default"}});
+  ASSERT_THROW(learner->UpdateOneIter(0, p_dmat), dmlc::Error);
 }

 #ifdef XGBOOST_USE_CUDA
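A rough Python-level analogue of the new assertion, using the prune updater (the C++ test drives the learner directly; the data here is made up):

    import numpy as np
    import xgboost as xgb

    X = np.random.randn(64, 4)
    y = np.random.randn(64)
    dtrain = xgb.DMatrix(X, label=y)

    # prune can only modify existing trees, so growing new trees with it
    # under the default process type is expected to raise.
    try:
        xgb.train({"updater": "prune", "process_type": "default"}, dtrain, 1)
    except xgb.core.XGBoostError:
        print("rejected as expected")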

tests/distributed/test_basic.py

@@ -20,9 +20,8 @@ num_round = 20
 bst = xgb.train(param, dtrain, num_round, watchlist, early_stopping_rounds=2)

 # Save the model, only ask process 0 to save the model.
-if xgb.rabit.get_rank() == 0:
-    bst.save_model("test.model")
-    xgb.rabit.tracker_print("Finished training\n")
+bst.save_model("test.model{}".format(xgb.rabit.get_rank()))
+xgb.rabit.tracker_print("Finished training\n")

 # Notify the tracker all training has been successful
 # This is only needed in distributed training.
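Having every rank write its own file makes it possible to verify after the run that serialisation is identical across workers; a hypothetical post-run check, with n_workers assumed known from the job setup:

    # Compare each rank's saved model against rank 0's, byte for byte.
    with open("test.model0", "rb") as f:
        reference = f.read()
    for rank in range(1, n_workers):
        with open("test.model{}".format(rank), "rb") as f:
            assert f.read() == reference, "rank {} diverged".format(rank)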

tests/distributed/test_issue3402.py

@@ -70,9 +70,8 @@ watchlist = [(dtrain,'train')]
 num_round = 2
 bst = xgb.train(param, dtrain, num_round, watchlist)
-if xgb.rabit.get_rank() == 0:
-    bst.save_model("test_issue3402.model")
-    xgb.rabit.tracker_print("Finished training\n")
+bst.save_model("test_issue3402.model{}".format(xgb.rabit.get_rank()))
+xgb.rabit.tracker_print("Finished training\n")

 # Notify the tracker all training has been successful
 # This is only needed in distributed training.