Pass pointer to model parameters. (#5101)

* Pass pointer to model parameters.

This PR de-duplicates most of the model parameters, except the one in
`tree_model.h`.  One difficulty is that `base_score` is a model property but can
be changed at runtime by the objective function.  Hence, when performing model
IO, we need to save the value provided by the user instead of the one
transformed by the objective.  Here we create an immutable version of
`LearnerModelParam` that represents the value of the model parameters after
configuration.
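
To make the split concrete, here is a rough sketch of the idea, not the exact
implementation: the user-facing, serializable parameter keeps `base_score`
exactly as the user supplied it, while an immutable snapshot built after
configuration is what boosters and objectives see through a pointer.  The field
names follow the test changes in this commit (`LearnerModelParam` with
`base_score`, `num_feature`, `num_output_group`); the `*Sketch` types and the
constructor shape are illustrative assumptions.

```cpp
#include <cstdint>

// Hypothetical stand-in for the mutable, user-facing parameter struct that is
// serialized during model IO: base_score stays exactly as the user set it.
struct LearnerModelParamLegacySketch {
  float base_score { 0.5f };
  std::uint32_t num_feature { 0 };
  std::uint32_t num_class { 0 };
};

// Immutable snapshot handed to boosters/objectives by pointer after
// configuration.  base_score here may already be transformed by the objective
// (e.g. mapped through a link function) and is never written back during IO.
struct LearnerModelParamSketch {
  float base_score;
  std::uint32_t num_feature;
  std::uint32_t num_output_group;

  LearnerModelParamSketch(LearnerModelParamLegacySketch const& user,
                          float transformed_base_score)
      : base_score{transformed_base_score},
        num_feature{user.num_feature},
        num_output_group{user.num_class == 0 ? 1u : user.num_class} {}
};
```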
Author:    Jiaming Yuan
Date:      2019-12-10 12:11:22 +08:00
Committer: GitHub
Parent:    979f74d51a
Commit:    e089e16e3d

33 changed files with 623 additions and 404 deletions


@@ -12,62 +12,55 @@ TEST(GBTree, SelectTreeMethod) {
   GenericParameter generic_param;
   generic_param.UpdateAllowUnknown(Args{});
+  LearnerModelParam mparam;
+  mparam.base_score = 0.5;
+  mparam.num_feature = kCols;
+  mparam.num_output_group = 1;
+  std::vector<std::shared_ptr<DMatrix> > caches;
   std::unique_ptr<GradientBooster> p_gbm{
-      GradientBooster::Create("gbtree", &generic_param, {}, 0)};
+      GradientBooster::Create("gbtree", &generic_param, &mparam, caches)};
   auto& gbtree = dynamic_cast<gbm::GBTree&> (*p_gbm);
   // Test if `tree_method` can be set
-  std::string n_feat = std::to_string(kCols);
-  Args args {{"tree_method", "approx"}, {"num_feature", n_feat}};
+  Args args {{"tree_method", "approx"}};
-  gbtree.Configure({args.cbegin(), args.cend()});
+  gbtree.Configure(args);
   auto const& tparam = gbtree.GetTrainParam();
-  gbtree.Configure({{"tree_method", "approx"}, {"num_feature", n_feat}});
+  gbtree.Configure({{"tree_method", "approx"}});
   ASSERT_EQ(tparam.updater_seq, "grow_histmaker,prune");
-  gbtree.Configure({{"tree_method", "exact"}, {"num_feature", n_feat}});
+  gbtree.Configure({{"tree_method", "exact"}});
   ASSERT_EQ(tparam.updater_seq, "grow_colmaker,prune");
-  gbtree.Configure({{"tree_method", "hist"}, {"num_feature", n_feat}});
+  gbtree.Configure({{"tree_method", "hist"}});
   ASSERT_EQ(tparam.updater_seq, "grow_quantile_histmaker");
-  gbtree.Configure({{"booster", "dart"}, {"tree_method", "hist"},
-                    {"num_feature", n_feat}});
+  gbtree.Configure({{"booster", "dart"}, {"tree_method", "hist"}});
   ASSERT_EQ(tparam.updater_seq, "grow_quantile_histmaker");
 #ifdef XGBOOST_USE_CUDA
   generic_param.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
-  gbtree.Configure({{"tree_method", "gpu_hist"}, {"num_feature", n_feat}});
+  gbtree.Configure({{"tree_method", "gpu_hist"}});
   ASSERT_EQ(tparam.updater_seq, "grow_gpu_hist");
-  gbtree.Configure({{"booster", "dart"}, {"tree_method", "gpu_hist"},
-                    {"num_feature", n_feat}});
+  gbtree.Configure({{"booster", "dart"}, {"tree_method", "gpu_hist"}});
   ASSERT_EQ(tparam.updater_seq, "grow_gpu_hist");
-#endif
+#endif  // XGBOOST_USE_CUDA
 }
 #ifdef XGBOOST_USE_CUDA
 TEST(GBTree, ChoosePredictor) {
-  size_t constexpr kNumRows = 17;
+  size_t constexpr kRows = 17;
   size_t constexpr kCols = 15;
-  auto pp_mat = CreateDMatrix(kNumRows, kCols, 0);
-  auto& p_mat = *pp_mat;
-  std::vector<bst_float> labels (kNumRows);
-  for (size_t i = 0; i < kNumRows; ++i) {
-    labels[i] = i % 2;
-  }
-  p_mat->Info().SetInfo("label", labels.data(), DataType::kFloat32, kNumRows);
+  auto pp_dmat = CreateDMatrix(kRows, kCols, 0);
+  std::shared_ptr<DMatrix> p_dmat {*pp_dmat};
-  std::vector<std::shared_ptr<xgboost::DMatrix>> mat = {p_mat};
-  std::string n_feat = std::to_string(kCols);
-  Args args {{"tree_method", "approx"}, {"num_feature", n_feat}};
   GenericParameter generic_param;
   generic_param.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
-  auto& data = (*(p_mat->GetBatches<SparsePage>().begin())).data;
+  auto& data = (*(p_dmat->GetBatches<SparsePage>().begin())).data;
+  p_dmat->Info().labels_.Resize(kRows);
-  auto learner = std::unique_ptr<Learner>(Learner::Create(mat));
-  learner->SetParams(Args{{"tree_method", "gpu_hist"}});
+  auto learner = std::unique_ptr<Learner>(Learner::Create({p_dmat}));
+  learner->SetParams(Args{{"tree_method", "gpu_hist"}, {"gpu_id", "0"}});
   for (size_t i = 0; i < 4; ++i) {
-    learner->UpdateOneIter(i, p_mat.get());
+    learner->UpdateOneIter(i, p_dmat.get());
   }
   ASSERT_TRUE(data.HostCanWrite());
   dmlc::TemporaryDirectory tempdir;
@@ -79,14 +72,14 @@ TEST(GBTree, ChoosePredictor) {
   }
   // a new learner
-  learner = std::unique_ptr<Learner>(Learner::Create(mat));
+  learner = std::unique_ptr<Learner>(Learner::Create({p_dmat}));
   {
     std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), "r"));
     learner->Load(fi.get());
   }
   learner->SetParams(Args{{"tree_method", "gpu_hist"}, {"gpu_id", "0"}});
   for (size_t i = 0; i < 4; ++i) {
-    learner->UpdateOneIter(i, p_mat.get());
+    learner->UpdateOneIter(i, p_dmat.get());
   }
   ASSERT_TRUE(data.HostCanWrite());
@@ -96,10 +89,10 @@ TEST(GBTree, ChoosePredictor) {
   ASSERT_FALSE(data.HostCanWrite());
   // another new learner
-  learner = std::unique_ptr<Learner>(Learner::Create(mat));
+  learner = std::unique_ptr<Learner>(Learner::Create({p_dmat}));
   learner->SetParams(Args{{"tree_method", "gpu_hist"}, {"gpu_id", "0"}});
   for (size_t i = 0; i < 4; ++i) {
-    learner->UpdateOneIter(i, p_mat.get());
+    learner->UpdateOneIter(i, p_dmat.get());
   }
   // data is not pulled back into host
   ASSERT_FALSE(data.HostCanWrite());
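
Read together, the test changes above amount to one construction pattern: a
booster is created against a pointer to the already-configured
`LearnerModelParam`, so per-call `num_feature` arguments disappear from
`Configure()`.  Below is a minimal sketch of that pattern, assuming the header
layout of this era of the codebase; the helper function name is illustrative,
while the `GradientBooster::Create` call shape and the parameter fields come
from the diff.

```cpp
#include <memory>
#include <vector>

#include "xgboost/base.h"
#include "xgboost/data.h"
#include "xgboost/gbm.h"
#include "xgboost/generic_parameters.h"
#include "xgboost/learner.h"

namespace xgboost {
// Illustrative helper (not part of the library): construct and configure a
// gbtree booster that reads base_score/num_feature from the shared
// LearnerModelParam instead of taking them as configuration arguments.
void ConstructBoosterAgainstModelParam() {
  GenericParameter generic_param;
  generic_param.UpdateAllowUnknown(Args{});

  LearnerModelParam mparam;
  mparam.base_score = 0.5;
  mparam.num_feature = 10;   // kCols in the test above
  mparam.num_output_group = 1;

  std::vector<std::shared_ptr<DMatrix>> caches;  // no cached DMatrix needed here
  std::unique_ptr<GradientBooster> p_gbm{
      GradientBooster::Create("gbtree", &generic_param, &mparam, caches)};

  // Only the tree method is configured; the feature count comes from the
  // LearnerModelParam the booster already points to.
  p_gbm->Configure({{"tree_method", "hist"}});
}
}  // namespace xgboost
```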