Pass pointer to model parameters. (#5101)
* Pass pointer to model parameters. This PR de-duplicates most of the model parameters, except the one in `tree_model.h`. One difficulty is that `base_score` is a model property but can be changed at runtime by the objective function. Hence, when performing model IO, we need to save the value provided by the user instead of the one transformed by the objective. Here we create an immutable version of `LearnerModelParam` that represents the values of the model parameters after configuration.
This commit is contained in:
@@ -252,7 +252,7 @@ class CyclicFeatureSelector : public FeatureSelector {
|
||||
int NextFeature(int iteration, const gbm::GBLinearModel &model,
|
||||
int group_idx, const std::vector<GradientPair> &gpair,
|
||||
DMatrix *p_fmat, float alpha, float lambda) override {
|
||||
return iteration % model.param.num_feature;
|
||||
return iteration % model.learner_model_param_->num_feature;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -266,7 +266,7 @@ class ShuffleFeatureSelector : public FeatureSelector {
|
||||
const std::vector<GradientPair> &gpair,
|
||||
DMatrix *p_fmat, float alpha, float lambda, int param) override {
|
||||
if (feat_index_.size() == 0) {
|
||||
feat_index_.resize(model.param.num_feature);
|
||||
feat_index_.resize(model.learner_model_param_->num_feature);
|
||||
std::iota(feat_index_.begin(), feat_index_.end(), 0);
|
||||
}
|
||||
std::shuffle(feat_index_.begin(), feat_index_.end(), common::GlobalRandom());
|
||||
@@ -275,7 +275,7 @@ class ShuffleFeatureSelector : public FeatureSelector {
|
||||
int NextFeature(int iteration, const gbm::GBLinearModel &model,
|
||||
int group_idx, const std::vector<GradientPair> &gpair,
|
||||
DMatrix *p_fmat, float alpha, float lambda) override {
|
||||
return feat_index_[iteration % model.param.num_feature];
|
||||
return feat_index_[iteration % model.learner_model_param_->num_feature];
|
||||
}
|
||||
|
||||
protected:
|
||||
@@ -291,7 +291,7 @@ class RandomFeatureSelector : public FeatureSelector {
|
||||
int NextFeature(int iteration, const gbm::GBLinearModel &model,
|
||||
int group_idx, const std::vector<GradientPair> &gpair,
|
||||
DMatrix *p_fmat, float alpha, float lambda) override {
|
||||
return common::GlobalRandom()() % model.param.num_feature;
|
||||
return common::GlobalRandom()() % model.learner_model_param_->num_feature;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -310,11 +310,11 @@ class GreedyFeatureSelector : public FeatureSelector {
|
||||
const std::vector<GradientPair> &gpair,
|
||||
DMatrix *p_fmat, float alpha, float lambda, int param) override {
|
||||
top_k_ = static_cast<bst_uint>(param);
|
||||
const bst_uint ngroup = model.param.num_output_group;
|
||||
const bst_uint ngroup = model.learner_model_param_->num_output_group;
|
||||
if (param <= 0) top_k_ = std::numeric_limits<bst_uint>::max();
|
||||
if (counter_.size() == 0) {
|
||||
counter_.resize(ngroup);
|
||||
gpair_sums_.resize(model.param.num_feature * ngroup);
|
||||
gpair_sums_.resize(model.learner_model_param_->num_feature * ngroup);
|
||||
}
|
||||
for (bst_uint gid = 0u; gid < ngroup; ++gid) {
|
||||
counter_[gid] = 0u;
|
||||
@@ -327,10 +327,10 @@ class GreedyFeatureSelector : public FeatureSelector {
|
||||
// k-th selected feature for a group
|
||||
auto k = counter_[group_idx]++;
|
||||
// stop after either reaching top-K or going through all the features in a group
|
||||
if (k >= top_k_ || counter_[group_idx] == model.param.num_feature) return -1;
|
||||
if (k >= top_k_ || counter_[group_idx] == model.learner_model_param_->num_feature) return -1;
|
||||
|
||||
const int ngroup = model.param.num_output_group;
|
||||
const bst_omp_uint nfeat = model.param.num_feature;
|
||||
const int ngroup = model.learner_model_param_->num_output_group;
|
||||
const bst_omp_uint nfeat = model.learner_model_param_->num_feature;
|
||||
// Calculate univariate gradient sums
|
||||
std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.));
|
||||
for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
|
||||
@@ -387,8 +387,8 @@ class ThriftyFeatureSelector : public FeatureSelector {
|
||||
DMatrix *p_fmat, float alpha, float lambda, int param) override {
|
||||
top_k_ = static_cast<bst_uint>(param);
|
||||
if (param <= 0) top_k_ = std::numeric_limits<bst_uint>::max();
|
||||
const bst_uint ngroup = model.param.num_output_group;
|
||||
const bst_omp_uint nfeat = model.param.num_feature;
|
||||
const bst_uint ngroup = model.learner_model_param_->num_output_group;
|
||||
const bst_omp_uint nfeat = model.learner_model_param_->num_feature;
|
||||
|
||||
if (deltaw_.size() == 0) {
|
||||
deltaw_.resize(nfeat * ngroup);
|
||||
@@ -444,9 +444,9 @@ class ThriftyFeatureSelector : public FeatureSelector {
|
||||
// k-th selected feature for a group
|
||||
auto k = counter_[group_idx]++;
|
||||
// stop after either reaching top-N or going through all the features in a group
|
||||
if (k >= top_k_ || counter_[group_idx] == model.param.num_feature) return -1;
|
||||
if (k >= top_k_ || counter_[group_idx] == model.learner_model_param_->num_feature) return -1;
|
||||
// note that sorted_idx stores the "long" indices
|
||||
const size_t grp_offset = group_idx * model.param.num_feature;
|
||||
const size_t grp_offset = group_idx * model.learner_model_param_->num_feature;
|
||||
return static_cast<int>(sorted_idx_[grp_offset + k] - grp_offset);
|
||||
}
|
||||
|
||||
|
||||
@@ -35,7 +35,7 @@ class CoordinateUpdater : public LinearUpdater {
|
||||
void Update(HostDeviceVector<GradientPair> *in_gpair, DMatrix *p_fmat,
|
||||
gbm::GBLinearModel *model, double sum_instance_weight) override {
|
||||
tparam_.DenormalizePenalties(sum_instance_weight);
|
||||
const int ngroup = model->param.num_output_group;
|
||||
const int ngroup = model->learner_model_param_->num_output_group;
|
||||
// update bias
|
||||
for (int group_idx = 0; group_idx < ngroup; ++group_idx) {
|
||||
auto grad = GetBiasGradientParallel(group_idx, ngroup,
|
||||
@@ -52,7 +52,7 @@ class CoordinateUpdater : public LinearUpdater {
|
||||
tparam_.reg_lambda_denorm, cparam_.top_k);
|
||||
// update weights
|
||||
for (int group_idx = 0; group_idx < ngroup; ++group_idx) {
|
||||
for (unsigned i = 0U; i < model->param.num_feature; i++) {
|
||||
for (unsigned i = 0U; i < model->learner_model_param_->num_feature; i++) {
|
||||
int fidx = selector_->NextFeature
|
||||
(i, *model, group_idx, in_gpair->ConstHostVector(), p_fmat,
|
||||
tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm);
|
||||
@@ -65,7 +65,7 @@ class CoordinateUpdater : public LinearUpdater {
|
||||
|
||||
inline void UpdateFeature(int fidx, int group_idx, std::vector<GradientPair> *in_gpair,
|
||||
DMatrix *p_fmat, gbm::GBLinearModel *model) {
|
||||
const int ngroup = model->param.num_output_group;
|
||||
const int ngroup = model->learner_model_param_->num_output_group;
|
||||
bst_float &w = (*model)[fidx][group_idx];
|
||||
auto gradient =
|
||||
GetGradientParallel(group_idx, ngroup, fidx, *in_gpair, p_fmat);
|
||||
|
||||
@@ -41,7 +41,7 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
|
||||
monitor_.Init("GPUCoordinateUpdater");
|
||||
}
|
||||
|
||||
void LazyInitDevice(DMatrix *p_fmat, const gbm::GBLinearModelParam &model_param) {
|
||||
void LazyInitDevice(DMatrix *p_fmat, const LearnerModelParam &model_param) {
|
||||
if (learner_param_->gpu_id < 0) return;
|
||||
|
||||
num_row_ = static_cast<size_t>(p_fmat->Info().num_row_);
|
||||
@@ -88,14 +88,14 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
|
||||
gbm::GBLinearModel *model, double sum_instance_weight) override {
|
||||
tparam_.DenormalizePenalties(sum_instance_weight);
|
||||
monitor_.Start("LazyInitDevice");
|
||||
this->LazyInitDevice(p_fmat, model->param);
|
||||
this->LazyInitDevice(p_fmat, *(model->learner_model_param_));
|
||||
monitor_.Stop("LazyInitDevice");
|
||||
|
||||
monitor_.Start("UpdateGpair");
|
||||
auto &in_gpair_host = in_gpair->ConstHostVector();
|
||||
// Update gpair
|
||||
if (learner_param_->gpu_id >= 0) {
|
||||
this->UpdateGpair(in_gpair_host, model->param);
|
||||
this->UpdateGpair(in_gpair_host);
|
||||
}
|
||||
monitor_.Stop("UpdateGpair");
|
||||
|
||||
@@ -107,8 +107,9 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
|
||||
tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm,
|
||||
coord_param_.top_k);
|
||||
monitor_.Start("UpdateFeature");
|
||||
for (auto group_idx = 0; group_idx < model->param.num_output_group; ++group_idx) {
|
||||
for (auto i = 0U; i < model->param.num_feature; i++) {
|
||||
for (auto group_idx = 0; group_idx < model->learner_model_param_->num_output_group;
|
||||
++group_idx) {
|
||||
for (auto i = 0U; i < model->learner_model_param_->num_feature; i++) {
|
||||
auto fidx = selector_->NextFeature(
|
||||
i, *model, group_idx, in_gpair->ConstHostVector(), p_fmat,
|
||||
tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm);
|
||||
@@ -120,11 +121,12 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
|
||||
}
|
||||
|
||||
void UpdateBias(DMatrix *p_fmat, gbm::GBLinearModel *model) {
|
||||
for (int group_idx = 0; group_idx < model->param.num_output_group; ++group_idx) {
|
||||
for (int group_idx = 0; group_idx < model->learner_model_param_->num_output_group;
|
||||
++group_idx) {
|
||||
// Get gradient
|
||||
auto grad = GradientPair(0, 0);
|
||||
if (learner_param_->gpu_id >= 0) {
|
||||
grad = GetBiasGradient(group_idx, model->param.num_output_group);
|
||||
grad = GetBiasGradient(group_idx, model->learner_model_param_->num_output_group);
|
||||
}
|
||||
auto dbias = static_cast<float>(
|
||||
tparam_.learning_rate *
|
||||
@@ -133,7 +135,7 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
|
||||
|
||||
// Update residual
|
||||
if (learner_param_->gpu_id >= 0) {
|
||||
UpdateBiasResidual(dbias, group_idx, model->param.num_output_group);
|
||||
UpdateBiasResidual(dbias, group_idx, model->learner_model_param_->num_output_group);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -145,7 +147,7 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
|
||||
// Get gradient
|
||||
auto grad = GradientPair(0, 0);
|
||||
if (learner_param_->gpu_id >= 0) {
|
||||
grad = GetGradient(group_idx, model->param.num_output_group, fidx);
|
||||
grad = GetGradient(group_idx, model->learner_model_param_->num_output_group, fidx);
|
||||
}
|
||||
auto dw = static_cast<float>(tparam_.learning_rate *
|
||||
CoordinateDelta(grad.GetGrad(), grad.GetHess(),
|
||||
@@ -154,7 +156,7 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
|
||||
w += dw;
|
||||
|
||||
if (learner_param_->gpu_id >= 0) {
|
||||
UpdateResidual(dw, group_idx, model->param.num_output_group, fidx);
|
||||
UpdateResidual(dw, group_idx, model->learner_model_param_->num_output_group, fidx);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -217,8 +219,7 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
|
||||
return num_row_ == 0;
|
||||
}
|
||||
|
||||
void UpdateGpair(const std::vector<GradientPair> &host_gpair,
|
||||
const gbm::GBLinearModelParam &model_param) {
|
||||
void UpdateGpair(const std::vector<GradientPair> &host_gpair) {
|
||||
dh::safe_cuda(cudaMemcpyAsync(
|
||||
gpair_.data(),
|
||||
host_gpair.data(),
|
||||
|
||||
@@ -27,7 +27,7 @@ class ShotgunUpdater : public LinearUpdater {
|
||||
gbm::GBLinearModel *model, double sum_instance_weight) override {
|
||||
auto &gpair = in_gpair->HostVector();
|
||||
param_.DenormalizePenalties(sum_instance_weight);
|
||||
const int ngroup = model->param.num_output_group;
|
||||
const int ngroup = model->learner_model_param_->num_output_group;
|
||||
|
||||
// update bias
|
||||
for (int gid = 0; gid < ngroup; ++gid) {
|
||||
|
||||
Reference in New Issue
Block a user