Add the missing max_delta_step (#3668)

* add max_delta_step to SplitEvaluator
* test for max_delta_step
* missing x2 factor for L1 term
* remove gamma from ElasticNet

parent d1e75d615e
commit ad3a0bbab8
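For orientation (my summary of the diff below, not part of the commit message): max_delta_step caps the magnitude of the Newton-step leaf weight computed by the ElasticNet split evaluator. With G and H the gradient and hessian sums, T_alpha the L1 soft-threshold, and delta = max_delta_step, the weight becomes

$$w^{*} = -\frac{T_{\alpha}(G)}{H+\lambda}, \qquad w = \begin{cases} w^{*} & \text{if } \delta = 0 \text{ or } |w^{*}| \le \delta \\ \operatorname{sign}(w^{*})\,\delta & \text{otherwise.} \end{cases}$$

When delta = 0 the split score keeps its closed form $T_{\alpha}(G)^{2}/(H+\lambda)$; otherwise it has to be evaluated at the clamped weight, which is exactly what the new ComputeScore branch does.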
@@ -223,3 +223,17 @@ test_that("train and predict with non-strict classes", {
   expect_error(pr <- predict(bst, train_dense), regexp = NA)
   expect_equal(pr0, pr)
 })
+
+test_that("max_delta_step works", {
+  dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
+  watchlist <- list(train = dtrain)
+  param <- list(objective = "binary:logistic", eval_metric = "logloss", max_depth = 2, nthread = 2, eta = 0.5)
+  nrounds <- 5
+  # model with no restriction on max_delta_step
+  bst1 <- xgb.train(param, dtrain, nrounds, watchlist, verbose = 1)
+  # model with restricted max_delta_step
+  bst2 <- xgb.train(param, dtrain, nrounds, watchlist, verbose = 1, max_delta_step = 1)
+  # the no-restriction model is expected to have consistently lower loss during the initial iterations
+  expect_true(all(bst1$evaluation_log$train_logloss < bst2$evaluation_log$train_logloss))
+  expect_lt(mean(bst1$evaluation_log$train_logloss) / mean(bst2$evaluation_log$train_logloss), 0.8)
+})
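A note on the assertions (my reading of the test, not part of the commit): with max_delta_step = 1 each leaf weight is clamped to |w| <= 1, so the restricted model can only approach the optimum in small increments. Its training logloss should therefore sit strictly above the unrestricted model's in every one of the five rounds, and the 0.8 bound on the mean-logloss ratio checks that the gap is substantial rather than marginal.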
@@ -63,7 +63,10 @@ bst_float SplitEvaluator::ComputeSplitScore(bst_uint nodeid,
 struct ElasticNetParams : public dmlc::Parameter<ElasticNetParams> {
   bst_float reg_lambda;
   bst_float reg_alpha;
-  bst_float reg_gamma;
+  // maximum delta update we can add in weight estimation
+  // this parameter can be used to stabilize update
+  // default=0 means no constraint on weight delta
+  float max_delta_step;

   DMLC_DECLARE_PARAMETER(ElasticNetParams) {
     DMLC_DECLARE_FIELD(reg_lambda)
@@ -74,13 +77,13 @@ struct ElasticNetParams : public dmlc::Parameter<ElasticNetParams> {
       .set_lower_bound(0.0)
       .set_default(0.0)
       .describe("L1 regularization on leaf weight");
-    DMLC_DECLARE_FIELD(reg_gamma)
-      .set_lower_bound(0.0)
-      .set_default(0.0)
-      .describe("Cost incurred by adding a new leaf node to the tree");
+    DMLC_DECLARE_FIELD(max_delta_step)
+      .set_lower_bound(0.0f)
+      .set_default(0.0f)
+      .describe("Maximum delta step we allow each tree's weight estimate to be. "
+                "If the value is set to 0, it means there is no constraint");
     DMLC_DECLARE_ALIAS(reg_lambda, lambda);
     DMLC_DECLARE_ALIAS(reg_alpha, alpha);
-    DMLC_DECLARE_ALIAS(reg_gamma, gamma);
   }
 };

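Why the commit message flags a "missing x2 factor for L1 term": ComputeScore in the next hunk accumulates twice the usual regularised objective. Assuming the standard per-leaf objective (notation mine)

$$L(w) = Gw + \tfrac{1}{2}(H+\lambda)w^{2} + \alpha\lvert w\rvert,$$

doubling it gives $2L(w) = 2Gw + (H+\lambda)w^{2} + 2\alpha\lvert w\rvert$, which is exactly the expression assembled in the loss variable below; the $\alpha$ term had been missing its factor of 2.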
@@ -127,17 +130,25 @@ class ElasticNet final : public SplitEvaluator {
       const override {
     auto loss = weight * (2.0 * stats.sum_grad + stats.sum_hess * weight
         + params_.reg_lambda * weight)
-        + params_.reg_alpha * std::abs(weight);
+        + 2.0 * params_.reg_alpha * std::abs(weight);
     return -loss;
   }

   bst_float ComputeScore(bst_uint parentID, const GradStats &stats) const {
+    if (params_.max_delta_step == 0.0f) {
       return Sqr(ThresholdL1(stats.sum_grad)) / (stats.sum_hess + params_.reg_lambda);
+    } else {
+      return ComputeScore(parentID, stats, ComputeWeight(parentID, stats));
+    }
   }

   bst_float ComputeWeight(bst_uint parentID, const GradStats& stats)
       const override {
-    return -ThresholdL1(stats.sum_grad) / (stats.sum_hess + params_.reg_lambda);
+    bst_float w = -ThresholdL1(stats.sum_grad) / (stats.sum_hess + params_.reg_lambda);
+    if (params_.max_delta_step != 0.0f && std::abs(w) > params_.max_delta_step) {
+      w = std::copysign(params_.max_delta_step, w);
+    }
+    return w;
   }

  private:
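To make the clamping behaviour concrete, here is a minimal standalone sketch (a hypothetical repro, not the XGBoost sources; the L1 soft-threshold is inlined away and all names are mine) that mirrors the new ComputeWeight logic:

#include <cmath>
#include <cstdio>

// Hypothetical mirror of the clamped Newton step above;
// ThresholdL1 is omitted for brevity.
float ClampedWeight(float sum_grad, float sum_hess,
                    float reg_lambda, float max_delta_step) {
  float w = -sum_grad / (sum_hess + reg_lambda);
  if (max_delta_step != 0.0f && std::abs(w) > max_delta_step) {
    w = std::copysign(max_delta_step, w);  // cap the magnitude, keep the sign
  }
  return w;
}

int main() {
  // A tiny hessian sum makes the raw step large; the cap bounds it at +/-1.
  std::printf("%f\n", ClampedWeight(-4.0f, 0.01f, 1.0f, 0.0f));  // ~3.96, unconstrained
  std::printf("%f\n", ClampedWeight(-4.0f, 0.01f, 1.0f, 1.0f));  // 1.00, clamped
  return 0;
}

std::copysign(delta, w) returns delta with w's sign, the usual idiom for symmetric clipping.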
@@ -155,7 +166,7 @@ class ElasticNet final : public SplitEvaluator {
 };

 XGBOOST_REGISTER_SPLIT_EVALUATOR(ElasticNet, "elastic_net")
-.describe("Use an elastic net regulariser and a cost per leaf node")
+.describe("Use an elastic net regulariser")
 .set_body([](std::unique_ptr<SplitEvaluator> inner) {
   return new ElasticNet(std::move(inner));
 });
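The registration description drops "and a cost per leaf node" to match the removal of reg_gamma above: the per-leaf cost is no longer handled by this evaluator.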