From ad3a0bbab8eb813cfca897945bba01c688ff59d3 Mon Sep 17 00:00:00 2001 From: Vadim Khotilovich Date: Wed, 12 Sep 2018 08:43:41 -0500 Subject: [PATCH] Add the missing max_delta_step (#3668) * add max_delta_step to SplitEvaluator * test for max_delta_step * missing x2 factor for L1 term * remove gamma from ElasticNet --- R-package/tests/testthat/test_basic.R | 14 ++++++++++++ src/tree/split_evaluator.cc | 31 ++++++++++++++++++--------- 2 files changed, 35 insertions(+), 10 deletions(-) diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index e7a6679d3..c9cf73581 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -223,3 +223,17 @@ test_that("train and predict with non-strict classes", { expect_error(pr <- predict(bst, train_dense), regexp = NA) expect_equal(pr0, pr) }) + +test_that("max_delta_step works", { + dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label) + watchlist <- list(train = dtrain) + param <- list(objective = "binary:logistic", eval_metric="logloss", max_depth = 2, nthread = 2, eta = 0.5) + nrounds = 5 + # model with no restriction on max_delta_step + bst1 <- xgb.train(param, dtrain, nrounds, watchlist, verbose = 1) + # model with restricted max_delta_step + bst2 <- xgb.train(param, dtrain, nrounds, watchlist, verbose = 1, max_delta_step = 1) + # the no-restriction model is expected to have consistently lower loss during the initial iterations + expect_true(all(bst1$evaluation_log$train_logloss < bst2$evaluation_log$train_logloss)) + expect_lt(mean(bst1$evaluation_log$train_logloss)/mean(bst2$evaluation_log$train_logloss), 0.8) +}) diff --git a/src/tree/split_evaluator.cc b/src/tree/split_evaluator.cc index 1c161d7ea..8b67ab107 100644 --- a/src/tree/split_evaluator.cc +++ b/src/tree/split_evaluator.cc @@ -63,7 +63,10 @@ bst_float SplitEvaluator::ComputeSplitScore(bst_uint nodeid, struct ElasticNetParams : public dmlc::Parameter { bst_float
reg_lambda; bst_float reg_alpha; - bst_float reg_gamma; + // maximum delta update we can add in weight estimation + // this parameter can be used to stabilize update + // default=0 means no constraint on weight delta + float max_delta_step; DMLC_DECLARE_PARAMETER(ElasticNetParams) { DMLC_DECLARE_FIELD(reg_lambda) @@ -74,13 +77,13 @@ struct ElasticNetParams : public dmlc::Parameter { .set_lower_bound(0.0) .set_default(0.0) .describe("L1 regularization on leaf weight"); - DMLC_DECLARE_FIELD(reg_gamma) - .set_lower_bound(0.0) - .set_default(0.0) - .describe("Cost incurred by adding a new leaf node to the tree"); + DMLC_DECLARE_FIELD(max_delta_step) + .set_lower_bound(0.0f) + .set_default(0.0f) + .describe("Maximum delta step we allow each tree's weight estimate to be. "\ + "If the value is set to 0, it means there is no constraint"); DMLC_DECLARE_ALIAS(reg_lambda, lambda); DMLC_DECLARE_ALIAS(reg_alpha, alpha); - DMLC_DECLARE_ALIAS(reg_gamma, gamma); } }; @@ -127,17 +130,25 @@ class ElasticNet final : public SplitEvaluator { const override { auto loss = weight * (2.0 * stats.sum_grad + stats.sum_hess * weight + params_.reg_lambda * weight) - + params_.reg_alpha * std::abs(weight); + + 2.0 * params_.reg_alpha * std::abs(weight); return -loss; } bst_float ComputeScore(bst_uint parentID, const GradStats &stats) const { - return Sqr(ThresholdL1(stats.sum_grad)) / (stats.sum_hess + params_.reg_lambda); + if (params_.max_delta_step == 0.0f) { + return Sqr(ThresholdL1(stats.sum_grad)) / (stats.sum_hess + params_.reg_lambda); + } else { + return ComputeScore(parentID, stats, ComputeWeight(parentID, stats)); + } } bst_float ComputeWeight(bst_uint parentID, const GradStats& stats) const override { - return -ThresholdL1(stats.sum_grad) / (stats.sum_hess + params_.reg_lambda); + bst_float w = -ThresholdL1(stats.sum_grad) / (stats.sum_hess + params_.reg_lambda); + if (params_.max_delta_step != 0.0f && std::abs(w) > params_.max_delta_step) { + w = 
std::copysign(params_.max_delta_step, w); + } + return w; } private: @@ -155,7 +166,7 @@ class ElasticNet final : public SplitEvaluator { }; XGBOOST_REGISTER_SPLIT_EVALUATOR(ElasticNet, "elastic_net") -.describe("Use an elastic net regulariser and a cost per leaf node") +.describe("Use an elastic net regulariser") .set_body([](std::unique_ptr inner) { return new ElasticNet(std::move(inner)); });