diff --git a/src/tree/param.h b/src/tree/param.h
index 405a29c5a..5621c3e8d 100644
--- a/src/tree/param.h
+++ b/src/tree/param.h
@@ -187,7 +187,7 @@ struct TrainParam : public dmlc::Parameter<TrainParam> {
         .set_default(1)
         .describe("Number of GPUs to use for multi-gpu algorithms: -1=use all GPUs");
     DMLC_DECLARE_FIELD(split_evaluator)
-        .set_default("monotonic")
+        .set_default("elastic_net,monotonic")
         .describe("The criteria to use for ranking splits");
     // add alias of parameters
     DMLC_DECLARE_ALIAS(reg_lambda, lambda);
diff --git a/src/tree/split_evaluator.cc b/src/tree/split_evaluator.cc
index b1951e261..f30433b0d 100644
--- a/src/tree/split_evaluator.cc
+++ b/src/tree/split_evaluator.cc
@@ -8,13 +8,12 @@
 #include <dmlc/registry.h>
 #include <algorithm>
 #include <limits>
+#include <sstream>
 #include <string>
 #include "param.h"
 #include "../common/common.h"
 #include "../common/host_device_vector.h"
 
-#define ROOT_PARENT_ID (-1 & ((1U << 31) - 1))
-
 namespace dmlc {
 DMLC_REGISTRY_ENABLE(::xgboost::tree::SplitEvaluatorReg);
 }  // namespace dmlc
@@ -23,12 +22,19 @@ namespace xgboost {
 namespace tree {
 
 SplitEvaluator* SplitEvaluator::Create(const std::string& name) {
-  auto* e = ::dmlc::Registry< ::xgboost::tree::SplitEvaluatorReg>
-      ::Get()->Find(name);
-  if (e == nullptr) {
-    LOG(FATAL) << "Unknown SplitEvaluator " << name;
+  std::stringstream ss(name);
+  std::string item;
+  SplitEvaluator* eval = nullptr;
+  // Construct a chain of SplitEvaluators. This allows one to specify multiple constraints.
+  while (std::getline(ss, item, ',')) {
+    auto* e = ::dmlc::Registry< ::xgboost::tree::SplitEvaluatorReg>
+        ::Get()->Find(item);
+    if (e == nullptr) {
+      LOG(FATAL) << "Unknown SplitEvaluator " << name;
+    }
+    eval = (e->body)(std::unique_ptr<SplitEvaluator>(eval));
   }
-  return (e->body)();
+  return eval;
 }
 
 // Default implementations of some virtual methods that aren't always needed
@@ -41,38 +47,57 @@ void SplitEvaluator::AddSplit(bst_uint nodeid,
                               bst_uint featureid,
                               bst_float leftweight,
                               bst_float rightweight) {}
 
+bst_float SplitEvaluator::ComputeSplitScore(bst_uint nodeid,
+                                            bst_uint featureid,
+                                            const GradStats& left_stats,
+                                            const GradStats& right_stats) const {
+  bst_float left_weight = ComputeWeight(nodeid, left_stats);
+  bst_float right_weight = ComputeWeight(nodeid, right_stats);
+  return ComputeSplitScore(nodeid, featureid, left_stats, right_stats, left_weight, right_weight);
+}
+
-//! \brief Encapsulates the parameters for by the RidgePenalty
-struct RidgePenaltyParams : public dmlc::Parameter<RidgePenaltyParams> {
-  float reg_lambda;
-  float reg_gamma;
+//! \brief Encapsulates the parameters for ElasticNet
+struct ElasticNetParams : public dmlc::Parameter<ElasticNetParams> {
+  bst_float reg_lambda;
+  bst_float reg_alpha;
+  bst_float reg_gamma;
 
-  DMLC_DECLARE_PARAMETER(RidgePenaltyParams) {
+  DMLC_DECLARE_PARAMETER(ElasticNetParams) {
     DMLC_DECLARE_FIELD(reg_lambda)
       .set_lower_bound(0.0)
       .set_default(1.0)
       .describe("L2 regularization on leaf weight");
+    DMLC_DECLARE_FIELD(reg_alpha)
+      .set_lower_bound(0.0)
+      .set_default(0.0)
+      .describe("L1 regularization on leaf weight");
     DMLC_DECLARE_FIELD(reg_gamma)
-      .set_lower_bound(0.0f)
-      .set_default(0.0f)
+      .set_lower_bound(0.0)
+      .set_default(0.0)
      .describe("Cost incurred by adding a new leaf node to the tree");
     DMLC_DECLARE_ALIAS(reg_lambda, lambda);
+    DMLC_DECLARE_ALIAS(reg_alpha, alpha);
     DMLC_DECLARE_ALIAS(reg_gamma, gamma);
   }
 };
 
-DMLC_REGISTER_PARAMETER(RidgePenaltyParams);
+DMLC_REGISTER_PARAMETER(ElasticNetParams);
 
-/*! \brief Applies an L2 penalty and per-leaf penalty. */
-class RidgePenalty final : public SplitEvaluator {
+/*! \brief Applies an elastic net penalty and per-leaf penalty. */
+class ElasticNet final : public SplitEvaluator {
  public:
+  explicit ElasticNet(std::unique_ptr<SplitEvaluator> inner) {
+    if (inner) {
+      LOG(FATAL) << "ElasticNet does not accept an inner SplitEvaluator";
+    }
+  }
   void Init(
       const std::vector<std::pair<std::string, std::string> >& args) override {
     params_.InitAllowUnknown(args);
   }
 
   SplitEvaluator* GetHostClone() const override {
-    auto r = new RidgePenalty();
+    auto r = new ElasticNet(nullptr);
     r->params_ = this->params_;
 
     return r;
@@ -80,31 +105,55 @@ class RidgePenalty final : public SplitEvaluator {
   bst_float ComputeSplitScore(bst_uint nodeid,
                               bst_uint featureid,
-                              const GradStats& left,
-                              const GradStats& right) const override {
-    // parentID is not needed for this split evaluator. Just use 0.
-    return ComputeScore(0, left) + ComputeScore(0, right);
+                              const GradStats& left_stats,
+                              const GradStats& right_stats,
+                              bst_float left_weight,
+                              bst_float right_weight) const override {
+    return ComputeScore(nodeid, left_stats, left_weight)
+        + ComputeScore(nodeid, right_stats, right_weight);
   }
 
-  bst_float ComputeScore(bst_uint parentID, const GradStats& stats)
+  bst_float ComputeSplitScore(bst_uint nodeid,
+                              bst_uint featureid,
+                              const GradStats& left_stats,
+                              const GradStats& right_stats) const override {
+    return ComputeScore(nodeid, left_stats) + ComputeScore(nodeid, right_stats);
+  }
+
+  bst_float ComputeScore(bst_uint parentID, const GradStats &stats, bst_float weight)
       const override {
-    return (stats.sum_grad * stats.sum_grad)
-        / (stats.sum_hess + params_.reg_lambda) - params_.reg_gamma;
+    auto loss = weight * (2.0 * stats.sum_grad + stats.sum_hess * weight
+        + params_.reg_lambda * weight)
+        + params_.reg_alpha * std::abs(weight);
+    return -loss;
+  }
+
+  bst_float ComputeScore(bst_uint parentID, const GradStats &stats) const {
+    return Sqr(ThresholdL1(stats.sum_grad)) / (stats.sum_hess + params_.reg_lambda);
   }
 
   bst_float ComputeWeight(bst_uint parentID, const GradStats& stats)
       const override {
-    return -stats.sum_grad / (stats.sum_hess + params_.reg_lambda);
+    return -ThresholdL1(stats.sum_grad) / (stats.sum_hess + params_.reg_lambda);
   }
 
  private:
-  RidgePenaltyParams params_;
+  ElasticNetParams params_;
+
+  inline double ThresholdL1(double g) const {
+    if (g > params_.reg_alpha) {
+      g = g - params_.reg_alpha;
+    } else if (g < -params_.reg_alpha) {
+      g = g + params_.reg_alpha;
+    }
+    return g;
+  }
 };
 
-XGBOOST_REGISTER_SPLIT_EVALUATOR(RidgePenalty, "ridge")
-.describe("Use an L2 penalty term for the weights and a cost per leaf node")
-.set_body([]() {
-  return new RidgePenalty();
+XGBOOST_REGISTER_SPLIT_EVALUATOR(ElasticNet, "elastic_net")
+.describe("Use an elastic net regulariser and a cost per leaf node")
+.set_body([](std::unique_ptr<SplitEvaluator> inner) {
+  return new ElasticNet(std::move(inner));
 });
 
 /*! \brief Encapsulates the parameters required by the MonotonicConstraint
@@ -113,23 +162,11 @@ XGBOOST_REGISTER_SPLIT_EVALUATOR(RidgePenalty, "ridge")
 struct MonotonicConstraintParams
     : public dmlc::Parameter<MonotonicConstraintParams> {
   std::vector<bst_int> monotone_constraints;
-  float reg_lambda;
-  float reg_gamma;
 
   DMLC_DECLARE_PARAMETER(MonotonicConstraintParams) {
-    DMLC_DECLARE_FIELD(reg_lambda)
-      .set_lower_bound(0.0)
-      .set_default(1.0)
-      .describe("L2 regularization on leaf weight");
-    DMLC_DECLARE_FIELD(reg_gamma)
-      .set_lower_bound(0.0f)
-      .set_default(0.0f)
-      .describe("Cost incurred by adding a new leaf node to the tree");
     DMLC_DECLARE_FIELD(monotone_constraints)
       .set_default(std::vector<bst_int>())
       .describe("Constraint of variable monotonicity");
-    DMLC_DECLARE_ALIAS(reg_lambda, lambda);
-    DMLC_DECLARE_ALIAS(reg_gamma, gamma);
   }
 };
 
@@ -140,8 +177,16 @@ DMLC_REGISTER_PARAMETER(MonotonicConstraintParams);
 */
 class MonotonicConstraint final : public SplitEvaluator {
  public:
+  explicit MonotonicConstraint(std::unique_ptr<SplitEvaluator> inner) {
+    if (!inner) {
+      LOG(FATAL) << "MonotonicConstraint must be given an inner evaluator";
+    }
+    inner_ = std::move(inner);
+  }
+
   void Init(const std::vector<std::pair<std::string, std::string> >& args)
       override {
+    inner_->Init(args);
     params_.InitAllowUnknown(args);
     Reset();
   }
@@ -153,22 +198,11 @@ class MonotonicConstraint final : public SplitEvaluator {
   SplitEvaluator* GetHostClone() const override {
     if (params_.monotone_constraints.size() == 0) {
-      // No monotone constraints specified, make a RidgePenalty evaluator
-      using std::pair;
-      using std::string;
-      using std::to_string;
-      using std::vector;
-      auto c = new RidgePenalty();
-      vector<pair<string, string> > args;
-      args.emplace_back(
-        pair<string, string>("reg_lambda", to_string(params_.reg_lambda)));
-      args.emplace_back(
-        pair<string, string>("reg_gamma", to_string(params_.reg_gamma)));
-      c->Init(args);
-      c->Reset();
-      return c;
+      // No monotone constraints specified, just return a clone of inner to speed things up
+      return inner_->GetHostClone();
     } else {
-      auto c = new MonotonicConstraint();
+      auto c = new MonotonicConstraint(
+          std::unique_ptr<SplitEvaluator>(inner_->GetHostClone()));
       c->params_ = this->params_;
       c->Reset();
       return c;
@@ -177,35 +211,32 @@ class MonotonicConstraint final : public SplitEvaluator {
   bst_float ComputeSplitScore(bst_uint nodeid,
                               bst_uint featureid,
-                              const GradStats& left,
-                              const GradStats& right) const override {
+                              const GradStats& left_stats,
+                              const GradStats& right_stats,
+                              bst_float left_weight,
+                              bst_float right_weight) const override {
     bst_float infinity = std::numeric_limits<bst_float>::infinity();
     bst_int constraint = GetConstraint(featureid);
-
-    bst_float score = ComputeScore(nodeid, left) + ComputeScore(nodeid, right);
-    bst_float leftweight = ComputeWeight(nodeid, left);
-    bst_float rightweight = ComputeWeight(nodeid, right);
+    bst_float score = inner_->ComputeSplitScore(
+        nodeid, featureid, left_stats, right_stats, left_weight, right_weight);
 
     if (constraint == 0) {
       return score;
     } else if (constraint > 0) {
-      return leftweight <= rightweight ? score : -infinity;
+      return left_weight <= right_weight ? score : -infinity;
     } else {
-      return leftweight >= rightweight ? score : -infinity;
+      return left_weight >= right_weight ? score : -infinity;
     }
   }
 
-  bst_float ComputeScore(bst_uint parentID, const GradStats& stats)
+  bst_float ComputeScore(bst_uint parentID, const GradStats& stats, bst_float weight)
       const override {
-    bst_float w = ComputeWeight(parentID, stats);
-
-    return -(2.0 * stats.sum_grad * w + (stats.sum_hess + params_.reg_lambda)
-        * w * w);
+    return inner_->ComputeScore(parentID, stats, weight);
   }
 
   bst_float ComputeWeight(bst_uint parentID, const GradStats& stats)
       const override {
-    bst_float weight = -stats.sum_grad / (stats.sum_hess + params_.reg_lambda);
+    bst_float weight = inner_->ComputeWeight(parentID, stats);
 
     if (parentID == ROOT_PARENT_ID) {
       // This is the root node
@@ -225,6 +256,7 @@ class MonotonicConstraint final : public SplitEvaluator {
                 bst_uint featureid,
                 bst_float leftweight,
                 bst_float rightweight) override {
+    inner_->AddSplit(nodeid, leftid, rightid, featureid, leftweight, rightweight);
     bst_uint newsize = std::max(leftid, rightid) + 1;
     lower_.resize(newsize);
     upper_.resize(newsize);
@@ -250,6 +282,7 @@ class MonotonicConstraint final : public SplitEvaluator {
 
  private:
   MonotonicConstraintParams params_;
+  std::unique_ptr<SplitEvaluator> inner_;
   std::vector<bst_float> lower_;
   std::vector<bst_float> upper_;
 
@@ -265,8 +298,8 @@ class MonotonicConstraint final : public SplitEvaluator {
 XGBOOST_REGISTER_SPLIT_EVALUATOR(MonotonicConstraint, "monotonic")
 .describe("Enforces that the tree is monotonically increasing/decreasing "
           "w.r.t. specified features")
-.set_body([]() {
-  return new MonotonicConstraint();
+.set_body([](std::unique_ptr<SplitEvaluator> inner) {
+  return new MonotonicConstraint(std::move(inner));
 });
 
 }  // namespace tree
diff --git a/src/tree/split_evaluator.h b/src/tree/split_evaluator.h
index 9af363806..968d6fe77 100644
--- a/src/tree/split_evaluator.h
+++ b/src/tree/split_evaluator.h
@@ -15,6 +15,8 @@
 #include <utility>
 #include <vector>
 
+#define ROOT_PARENT_ID (-1 & ((1U << 31) - 1))
+
 namespace xgboost {
 namespace tree {
 
@@ -40,13 +42,21 @@ class SplitEvaluator {
   // Computes the score (negative loss) resulting from performing this split
   virtual bst_float ComputeSplitScore(bst_uint nodeid,
-                              bst_uint featureid,
-                              const GradStats& left,
-                              const GradStats& right) const = 0;
+                                      bst_uint featureid,
+                                      const GradStats& left_stats,
+                                      const GradStats& right_stats,
+                                      bst_float left_weight,
+                                      bst_float right_weight) const = 0;
+
+  virtual bst_float ComputeSplitScore(bst_uint nodeid,
+                                      bst_uint featureid,
+                                      const GradStats& left_stats,
+                                      const GradStats& right_stats) const;
 
   // Compute the Score for a node with the given stats
-  virtual bst_float ComputeScore(bst_uint parentid, const GradStats& stats)
-      const = 0;
+  virtual bst_float ComputeScore(bst_uint parentid,
+                                 const GradStats &stats,
+                                 bst_float weight) const = 0;
 
   // Compute the weight for a node with the given stats
   virtual bst_float ComputeWeight(bst_uint parentid, const GradStats& stats)
@@ -62,7 +72,7 @@ class SplitEvaluator {
 struct SplitEvaluatorReg
     : public dmlc::FunctionRegEntryBase<SplitEvaluatorReg,
-        std::function<SplitEvaluator* ()> > {};
+        std::function<SplitEvaluator* (std::unique_ptr<SplitEvaluator>)> > {};
 
 /*!
  * \brief Macro to register tree split evaluator.
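
With the registry signature above, SplitEvaluator::Create builds each evaluator in the comma-separated split_evaluator string around the one to its left, so "elastic_net,monotonic" yields a MonotonicConstraint wrapping an ElasticNet. A minimal sketch of driving the chain from the Python API, assuming the standard xgboost Python package; the dataset and most values are illustrative, only split_evaluator, monotone_constraints, alpha, and lambda are real parameters:

import numpy as np
import xgboost as xgb

# Toy data with a single, roughly increasing feature (illustrative only).
rng = np.random.RandomState(0)
X = np.linspace(0, 1, 100).reshape(-1, 1)
y = X.ravel() + rng.normal(scale=0.01, size=100)
dtrain = xgb.DMatrix(X, label=y)

params = {
    'split_evaluator': 'elastic_net,monotonic',  # the new default chain, spelled out
    'monotone_constraints': '(1)',  # enforced by the MonotonicConstraint link
    'alpha': 0.1,    # L1 term, handled by the ElasticNet link
    'lambda': 1.0,   # L2 term, handled by the ElasticNet link
}
bst = xgb.train(params, dtrain, num_boost_round=10)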
diff --git a/src/tree/updater_colmaker.cc b/src/tree/updater_colmaker.cc
index d77fc7d9c..df9a9a453 100644
--- a/src/tree/updater_colmaker.cc
+++ b/src/tree/updater_colmaker.cc
@@ -243,10 +243,10 @@ class ColMaker: public TreeUpdater {
       // calculating the weights
       for (int nid : qexpand) {
         bst_uint parentid = tree[nid].Parent();
-        snode_[nid].root_gain = static_cast<float>(
-            spliteval_->ComputeScore(parentid, snode_[nid].stats));
         snode_[nid].weight = static_cast<float>(
             spliteval_->ComputeWeight(parentid, snode_[nid].stats));
+        snode_[nid].root_gain = static_cast<float>(
+            spliteval_->ComputeScore(parentid, snode_[nid].stats, snode_[nid].weight));
       }
     }
     /*! \brief update queue expand add in new leaves */
diff --git a/src/tree/updater_fast_hist.cc b/src/tree/updater_fast_hist.cc
index 2fb9f80b1..4610bdd38 100644
--- a/src/tree/updater_fast_hist.cc
+++ b/src/tree/updater_fast_hist.cc
@@ -752,10 +752,10 @@ class FastHistMaker: public TreeUpdater {
       // calculating the weights
       {
         bst_uint parentid = tree[nid].Parent();
-        snode_[nid].root_gain = static_cast<float>(
-            spliteval_->ComputeScore(parentid, snode_[nid].stats));
         snode_[nid].weight = static_cast<float>(
             spliteval_->ComputeWeight(parentid, snode_[nid].stats));
+        snode_[nid].root_gain = static_cast<float>(
+            spliteval_->ComputeScore(parentid, snode_[nid].stats, snode_[nid].weight));
       }
     }
diff --git a/tests/python/test_tree_regularization.py b/tests/python/test_tree_regularization.py
new file mode 100644
index 000000000..2b342d687
--- /dev/null
+++ b/tests/python/test_tree_regularization.py
@@ -0,0 +1,60 @@
+import numpy as np
+import unittest
+import xgboost as xgb
+
+from numpy.testing import assert_approx_equal
+
+train_data = xgb.DMatrix(np.array([[1]]), label=np.array([1]))
+
+
+class TestTreeRegularization(unittest.TestCase):
+    def test_alpha(self):
+        params = {
+            'tree_method': 'exact', 'silent': 1, 'objective': 'reg:linear',
+            'eta': 1,
+            'lambda': 0,
+            'alpha': 0.1
+        }
+
+        model = xgb.train(params, train_data, 1)
+        preds = model.predict(train_data)
+
+        # Default prediction (with no trees) is 0.5
+        # sum_grad = (0.5 - 1.0)
+        # sum_hess = 1.0
+        # 0.9 = 0.5 - (sum_grad - alpha * sgn(sum_grad)) / sum_hess
+        assert_approx_equal(preds[0], 0.9)
+
+    def test_lambda(self):
+        params = {
+            'tree_method': 'exact', 'silent': 1, 'objective': 'reg:linear',
+            'eta': 1,
+            'lambda': 1,
+            'alpha': 0
+        }
+
+        model = xgb.train(params, train_data, 1)
+        preds = model.predict(train_data)
+
+        # Default prediction (with no trees) is 0.5
+        # sum_grad = (0.5 - 1.0)
+        # sum_hess = 1.0
+        # 0.75 = 0.5 - sum_grad / (sum_hess + lambda)
+        assert_approx_equal(preds[0], 0.75)
+
+    def test_alpha_and_lambda(self):
+        params = {
+            'tree_method': 'exact', 'silent': 1, 'objective': 'reg:linear',
+            'eta': 1,
+            'lambda': 1,
+            'alpha': 0.1
+        }
+
+        model = xgb.train(params, train_data, 1)
+        preds = model.predict(train_data)
+
+        # Default prediction (with no trees) is 0.5
+        # sum_grad = (0.5 - 1.0)
+        # sum_hess = 1.0
+        # 0.7 = 0.5 - (sum_grad - alpha * sgn(sum_grad)) / (sum_hess + lambda)
+        assert_approx_equal(preds[0], 0.7)
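
As a cross-check on the expected values in these tests, the leaf weight follows directly from ElasticNet::ComputeWeight above: soft-threshold the gradient sum with ThresholdL1, then apply the usual Newton step. A standalone sketch, independent of the patch and illustrative only:

def threshold_l1(g, alpha):
    # Mirrors ElasticNet::ThresholdL1 from the patch above.
    if g > alpha:
        return g - alpha
    elif g < -alpha:
        return g + alpha
    return g

def leaf_weight(sum_grad, sum_hess, alpha, lam):
    # Mirrors ElasticNet::ComputeWeight: -T(G) / (H + lambda).
    return -threshold_l1(sum_grad, alpha) / (sum_hess + lam)

base_score = 0.5                  # default prediction with no trees
sum_grad, sum_hess = -0.5, 1.0    # squared-error grad/hess for label 1.0, eta = 1

print(base_score + leaf_weight(sum_grad, sum_hess, alpha=0.1, lam=0.0))  # 0.9
print(base_score + leaf_weight(sum_grad, sum_hess, alpha=0.0, lam=1.0))  # 0.75
print(base_score + leaf_weight(sum_grad, sum_hess, alpha=0.1, lam=1.0))  # 0.7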