* Refactor configuration [Part II].
* General changes:
** Remove `Init` methods to avoid ambiguity.
** Remove `Configure(std::map<>)` to avoid redundant copying and to prepare for
parameter validation (`InitAllowUnknown` returns a `std::vector` of the
unknown arguments).
** Add a name to each tree updater for easier debugging (see the usage sketch
after the `updater_prune.cc` listing below).
* Learner changes:
** Make `LearnerImpl` the single source of configuration.
All configuration is stored in and applied by `LearnerImpl::Configure()`.
** Remove `booster` from the C API.
It was originally kept "for compatibility reasons", but the reason was never
stated, so it is simply removed here.
** Add a `metric_names_` field to `LearnerImpl`.
** Remove `LazyInit`. Configuration is now always lazy.
** Run `Configure` before every iteration (see the lazy-configuration sketch
after this list).
* Predictor changes:
** Allocate both the CPU and the GPU predictor.
** Remove `cpu_predictor` from `gpu_predictor`.
`GBTree` now dispatches to the appropriate predictor (see the dispatch sketch
after this list).
** Remove some GPU predictor tests.
* IO:
No IO changes. Binary model format stability is tested by comparing the
hashes of saved models between the two commits (see the hashing sketch after
this list).
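
The lazy-configuration flow above can be illustrated with a minimal
standalone sketch. `LearnerSketch`, `SetParam`, and `need_configure_` are
illustrative stand-ins rather than the real `LearnerImpl` members, and `Args`
mirrors the vector-of-string-pairs type mentioned under the general changes:

#include <string>
#include <utility>
#include <vector>

using Args = std::vector<std::pair<std::string, std::string>>;

class LearnerSketch {
 public:
  // Setting a parameter only records it and marks the learner dirty;
  // nothing is applied immediately.
  void SetParam(std::string key, std::string value) {
    cfg_.emplace_back(std::move(key), std::move(value));
    need_configure_ = true;
  }
  // Configure runs at the start of every iteration, so configuration
  // is always lazy and can never go stale mid-training.
  void UpdateOneIter(int /*iter*/) {
    this->Configure();
    // ... run one boosting iteration ...
  }

 private:
  // The single place where stored parameters are pushed down to
  // sub-components (gbm, metrics, tree updaters, ...).
  void Configure() {
    if (!need_configure_) { return; }
    // e.g. gbm_->Configure(cfg_); for each metric: m->Configure(cfg_);
    need_configure_ = false;
  }

  Args cfg_;
  bool need_configure_ {true};
};

int main() {
  LearnerSketch learner;
  learner.SetParam("max_depth", "3");
  for (int iter = 0; iter < 2; ++iter) {
    // The pending config is applied on the first call; later calls no-op
    // until another SetParam marks the learner dirty again.
    learner.UpdateOneIter(iter);
  }
}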
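
A similarly minimal sketch of the predictor dispatch; the stub `Predictor`
hierarchy and the `on_device` flag are placeholders for the real interface
and for whatever condition `GBTree` actually uses to choose:

#include <iostream>
#include <memory>

// Stand-in for the real Predictor interface; only the shape matters here.
struct Predictor {
  virtual ~Predictor() = default;
  virtual void PredictBatch() = 0;
};
struct CPUPredictor : Predictor {
  void PredictBatch() override { std::cout << "cpu predict\n"; }
};
struct GPUPredictor : Predictor {
  void PredictBatch() override { std::cout << "gpu predict\n"; }
};

// Sketch of the dispatch now owned by GBTree: both predictors are
// allocated up front and each call selects one, instead of gpu_predictor
// holding its own internal cpu_predictor as a fallback.
class GBTreeSketch {
 public:
  void PredictBatch(bool on_device) {
    auto* p = on_device ? gpu_.get() : cpu_.get();
    p->PredictBatch();
  }

 private:
  std::unique_ptr<Predictor> cpu_{new CPUPredictor()};
  std::unique_ptr<Predictor> gpu_{new GPUPredictor()};
};

int main() {
  GBTreeSketch gbtree;
  gbtree.PredictBatch(false);  // dispatches to the CPU predictor
  gbtree.PredictBatch(true);   // dispatches to the GPU predictor
}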
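
And a sketch of the model-stability check from the IO note; the file names
are hypothetical and `std::hash` merely stands in for whatever digest the
actual comparison used:

#include <fstream>
#include <functional>
#include <iostream>
#include <iterator>
#include <string>

// Hash the raw bytes of a saved model file.
std::size_t HashFile(const std::string& path) {
  std::ifstream in(path, std::ios::binary);
  std::string bytes{std::istreambuf_iterator<char>(in),
                    std::istreambuf_iterator<char>()};
  return std::hash<std::string>{}(bytes);
}

int main() {
  // Hypothetical file names: models saved by builds of the two commits.
  bool same = HashFile("model_before.bin") == HashFile("model_after.bin");
  std::cout << (same ? "binary format unchanged" : "binary format changed")
            << "\n";
}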
updater_prune.cc (97 lines, 2.7 KiB, C++):
/*!
 * Copyright 2014 by Contributors
 * \file updater_prune.cc
 * \brief prune a tree given the statistics
 * \author Tianqi Chen
 */
#include <rabit/rabit.h>
#include <xgboost/tree_updater.h>

#include <string>
#include <memory>

#include "./param.h"
#include "../common/io.h"

namespace xgboost {
namespace tree {

DMLC_REGISTRY_FILE_TAG(updater_prune);

/*! \brief pruner that prunes a tree after growing finishes */
class TreePruner: public TreeUpdater {
 public:
  TreePruner() {
    syncher_.reset(TreeUpdater::Create("sync", tparam_));
  }
  char const* Name() const override {
    return "prune";
  }

  // set training parameter
  void Configure(const Args& args) override {
    param_.InitAllowUnknown(args);
    syncher_->Configure(args);
  }
  // update the tree, do pruning
  void Update(HostDeviceVector<GradientPair> *gpair,
              DMatrix *p_fmat,
              const std::vector<RegTree*> &trees) override {
    // rescale learning rate according to size of trees
    float lr = param_.learning_rate;
    param_.learning_rate = lr / trees.size();
    for (auto tree : trees) {
      this->DoPrune(*tree);
    }
    param_.learning_rate = lr;
    syncher_->Update(gpair, p_fmat, trees);
  }

 private:
  // try to prune off current leaf
  inline int TryPruneLeaf(RegTree &tree, int nid, int depth, int npruned) { // NOLINT(*)
    if (tree[nid].IsRoot()) return npruned;
    int pid = tree[nid].Parent();
    RTreeNodeStat &s = tree.Stat(pid);
    ++s.leaf_child_cnt;
    if (s.leaf_child_cnt >= 2 && param_.NeedPrune(s.loss_chg, depth - 1)) {
      // need to be pruned
      tree.ChangeToLeaf(pid, param_.learning_rate * s.base_weight);
      // tail recursion
      return this->TryPruneLeaf(tree, pid, depth - 1, npruned + 2);
    } else {
      return npruned;
    }
  }
  /*! \brief do pruning of a tree */
  inline void DoPrune(RegTree &tree) { // NOLINT(*)
    int npruned = 0;
    // initialize auxiliary statistics
    for (int nid = 0; nid < tree.param.num_nodes; ++nid) {
      tree.Stat(nid).leaf_child_cnt = 0;
    }
    for (int nid = 0; nid < tree.param.num_nodes; ++nid) {
      if (tree[nid].IsLeaf()) {
        npruned = this->TryPruneLeaf(tree, nid, tree.GetDepth(nid), npruned);
      }
    }
    LOG(INFO) << "tree pruning end, " << tree.param.num_roots << " roots, "
              << tree.NumExtraNodes() << " extra nodes, " << npruned
              << " pruned nodes, max_depth=" << tree.MaxDepth();
  }

 private:
  // synchronizer
  std::unique_ptr<TreeUpdater> syncher_;
  // training parameter
  TrainParam param_;
};

XGBOOST_REGISTER_TREE_UPDATER(TreePruner, "prune")
.describe("Pruner that prune the tree according to statistics.")
.set_body([]() {
    return new TreePruner();
  });
}  // namespace tree
}  // namespace xgboost
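
The file above already exercises two of the listed changes: `Name()` is the
debugging name added to every tree updater, and `Configure(const Args&)`
replaces the old `std::map` overload. A caller would use the registered
updater roughly as below. This is a sketch, not code from the PR: `tparam`
stands for the training-parameter pointer that `TreeUpdater::Create` expects
(the pruner's own constructor passes `tparam_` the same way), and the
parameter value is chosen only for illustration:

// Sketch; assumes the declarations from <xgboost/tree_updater.h> used above.
std::unique_ptr<TreeUpdater> up{TreeUpdater::Create("prune", tparam)};
up->Configure({{"learning_rate", "0.3"}});       // Args is a vector of string pairs
LOG(INFO) << "running updater: " << up->Name();  // prints "prune"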