[TREE] Enable updater registry

This commit is contained in:
tqchen
2016-01-01 03:32:40 -08:00
parent a62a66d545
commit c8ccb61b9e
13 changed files with 172 additions and 189 deletions

View File

@@ -1,63 +0,0 @@
/*!
* Copyright 2014 by Contributors
* \file updater.h
* \brief interface to update the tree
* \author Tianqi Chen
*/
#ifndef XGBOOST_TREE_UPDATER_H_
#define XGBOOST_TREE_UPDATER_H_
#include <vector>
#include "../data.h"
#include "./model.h"
namespace xgboost {
namespace tree {
/*!
 * \brief abstract interface of a tree update module; an implementation
 *  modifies the trees handed to it in Update
 */
class IUpdater {
 public:
  /*!
   * \brief configure the updater from outside
   * \param name name of the parameter to set
   * \param val value of the parameter, given as a string
   */
  virtual void SetParam(const char *name, const char *val) = 0;
  /*!
   * \brief update the given tree models
   * \param gpair gradient pair statistics of the data
   * \param p_fmat feature matrix giving access to the features
   * \param info extra side information that may be needed, such as root index
   * \param trees the trees to be updated; the updater changes their content.
   *  Every tree in the vector is updated with the same statistics, possibly
   *  with different random seeds. Usually a single tree is passed in; several
   *  trees are passed when training a random forest style model
   */
  virtual void Update(const std::vector<bst_gpair> &gpair,
                      IFMatrix *p_fmat,
                      const BoosterInfo &info,
                      const std::vector<RegTree*> &trees) = 0;
  /*!
   * \brief optional hook that exists purely for performance: it lets the
   *  caller obtain the leaf position of each instance in p_fmat when the
   *  updater has that information cached
   * \return array of leaf position of each instance in the last updated tree,
   *  or NULL when it is not available; this default implementation
   *  always returns NULL
   */
  virtual const int* GetLeafPosition() const {
    return NULL;
  }
  // virtual destructor, so implementations can be destroyed through IUpdater*
  virtual ~IUpdater() {}
};
/*!
* \brief create an updater based on name
* \param name name of the updater to create
* \return the updater instance, as a pointer to the IUpdater interface
*  NOTE(review): a raw pointer is returned and its ownership is not visible
*  from this declaration - presumably the caller is responsible for deleting
*  it; confirm against the definition
*/
IUpdater* CreateUpdater(const char *name);
} // namespace tree
} // namespace xgboost
#endif // XGBOOST_TREE_UPDATER_H_

View File

@@ -1,56 +0,0 @@
/*!
* Copyright 2014 by Contributors
* \file updater_sync-inl.hpp
* \brief synchronize the tree in all distributed nodes
* \author Tianqi Chen
*/
#ifndef XGBOOST_TREE_UPDATER_SYNC_INL_HPP_
#define XGBOOST_TREE_UPDATER_SYNC_INL_HPP_
#include <vector>
#include <string>
#include <limits>
#include "../sync/sync.h"
#include "./updater.h"
namespace xgboost {
namespace tree {
/*!
* \brief syncher that synchronize the tree in all distributed nodes
* can implement various strategies, so far it is always set to node 0's tree
*/
class TreeSyncher: public IUpdater {
public:
virtual ~TreeSyncher(void) {}
virtual void SetParam(const char *name, const char *val) {
}
// update the tree, do pruning
virtual void Update(const std::vector<bst_gpair> &gpair,
IFMatrix *p_fmat,
const BoosterInfo &info,
const std::vector<RegTree*> &trees) {
this->SyncTrees(trees);
}
private:
// synchronize the trees in different nodes, take tree from rank 0
inline void SyncTrees(const std::vector<RegTree *> &trees) {
if (rabit::GetWorldSize() == 1) return;
std::string s_model;
utils::MemoryBufferStream fs(&s_model);
int rank = rabit::GetRank();
if (rank == 0) {
for (size_t i = 0; i < trees.size(); ++i) {
trees[i]->SaveModel(fs);
}
}
fs.Seek(0);
rabit::Broadcast(&s_model, 0);
for (size_t i = 0; i < trees.size(); ++i) {
trees[i]->LoadModel(fs);
}
}
};
} // namespace tree
} // namespace xgboost
#endif // XGBOOST_TREE_UPDATER_SYNC_INL_HPP_