[TREE] Enable updater registry
This commit is contained in:
@@ -1,63 +0,0 @@
|
||||
/*!
|
||||
* Copyright 2014 by Contributors
|
||||
* \file updater.h
|
||||
* \brief interface to update the tree
|
||||
* \author Tianqi Chen
|
||||
*/
|
||||
#ifndef XGBOOST_TREE_UPDATER_H_
|
||||
#define XGBOOST_TREE_UPDATER_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "../data.h"
|
||||
#include "./model.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
/*!
|
||||
* \brief interface of tree update module, that performs update of a tree
|
||||
*/
|
||||
class IUpdater {
|
||||
public:
|
||||
/*!
|
||||
* \brief set parameters from outside
|
||||
* \param name name of the parameter
|
||||
* \param val value of the parameter
|
||||
*/
|
||||
virtual void SetParam(const char *name, const char *val) = 0;
|
||||
/*!
|
||||
* \brief perform update to the tree models
|
||||
* \param gpair the gradient pair statistics of the data
|
||||
* \param p_fmat feature matrix that provide access to features
|
||||
* \param info extra side information that may be need, such as root index
|
||||
* \param trees references the trees to be updated, updater will change the content of trees
|
||||
* note: all the trees in the vector are updated, with the same statistics,
|
||||
* but maybe different random seeds, usually one tree is passed in at a time,
|
||||
* there can be multiple trees when we train random forest style model
|
||||
*/
|
||||
virtual void Update(const std::vector<bst_gpair> &gpair,
|
||||
IFMatrix *p_fmat,
|
||||
const BoosterInfo &info,
|
||||
const std::vector<RegTree*> &trees) = 0;
|
||||
|
||||
/*!
|
||||
* \brief this is simply a function for optimizing performance
|
||||
* this function asks the updater to return the leaf position of each instance in the p_fmat,
|
||||
* if it is cached in the updater, if it is not available, return NULL
|
||||
* \return array of leaf position of each instance in the last updated tree
|
||||
*/
|
||||
virtual const int* GetLeafPosition(void) const {
|
||||
return NULL;
|
||||
}
|
||||
// destructor
|
||||
virtual ~IUpdater(void) {}
|
||||
};
|
||||
/*!
|
||||
* \brief create an updater based on name
|
||||
* \param name name of updater
|
||||
* \return return the updater instance
|
||||
*/
|
||||
IUpdater* CreateUpdater(const char *name);
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
#endif // XGBOOST_TREE_UPDATER_H_
|
||||
@@ -1,56 +0,0 @@
|
||||
/*!
|
||||
* Copyright 2014 by Contributors
|
||||
* \file updater_sync-inl.hpp
|
||||
* \brief synchronize the tree in all distributed nodes
|
||||
* \author Tianqi Chen
|
||||
*/
|
||||
#ifndef XGBOOST_TREE_UPDATER_SYNC_INL_HPP_
|
||||
#define XGBOOST_TREE_UPDATER_SYNC_INL_HPP_
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <limits>
|
||||
#include "../sync/sync.h"
|
||||
#include "./updater.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
/*!
|
||||
* \brief syncher that synchronize the tree in all distributed nodes
|
||||
* can implement various strategies, so far it is always set to node 0's tree
|
||||
*/
|
||||
class TreeSyncher: public IUpdater {
|
||||
public:
|
||||
virtual ~TreeSyncher(void) {}
|
||||
virtual void SetParam(const char *name, const char *val) {
|
||||
}
|
||||
// update the tree, do pruning
|
||||
virtual void Update(const std::vector<bst_gpair> &gpair,
|
||||
IFMatrix *p_fmat,
|
||||
const BoosterInfo &info,
|
||||
const std::vector<RegTree*> &trees) {
|
||||
this->SyncTrees(trees);
|
||||
}
|
||||
|
||||
private:
|
||||
// synchronize the trees in different nodes, take tree from rank 0
|
||||
inline void SyncTrees(const std::vector<RegTree *> &trees) {
|
||||
if (rabit::GetWorldSize() == 1) return;
|
||||
std::string s_model;
|
||||
utils::MemoryBufferStream fs(&s_model);
|
||||
int rank = rabit::GetRank();
|
||||
if (rank == 0) {
|
||||
for (size_t i = 0; i < trees.size(); ++i) {
|
||||
trees[i]->SaveModel(fs);
|
||||
}
|
||||
}
|
||||
fs.Seek(0);
|
||||
rabit::Broadcast(&s_model, 0);
|
||||
for (size_t i = 0; i < trees.size(); ++i) {
|
||||
trees[i]->LoadModel(fs);
|
||||
}
|
||||
}
|
||||
};
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
#endif // XGBOOST_TREE_UPDATER_SYNC_INL_HPP_
|
||||
@@ -1,59 +0,0 @@
|
||||
/*!
|
||||
* Copyright 2014 by Contributors
|
||||
* \file io.h
|
||||
* \brief general stream interface for serialization, I/O
|
||||
* \author Tianqi Chen
|
||||
*/
|
||||
|
||||
#ifndef XGBOOST_UTILS_IO_H_
|
||||
#define XGBOOST_UTILS_IO_H_
|
||||
#include <cstdio>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <cstring>
|
||||
#include "./utils.h"
|
||||
#include "../sync/sync.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace utils {
|
||||
// reuse the definitions of streams
|
||||
typedef rabit::Stream IStream;
|
||||
typedef rabit::utils::SeekStream ISeekStream;
|
||||
typedef rabit::utils::MemoryFixSizeBuffer MemoryFixSizeBuffer;
|
||||
typedef rabit::utils::MemoryBufferStream MemoryBufferStream;
|
||||
|
||||
/*! \brief implementation of file i/o stream */
|
||||
class FileStream : public ISeekStream {
|
||||
public:
|
||||
explicit FileStream(std::FILE *fp) : fp(fp) {}
|
||||
FileStream(void) {
|
||||
this->fp = NULL;
|
||||
}
|
||||
virtual size_t Read(void *ptr, size_t size) {
|
||||
return std::fread(ptr, size, 1, fp);
|
||||
}
|
||||
virtual void Write(const void *ptr, size_t size) {
|
||||
Check(std::fwrite(ptr, size, 1, fp) == 1, "FileStream::Write: fwrite error!");
|
||||
}
|
||||
virtual void Seek(size_t pos) {
|
||||
std::fseek(fp, static_cast<long>(pos), SEEK_SET); // NOLINT(*)
|
||||
}
|
||||
virtual size_t Tell(void) {
|
||||
return std::ftell(fp);
|
||||
}
|
||||
virtual bool AtEnd(void) const {
|
||||
return std::feof(fp) != 0;
|
||||
}
|
||||
inline void Close(void) {
|
||||
if (fp != NULL) {
|
||||
std::fclose(fp); fp = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
std::FILE *fp;
|
||||
};
|
||||
} // namespace utils
|
||||
} // namespace xgboost
|
||||
#include "./base64-inl.h"
|
||||
#endif // XGBOOST_UTILS_IO_H_
|
||||
Reference in New Issue
Block a user