complete refactor data.h, now replies on iterator to access column
This commit is contained in:
@@ -8,7 +8,7 @@
|
||||
*/
|
||||
#include <vector>
|
||||
#include "../data.h"
|
||||
|
||||
#include "../utils/io.h"
|
||||
namespace xgboost {
|
||||
namespace learner {
|
||||
/*!
|
||||
@@ -142,7 +142,6 @@ struct MetaInfo {
|
||||
* \brief data object used for learning,
|
||||
* \tparam FMatrix type of feature data source
|
||||
*/
|
||||
template<typename FMatrix>
|
||||
struct DMatrix {
|
||||
/*!
|
||||
* \brief magic number associated with this object
|
||||
@@ -152,7 +151,7 @@ struct DMatrix {
|
||||
/*! \brief meta information about the dataset */
|
||||
MetaInfo info;
|
||||
/*! \brief feature matrix about data content */
|
||||
FMatrix fmat;
|
||||
IFMatrix *fmat;
|
||||
/*!
|
||||
* \brief cache pointer to verify if the data structure is cached in some learner
|
||||
* used to verify if DMatrix is cached
|
||||
@@ -161,7 +160,9 @@ struct DMatrix {
|
||||
/*! \brief default constructor */
|
||||
explicit DMatrix(int magic) : magic(magic), cache_learner_ptr_(NULL) {}
|
||||
// virtual destructor
|
||||
virtual ~DMatrix(void){}
|
||||
virtual ~DMatrix(void){
|
||||
delete fmat;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace learner
|
||||
|
||||
@@ -21,7 +21,6 @@ namespace learner {
|
||||
* \brief learner that takes do gradient boosting on specific objective functions
|
||||
* and do training and prediction
|
||||
*/
|
||||
template<typename FMatrix>
|
||||
class BoostLearner {
|
||||
public:
|
||||
BoostLearner(void) {
|
||||
@@ -44,7 +43,7 @@ class BoostLearner {
|
||||
* data matrices to continue training otherwise it will cause error
|
||||
* \param mats array of pointers to matrix whose prediction result need to be cached
|
||||
*/
|
||||
inline void SetCacheData(const std::vector<DMatrix<FMatrix>*>& mats) {
|
||||
inline void SetCacheData(const std::vector<DMatrix*>& mats) {
|
||||
// estimate feature bound
|
||||
unsigned num_feature = 0;
|
||||
// assign buffer index
|
||||
@@ -158,15 +157,15 @@ class BoostLearner {
|
||||
* if not intialize it
|
||||
* \param p_train pointer to the matrix used by training
|
||||
*/
|
||||
inline void CheckInit(DMatrix<FMatrix> *p_train) {
|
||||
p_train->fmat.InitColAccess(prob_buffer_row);
|
||||
inline void CheckInit(DMatrix *p_train) {
|
||||
p_train->fmat->InitColAccess(prob_buffer_row);
|
||||
}
|
||||
/*!
|
||||
* \brief update the model for one iteration
|
||||
* \param iter current iteration number
|
||||
* \param p_train pointer to the data matrix
|
||||
*/
|
||||
inline void UpdateOneIter(int iter, const DMatrix<FMatrix> &train) {
|
||||
inline void UpdateOneIter(int iter, const DMatrix &train) {
|
||||
this->PredictRaw(train, &preds_);
|
||||
obj_->GetGradient(preds_, train.info, iter, &gpair_);
|
||||
gbm_->DoBoost(train.fmat, train.info.info, &gpair_);
|
||||
@@ -179,7 +178,7 @@ class BoostLearner {
|
||||
* \return a string corresponding to the evaluation result
|
||||
*/
|
||||
inline std::string EvalOneIter(int iter,
|
||||
const std::vector<const DMatrix<FMatrix>*> &evals,
|
||||
const std::vector<const DMatrix*> &evals,
|
||||
const std::vector<std::string> &evname) {
|
||||
std::string res;
|
||||
char tmp[256];
|
||||
@@ -198,7 +197,7 @@ class BoostLearner {
|
||||
* \param metric name of metric
|
||||
* \return a pair of <evaluation name, result>
|
||||
*/
|
||||
std::pair<std::string, float> Evaluate(const DMatrix<FMatrix> &data, std::string metric) {
|
||||
std::pair<std::string, float> Evaluate(const DMatrix &data, std::string metric) {
|
||||
if (metric == "auto") metric = obj_->DefaultEvalMetric();
|
||||
IEvaluator *ev = CreateEvaluator(metric.c_str());
|
||||
this->PredictRaw(data, &preds_);
|
||||
@@ -213,7 +212,7 @@ class BoostLearner {
|
||||
* \param output_margin whether to only predict margin value instead of transformed prediction
|
||||
* \param out_preds output vector that stores the prediction
|
||||
*/
|
||||
inline void Predict(const DMatrix<FMatrix> &data,
|
||||
inline void Predict(const DMatrix &data,
|
||||
bool output_margin,
|
||||
std::vector<float> *out_preds) const {
|
||||
this->PredictRaw(data, out_preds);
|
||||
@@ -235,7 +234,7 @@ class BoostLearner {
|
||||
if (obj_ != NULL) return;
|
||||
utils::Assert(gbm_ == NULL, "GBM and obj should be NULL");
|
||||
obj_ = CreateObjFunction(name_obj_.c_str());
|
||||
gbm_ = gbm::CreateGradBooster<FMatrix>(name_gbm_.c_str());
|
||||
gbm_ = gbm::CreateGradBooster(name_gbm_.c_str());
|
||||
for (size_t i = 0; i < cfg_.size(); ++i) {
|
||||
obj_->SetParam(cfg_[i].first.c_str(), cfg_[i].second.c_str());
|
||||
gbm_->SetParam(cfg_[i].first.c_str(), cfg_[i].second.c_str());
|
||||
@@ -247,7 +246,7 @@ class BoostLearner {
|
||||
* \param data training data matrix
|
||||
* \param out_preds output vector that stores the prediction
|
||||
*/
|
||||
inline void PredictRaw(const DMatrix<FMatrix> &data,
|
||||
inline void PredictRaw(const DMatrix &data,
|
||||
std::vector<float> *out_preds) const {
|
||||
gbm_->Predict(data.fmat, this->FindBufferOffset(data),
|
||||
data.info.info, out_preds);
|
||||
@@ -307,7 +306,7 @@ class BoostLearner {
|
||||
// model parameter
|
||||
ModelParam mparam;
|
||||
// gbm model that back everything
|
||||
gbm::IGradBooster<FMatrix> *gbm_;
|
||||
gbm::IGradBooster *gbm_;
|
||||
// name of gbm model used for training
|
||||
std::string name_gbm_;
|
||||
// objective fnction
|
||||
@@ -324,14 +323,14 @@ class BoostLearner {
|
||||
private:
|
||||
// cache entry object that helps handle feature caching
|
||||
struct CacheEntry {
|
||||
const DMatrix<FMatrix> *mat_;
|
||||
const DMatrix *mat_;
|
||||
size_t buffer_offset_;
|
||||
size_t num_row_;
|
||||
CacheEntry(const DMatrix<FMatrix> *mat, size_t buffer_offset, size_t num_row)
|
||||
CacheEntry(const DMatrix *mat, size_t buffer_offset, size_t num_row)
|
||||
:mat_(mat), buffer_offset_(buffer_offset), num_row_(num_row) {}
|
||||
};
|
||||
// find internal bufer offset for certain matrix, if not exist, return -1
|
||||
inline int64_t FindBufferOffset(const DMatrix<FMatrix> &mat) const {
|
||||
inline int64_t FindBufferOffset(const DMatrix &mat) const {
|
||||
for (size_t i = 0; i < cache_.size(); ++i) {
|
||||
if (cache_[i].mat_ == &mat && mat.cache_learner_ptr_ == this) {
|
||||
if (cache_[i].num_row_ == mat.info.num_row()) {
|
||||
|
||||
Reference in New Issue
Block a user