Finish refactor; still needs debugging

This commit is contained in:
tqchen@graphlab.com 2014-08-27 18:33:52 -07:00
parent 605269133e
commit f175e1cfb4
5 changed files with 34 additions and 26 deletions

View File

@ -1,19 +1,19 @@
export CC = gcc export CC = gcc
export CXX = g++ export CXX = g++
export LDFLAGS= -pthread -lm export LDFLAGS= -pthread -lm
# note for R module
# add include path to Rinternals.h here export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC
ifeq ($(no_omp),1) ifeq ($(no_omp),1)
export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -DDISABLE_OPENMP -funroll-loops CFLAGS += -DDISABLE_OPENMP
else else
export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fopenmp -funroll-loops CFLAGS += -fopenmp
endif endif
# specify tensor path # specify tensor path
BIN = BIN = xgboost
OBJ = updater.o gbm.o xgboost_main.o OBJ = updater.o gbm.o io.o
#SLIB = wrapper/libxgboostwrapper.so SLIB = wrapper/libxgboostwrapper.so
#RLIB = wrapper/libxgboostR.so #RLIB = wrapper/libxgboostR.so
.PHONY: clean all R python .PHONY: clean all R python
@ -22,10 +22,11 @@ all: $(BIN) $(OBJ)
#xgboost: src/xgboost_main.cpp src/io/io.cpp src/data.h src/tree/*.h src/tree/*.hpp src/gbm/*.h src/gbm/*.hpp src/utils/*.h src/learner/*.h src/learner/*.hpp #xgboost: src/xgboost_main.cpp src/io/io.cpp src/data.h src/tree/*.h src/tree/*.hpp src/gbm/*.h src/gbm/*.hpp src/utils/*.h src/learner/*.h src/learner/*.hpp
# now the wrapper takes in two files. io and wrapper part # now the wrapper takes in two files. io and wrapper part
#wrapper/libxgboostwrapper.so: wrapper/xgboost_wrapper.cpp src/io/io.cpp src/*.h src/*/*.hpp src/*/*.h wrapper/libxgboostwrapper.so: wrapper/xgboost_wrapper.cpp $(OBJ)
updater.o: src/tree/updater.cpp updater.o: src/tree/updater.cpp
gbm.o: src/gbm/gbm.cpp gbm.o: src/gbm/gbm.cpp
xgboost_main.o: src/xgboost_main.cpp io.o: src/io/io.cpp
xgboost: src/xgboost_main.cpp $(OBJ)
$(BIN) : $(BIN) :
$(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^) $(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^)

View File

@ -78,7 +78,7 @@ struct SparseBatch {
/*! \brief pointer to the elements*/ /*! \brief pointer to the elements*/
const Entry *data; const Entry *data;
/*! \brief length of the instance */ /*! \brief length of the instance */
const bst_uint length; bst_uint length;
/*! \brief constructor */ /*! \brief constructor */
Inst(const Entry *data, bst_uint length) : data(data), length(length) {} Inst(const Entry *data, bst_uint length) : data(data), length(length) {}
/*! \brief get i-th pair in the sparse vector*/ /*! \brief get i-th pair in the sparse vector*/
@ -147,7 +147,7 @@ class IFMatrix {
/*! \return number of columns in the FMatrix */ /*! \return number of columns in the FMatrix */
virtual size_t NumCol(void) const = 0; virtual size_t NumCol(void) const = 0;
/*! \brief get number of non-missing entries in column */ /*! \brief get number of non-missing entries in column */
virtual float GetColSize(size_t cidx) const = 0; virtual size_t GetColSize(size_t cidx) const = 0;
/*! \brief get column density */ /*! \brief get column density */
virtual float GetColDensity(size_t cidx) const = 0; virtual float GetColDensity(size_t cidx) const = 0;
/*! \brief reference of buffered rowset */ /*! \brief reference of buffered rowset */

View File

@ -16,6 +16,7 @@
#include "../utils/utils.h" #include "../utils/utils.h"
#include "../learner/dmatrix.h" #include "../learner/dmatrix.h"
#include "./io.h" #include "./io.h"
#include "./simple_fmatrix-inl.hpp"
namespace xgboost { namespace xgboost {
namespace io { namespace io {
@ -24,11 +25,16 @@ class DMatrixSimple : public DataMatrix {
public: public:
// constructor // constructor
DMatrixSimple(void) : DataMatrix(kMagic) { DMatrixSimple(void) : DataMatrix(kMagic) {
this->fmat.set_iter(new OneBatchIter(this)); fmat_ = new FMatrixS(new OneBatchIter(this));
this->Clear(); this->Clear();
} }
// virtual destructor // virtual destructor
virtual ~DMatrixSimple(void) {} virtual ~DMatrixSimple(void) {
delete fmat_;
}
virtual IFMatrix *fmat(void) const {
return fmat_;
}
/*! \brief clear the storage */ /*! \brief clear the storage */
inline void Clear(void) { inline void Clear(void) {
row_ptr_.clear(); row_ptr_.clear();
@ -41,7 +47,7 @@ class DMatrixSimple : public DataMatrix {
this->info = src.info; this->info = src.info;
this->Clear(); this->Clear();
// clone data content in this matrix // clone data content in this matrix
utils::IIterator<RowBatch> *iter = src.fmat.RowIterator(); utils::IIterator<RowBatch> *iter = src.fmat()->RowIterator();
iter->BeforeFirst(); iter->BeforeFirst();
while (iter->Next()) { while (iter->Next()) {
const RowBatch &batch = iter->Value(); const RowBatch &batch = iter->Value();
@ -145,7 +151,7 @@ class DMatrixSimple : public DataMatrix {
info.LoadBinary(fs); info.LoadBinary(fs);
FMatrixS::LoadBinary(fs, &row_ptr_, &row_data_); FMatrixS::LoadBinary(fs, &row_ptr_, &row_data_);
fmat.LoadColAccess(fs); fmat_->LoadColAccess(fs);
if (!silent) { if (!silent) {
printf("%lux%lu matrix with %lu entries is loaded", printf("%lux%lu matrix with %lu entries is loaded",
@ -172,7 +178,7 @@ class DMatrixSimple : public DataMatrix {
info.SaveBinary(fs); info.SaveBinary(fs);
FMatrixS::SaveBinary(fs, row_ptr_, row_data_); FMatrixS::SaveBinary(fs, row_ptr_, row_data_);
fmat.SaveColAccess(fs); fmat_->SaveColAccess(fs);
fs.Close(); fs.Close();
if (!silent) { if (!silent) {
@ -212,6 +218,8 @@ class DMatrixSimple : public DataMatrix {
std::vector<size_t> row_ptr_; std::vector<size_t> row_ptr_;
/*! \brief data in the row */ /*! \brief data in the row */
std::vector<RowBatch::Entry> row_data_; std::vector<RowBatch::Entry> row_data_;
/*! \brief the real fmatrix */
FMatrixS *fmat_;
/*! \brief magic number used to identify DMatrix */ /*! \brief magic number used to identify DMatrix */
static const int kMagic = 0xffffab01; static const int kMagic = 0xffffab01;

View File

@ -7,6 +7,7 @@
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#include <vector> #include <vector>
#include <cstring>
#include "../data.h" #include "../data.h"
#include "../utils/io.h" #include "../utils/io.h"
namespace xgboost { namespace xgboost {
@ -150,8 +151,6 @@ struct DMatrix {
const int magic; const int magic;
/*! \brief meta information about the dataset */ /*! \brief meta information about the dataset */
MetaInfo info; MetaInfo info;
/*! \brief feature matrix about data content */
IFMatrix *fmat;
/*! /*!
* \brief cache pointer to verify if the data structure is cached in some learner * \brief cache pointer to verify if the data structure is cached in some learner
* used to verify if DMatrix is cached * used to verify if DMatrix is cached
@ -159,10 +158,10 @@ struct DMatrix {
void *cache_learner_ptr_; void *cache_learner_ptr_;
/*! \brief default constructor */ /*! \brief default constructor */
explicit DMatrix(int magic) : magic(magic), cache_learner_ptr_(NULL) {} explicit DMatrix(int magic) : magic(magic), cache_learner_ptr_(NULL) {}
/*! \brief get feature matrix about data content */
virtual IFMatrix *fmat(void) const = 0;
// virtual destructor // virtual destructor
virtual ~DMatrix(void){ virtual ~DMatrix(void){}
delete fmat;
}
}; };
} // namespace learner } // namespace learner

View File

@ -158,7 +158,7 @@ class BoostLearner {
* \param p_train pointer to the matrix used by training * \param p_train pointer to the matrix used by training
*/ */
inline void CheckInit(DMatrix *p_train) { inline void CheckInit(DMatrix *p_train) {
p_train->fmat->InitColAccess(prob_buffer_row); p_train->fmat()->InitColAccess(prob_buffer_row);
} }
/*! /*!
* \brief update the model for one iteration * \brief update the model for one iteration
@ -168,7 +168,7 @@ class BoostLearner {
inline void UpdateOneIter(int iter, const DMatrix &train) { inline void UpdateOneIter(int iter, const DMatrix &train) {
this->PredictRaw(train, &preds_); this->PredictRaw(train, &preds_);
obj_->GetGradient(preds_, train.info, iter, &gpair_); obj_->GetGradient(preds_, train.info, iter, &gpair_);
gbm_->DoBoost(train.fmat, train.info.info, &gpair_); gbm_->DoBoost(train.fmat(), train.info.info, &gpair_);
} }
/*! /*!
* \brief evaluate the model for specific iteration * \brief evaluate the model for specific iteration
@ -248,7 +248,7 @@ class BoostLearner {
*/ */
inline void PredictRaw(const DMatrix &data, inline void PredictRaw(const DMatrix &data,
std::vector<float> *out_preds) const { std::vector<float> *out_preds) const {
gbm_->Predict(data.fmat, this->FindBufferOffset(data), gbm_->Predict(data.fmat(), this->FindBufferOffset(data),
data.info.info, out_preds); data.info.info, out_preds);
// add base margin // add base margin
std::vector<float> &preds = *out_preds; std::vector<float> &preds = *out_preds;