From f175e1cfb4a1ab125840f9f718abf3cdd2d4580c Mon Sep 17 00:00:00 2001
From: "tqchen@graphlab.com" <tqchen@graphlab.com>
Date: Wed, 27 Aug 2014 18:33:52 -0700
Subject: [PATCH] finish refactor, need debug

---
 Makefile                      | 21 +++++++++++----------
 src/data.h                    |  4 ++--
 src/io/simple_dmatrix-inl.hpp | 20 ++++++++++++++------
 src/learner/dmatrix.h         |  9 ++++-----
 src/learner/learner-inl.hpp   |  6 +++---
 5 files changed, 34 insertions(+), 26 deletions(-)

diff --git a/Makefile b/Makefile
index 78115a4b2..ed0ce6b22 100644
--- a/Makefile
+++ b/Makefile
@@ -1,19 +1,19 @@
 export CC = gcc
 export CXX = g++
 export LDFLAGS= -pthread -lm
-# note for R module
-# add include path to Rinternals.h here
+
+export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC
 
 ifeq ($(no_omp),1)
-	export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -DDISABLE_OPENMP -funroll-loops
-else
-	export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fopenmp -funroll-loops
+	CFLAGS += -DDISABLE_OPENMP
+else
+	CFLAGS += -fopenmp
 endif
 
 # specify tensor path
-BIN = 
-OBJ = updater.o gbm.o xgboost_main.o
-#SLIB = wrapper/libxgboostwrapper.so
+BIN = xgboost
+OBJ = updater.o gbm.o io.o
+SLIB = wrapper/libxgboostwrapper.so
 #RLIB = wrapper/libxgboostR.so
 
 .PHONY: clean all R python
@@ -22,10 +22,11 @@ all: $(BIN) $(OBJ)
 
 #xgboost: src/xgboost_main.cpp src/io/io.cpp src/data.h src/tree/*.h src/tree/*.hpp src/gbm/*.h src/gbm/*.hpp src/utils/*.h src/learner/*.h src/learner/*.hpp
 # now the wrapper takes in two files. io and wrapper part
-#wrapper/libxgboostwrapper.so: wrapper/xgboost_wrapper.cpp src/io/io.cpp src/*.h src/*/*.hpp src/*/*.h
+wrapper/libxgboostwrapper.so: wrapper/xgboost_wrapper.cpp $(OBJ)
 updater.o: src/tree/updater.cpp
 gbm.o: src/gbm/gbm.cpp
-xgboost_main.o: src/xgboost_main.cpp
+io.o: src/io/io.cpp
+xgboost: src/xgboost_main.cpp $(OBJ)
 
 $(BIN) :
 	$(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^)
diff --git a/src/data.h b/src/data.h
index 97cc0212a..2ea5f222a 100644
--- a/src/data.h
+++ b/src/data.h
@@ -78,7 +78,7 @@ struct SparseBatch {
     /*! \brief pointer to the elements*/
     const Entry *data;
     /*! \brief length of the instance */
-    const bst_uint length;
+    bst_uint length;
     /*! \brief constructor */
    Inst(const Entry *data, bst_uint length) : data(data), length(length) {}
     /*! \brief get i-th pair in the sparse vector*/
@@ -147,7 +147,7 @@ class IFMatrix {
   /*! \return number of columns in the FMatrix */
   virtual size_t NumCol(void) const = 0;
   /*! \brief get number of non-missing entries in column */
-  virtual float GetColSize(size_t cidx) const = 0;
+  virtual size_t GetColSize(size_t cidx) const = 0;
   /*! \brief get column density */
   virtual float GetColDensity(size_t cidx) const = 0;
   /*! \brief reference of buffered rowset */
diff --git a/src/io/simple_dmatrix-inl.hpp b/src/io/simple_dmatrix-inl.hpp
index 77f36c971..47be8a41a 100644
--- a/src/io/simple_dmatrix-inl.hpp
+++ b/src/io/simple_dmatrix-inl.hpp
@@ -16,6 +16,7 @@
 #include "../utils/utils.h"
 #include "../learner/dmatrix.h"
 #include "./io.h"
+#include "./simple_fmatrix-inl.hpp"
 
 namespace xgboost {
 namespace io {
@@ -24,11 +25,16 @@ class DMatrixSimple : public DataMatrix {
  public:
   // constructor
   DMatrixSimple(void) : DataMatrix(kMagic) {
-    this->fmat.set_iter(new OneBatchIter(this));
+    fmat_ = new FMatrixS(new OneBatchIter(this));
     this->Clear();
   }
   // virtual destructor
-  virtual ~DMatrixSimple(void) {}
+  virtual ~DMatrixSimple(void) {
+    delete fmat_;
+  }
+  virtual IFMatrix *fmat(void) const {
+    return fmat_;
+  }
   /*! \brief clear the storage */
   inline void Clear(void) {
     row_ptr_.clear();
@@ -41,7 +47,7 @@ class DMatrixSimple : public DataMatrix {
     this->info = src.info;
     this->Clear();
     // clone data content in thos matrix
-    utils::IIterator<RowBatch> *iter = src.fmat.RowIterator();
+    utils::IIterator<RowBatch> *iter = src.fmat()->RowIterator();
     iter->BeforeFirst();
     while (iter->Next()) {
       const RowBatch &batch = iter->Value();
@@ -145,7 +151,7 @@ class DMatrixSimple : public DataMatrix {
 
     info.LoadBinary(fs);
     FMatrixS::LoadBinary(fs, &row_ptr_, &row_data_);
-    fmat.LoadColAccess(fs);
+    fmat_->LoadColAccess(fs);
 
     if (!silent) {
       printf("%lux%lu matrix with %lu entries is loaded",
@@ -172,7 +178,7 @@ class DMatrixSimple : public DataMatrix {
 
     info.SaveBinary(fs);
     FMatrixS::SaveBinary(fs, row_ptr_, row_data_);
-    fmat.SaveColAccess(fs);
+    fmat_->SaveColAccess(fs);
     fs.Close();
 
     if (!silent) {
@@ -212,6 +218,8 @@ class DMatrixSimple : public DataMatrix {
   std::vector<size_t> row_ptr_;
   /*! \brief data in the row */
   std::vector<RowBatch::Entry> row_data_;
+  /*! \brief the real fmatrix */
+  FMatrixS *fmat_;
   /*! \brief magic number used to identify DMatrix */
   static const int kMagic = 0xffffab01;
 
@@ -244,7 +252,7 @@ class DMatrixSimple : public DataMatrix {
     DMatrixSimple *parent_;
     // temporal space for batch
     RowBatch batch_;
-  }; 
+  };
 };
 } // namespace io
 } // namespace xgboost
diff --git a/src/learner/dmatrix.h b/src/learner/dmatrix.h
index 8c1658ab3..cd897f1d5 100644
--- a/src/learner/dmatrix.h
+++ b/src/learner/dmatrix.h
@@ -7,6 +7,7 @@
  * \author Tianqi Chen
  */
 #include <vector>
+#include
 #include "../data.h"
 #include "../utils/io.h"
 namespace xgboost {
@@ -150,18 +151,16 @@ struct DMatrix {
   const int magic;
   /*! \brief meta information about the dataset */
   MetaInfo info;
-  /*! \brief feature matrix about data content */
-  IFMatrix *fmat;
   /*!
    * \brief cache pointer to verify if the data structure is cached in some learner
    * used to verify if DMatrix is cached
   */
   void *cache_learner_ptr_;
   /*! \brief default constructor */
   explicit DMatrix(int magic) : magic(magic), cache_learner_ptr_(NULL) {}
+  /*! \brief get feature matrix about data content */
+  virtual IFMatrix *fmat(void) const = 0;
   // virtual destructor
-  virtual ~DMatrix(void){
-    delete fmat;
-  }
+  virtual ~DMatrix(void){}
 };
 } // namespace learner
diff --git a/src/learner/learner-inl.hpp b/src/learner/learner-inl.hpp
index ed92668da..c01e2ec15 100644
--- a/src/learner/learner-inl.hpp
+++ b/src/learner/learner-inl.hpp
@@ -158,7 +158,7 @@ class BoostLearner {
    * \param p_train pointer to the matrix used by training
    */
   inline void CheckInit(DMatrix *p_train) {
-    p_train->fmat->InitColAccess(prob_buffer_row);
+    p_train->fmat()->InitColAccess(prob_buffer_row);
   }
   /*!
    * \brief update the model for one iteration
@@ -168,7 +168,7 @@ class BoostLearner {
   inline void UpdateOneIter(int iter, const DMatrix &train) {
     this->PredictRaw(train, &preds_);
     obj_->GetGradient(preds_, train.info, iter, &gpair_);
-    gbm_->DoBoost(train.fmat, train.info.info, &gpair_);
+    gbm_->DoBoost(train.fmat(), train.info.info, &gpair_);
   }
   /*!
    * \brief evaluate the model for specific iteration
@@ -248,7 +248,7 @@ class BoostLearner {
    */
   inline void PredictRaw(const DMatrix &data,
                          std::vector<float> *out_preds) const {
-    gbm_->Predict(data.fmat, this->FindBufferOffset(data),
+    gbm_->Predict(data.fmat(), this->FindBufferOffset(data),
                   data.info.info, out_preds);
     // add base margin
     std::vector<float> &preds = *out_preds;