remake the wrapper

This commit is contained in:
tqchen
2014-08-17 17:43:46 -07:00
parent 2c969ecf14
commit af100dd869
18 changed files with 520 additions and 572 deletions

View File

@@ -1,26 +0,0 @@
# Build configuration for the (legacy) xgboost python shared library.
export CC = gcc
export CXX = g++
export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fopenmp
# specify tensor path
SLIB = libxgboostpy.so
.PHONY: clean all
all: $(SLIB)
export LDFLAGS= -pthread -lm
# dependency line: rebuild the shared library whenever the wrapper source
# or any of the xgboost headers it includes change
libxgboostpy.so: xgboost_python.cpp ../regrank/*.h ../booster/*.h ../booster/*/*.hpp ../booster/*.hpp
# generic recipes: each target is linked/compiled from the %.cpp/%.o/%.c
# files listed among its prerequisites
$(SLIB) :
$(CXX) $(CFLAGS) -fPIC $(LDFLAGS) -shared -o $@ $(filter %.cpp %.o %.c, $^)
$(BIN) :
$(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^)
$(OBJ) :
$(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) )
install:
cp -f -r $(BIN) $(INSTALL_PATH)
clean:
$(RM) $(OBJ) $(BIN) $(SLIB) *~

View File

@@ -1,3 +1,5 @@
Python wrapper for xgboost using ctypes.
See the example folder for usage.
To build the Python module, type `make` in the root directory of the project.

View File

@@ -8,11 +8,7 @@ import numpy.ctypeslib
import scipy.sparse as scp
# set this line correctly
XGBOOST_PATH = os.path.dirname(__file__)+'/libxgboostpy.so'
# entry type of sparse matrix
class REntry(ctypes.Structure):
_fields_ = [("findex", ctypes.c_uint), ("fvalue", ctypes.c_float) ]
XGBOOST_PATH = os.path.dirname(__file__)+'/libxgboostwrapper.so'
# load in xgboost library
xglib = ctypes.cdll.LoadLibrary(XGBOOST_PATH)

View File

@@ -1,297 +0,0 @@
// implementations in ctypes
#include "xgboost_python.h"
#include "../regrank/xgboost_regrank.h"
#include "../regrank/xgboost_regrank_data.h"
namespace xgboost{
namespace python{
// In-memory training matrix exposed to the python ctypes layer; extends
// regrank::DMatrix with C-friendly mutation and row-access helpers.
class DMatrix: public regrank::DMatrix{
public:
// whether column is initialized
bool init_col_;
public:
DMatrix(void){
init_col_ = false;
}
~DMatrix(void){}
public:
// Load from text file (or cached binary); records whether the column
// access structure came back initialized from the cache.
inline void Load(const char *fname, bool silent){
this->CacheLoad(fname, silent);
init_col_ = this->data.HaveColAccess();
}
// Drop all feature data and meta information (labels/weights/groups).
inline void Clear( void ){
this->data.Clear();
this->info.labels.clear();
this->info.weights.clear();
this->info.group_ptr.clear();
}
inline size_t NumRow( void ) const{
return this->data.NumRow();
}
// Append one sparse row of `len` entries; invalidates column access.
inline void AddRow( const XGEntry *data, size_t len ){
xgboost::booster::FMatrixS &mat = this->data;
// grow storage first, then copy into the old tail position
mat.row_data_.resize( mat.row_ptr_.back() + len );
memcpy( &mat.row_data_[mat.row_ptr_.back()], data, sizeof(XGEntry)*len );
mat.row_ptr_.push_back( mat.row_ptr_.back() + len );
init_col_ = false;
}
// Return pointer to the ridx-th row; stores its entry count in *len.
// NOTE(review): no bounds check on ridx — caller must pass a valid row.
inline const XGEntry* GetRow(unsigned ridx, size_t* len) const{
const xgboost::booster::FMatrixS &mat = this->data;
*len = mat.row_ptr_[ridx+1] - mat.row_ptr_[ridx];
return &mat.row_data_[ mat.row_ptr_[ridx] ];
}
// Replace content from CSR arrays: indptr has nindptr row offsets,
// indices/data hold nelem feature-index/value pairs.
inline void ParseCSR( const size_t *indptr,
const unsigned *indices,
const float *data,
size_t nindptr,
size_t nelem ){
xgboost::booster::FMatrixS &mat = this->data;
mat.row_ptr_.resize( nindptr );
memcpy( &mat.row_ptr_[0], indptr, sizeof(size_t)*nindptr );
mat.row_data_.resize( nelem );
for( size_t i = 0; i < nelem; ++ i ){
mat.row_data_[i] = XGEntry(indices[i], data[i]);
}
this->data.InitData();
this->init_col_ = true;
}
// Replace content from a dense row-major nrow x ncol matrix; entries
// equal to `missing` are skipped.
inline void ParseMat( const float *data,
size_t nrow,
size_t ncol,
float missing ){
xgboost::booster::FMatrixS &mat = this->data;
mat.Clear();
for( size_t i = 0; i < nrow; ++i, data += ncol ){
size_t nelem = 0;
for( size_t j = 0; j < ncol; ++j ){
if( data[j] != missing ){
mat.row_data_.push_back( XGEntry(j, data[j]) );
++ nelem;
}
}
mat.row_ptr_.push_back( mat.row_ptr_.back() + nelem );
}
this->data.InitData();
this->init_col_ = true;
}
// Copy `len` labels into the meta info.
inline void SetLabel( const float *label, size_t len ){
this->info.labels.resize( len );
memcpy( &(this->info).labels[0], label, sizeof(float)*len );
}
// Convert per-query group sizes into the cumulative group_ptr offsets.
inline void SetGroup( const unsigned *group, size_t len ){
this->info.group_ptr.resize( len + 1 );
this->info.group_ptr[0] = 0;
for( size_t i = 0; i < len; ++ i ){
this->info.group_ptr[i+1] = this->info.group_ptr[i]+group[i];
}
}
// Copy `len` per-instance weights into the meta info.
inline void SetWeight( const float *weight, size_t len ){
this->info.weights.resize( len );
memcpy( &(this->info).weights[0], weight, sizeof(float)*len );
}
// Return pointer to the labels; length stored in *len.
inline const float* GetLabel( size_t* len ) const{
*len = this->info.labels.size();
return &(this->info.labels[0]);
}
// Return pointer to the weights; length stored in *len.
inline const float* GetWeight( size_t* len ) const{
*len = this->info.weights.size();
return &(this->info.weights[0]);
}
// Lazily build column access and validate label/row count agreement;
// called before the matrix is handed to a booster.
inline void CheckInit(void){
if(!init_col_){
this->data.InitData();
init_col_ = true;
}
utils::Assert( this->data.NumRow() == this->info.labels.size(), "DMatrix: number of labels must match number of rows in matrix");
}
};
// Learner wrapper with lazy initialization: InitTrainer/InitModel are
// deferred until the first operation that needs them (CheckInit).
class Booster: public xgboost::regrank::RegRankBoostLearner{
private:
// lazy-init flags for trainer configuration and model allocation
bool init_trainer, init_model;
public:
Booster(const std::vector<regrank::DMatrix *> mats){
silent = 1;
init_trainer = false;
init_model = false;
this->SetCacheData(mats);
}
// Ensure trainer and model are initialized exactly once.
inline void CheckInit(void){
if( !init_trainer ){
this->InitTrainer(); init_trainer = true;
}
if( !init_model ){
this->InitModel(); init_model = true;
}
}
// Loading a model from file counts as model initialization.
inline void LoadModel( const char *fname ){
xgboost::regrank::RegRankBoostLearner::LoadModel(fname);
this->init_model = true;
}
// Intercept "seed" to reseed the RNG, then forward to the base learner.
inline void SetParam( const char *name, const char *val ){
if( !strcmp( name, "seed" ) ) random::Seed(atoi(val));
xgboost::regrank::RegRankBoostLearner::SetParam( name, val );
}
// Predict into the internal preds_ buffer; returned pointer is owned by
// the booster and valid until the next prediction.
const float *Pred( const DMatrix &dmat, size_t *len, int bst_group ){
this->CheckInit();
this->Predict( this->preds_, dmat, bst_group );
*len = this->preds_.size();
return &this->preds_[0];
}
// Boost one round from externally supplied gradient/hessian arrays.
// If len equals the row count, boost the single group bst_group;
// otherwise len must be rows*ngroup and every group is boosted
// (bst_group must then be -1).
inline void BoostOneIter( const DMatrix &train,
float *grad, float *hess, size_t len, int bst_group ){
this->grad_.resize( len ); this->hess_.resize( len );
memcpy( &this->grad_[0], grad, sizeof(float)*len );
memcpy( &this->hess_[0], hess, sizeof(float)*len );
if( grad_.size() == train.Size() ){
if( bst_group < 0 ) bst_group = 0;
base_gbm.DoBoost(grad_, hess_, train.data, train.info.root_index, bst_group);
}else{
utils::Assert( bst_group == -1, "must set bst_group to -1 to support all group boosting" );
int ngroup = base_gbm.NumBoosterGroup();
utils::Assert( grad_.size() == train.Size() * (size_t)ngroup, "BUG: UpdateOneIter: mclass" );
// boost each group from its slice of the flattened grad/hess arrays
std::vector<float> tgrad( train.Size() ), thess( train.Size() );
for( int g = 0; g < ngroup; ++ g ){
memcpy( &tgrad[0], &grad_[g*tgrad.size()], sizeof(float)*tgrad.size() );
memcpy( &thess[0], &hess_[g*tgrad.size()], sizeof(float)*tgrad.size() );
base_gbm.DoBoost(tgrad, thess, train.data, train.info.root_index, g );
}
}
}
};
};
};
using namespace xgboost::python;
// C ABI consumed by the python ctypes layer. All handles are opaque
// pointers to DMatrix / Booster created by the XG*Create functions and
// must be released with the matching XG*Free.
extern "C"{
void* XGDMatrixCreate( void ){
return new DMatrix();
}
void XGDMatrixFree( void *handle ){
delete static_cast<DMatrix*>(handle);
}
void XGDMatrixLoad( void *handle, const char *fname, int silent ){
static_cast<DMatrix*>(handle)->Load(fname, silent!=0);
}
void XGDMatrixSaveBinary( void *handle, const char *fname, int silent ){
static_cast<DMatrix*>(handle)->SaveBinary(fname, silent!=0);
}
void XGDMatrixParseCSR( void *handle,
const size_t *indptr,
const unsigned *indices,
const float *data,
size_t nindptr,
size_t nelem ){
static_cast<DMatrix*>(handle)->ParseCSR(indptr, indices, data, nindptr, nelem);
}
void XGDMatrixParseMat( void *handle,
const float *data,
size_t nrow,
size_t ncol,
float missing ){
static_cast<DMatrix*>(handle)->ParseMat(data, nrow, ncol, missing);
}
// --- meta info setters/getters: thin forwarders to DMatrix ---
void XGDMatrixSetLabel( void *handle, const float *label, size_t len ){
static_cast<DMatrix*>(handle)->SetLabel(label,len);
}
void XGDMatrixSetWeight( void *handle, const float *weight, size_t len ){
static_cast<DMatrix*>(handle)->SetWeight(weight,len);
}
void XGDMatrixSetGroup( void *handle, const unsigned *group, size_t len ){
static_cast<DMatrix*>(handle)->SetGroup(group,len);
}
const float* XGDMatrixGetLabel( const void *handle, size_t* len ){
return static_cast<const DMatrix*>(handle)->GetLabel(len);
}
const float* XGDMatrixGetWeight( const void *handle, size_t* len ){
return static_cast<const DMatrix*>(handle)->GetWeight(len);
}
void XGDMatrixClear(void *handle){
static_cast<DMatrix*>(handle)->Clear();
}
void XGDMatrixAddRow( void *handle, const XGEntry *data, size_t len ){
static_cast<DMatrix*>(handle)->AddRow(data, len);
}
size_t XGDMatrixNumRow(const void *handle){
return static_cast<const DMatrix*>(handle)->NumRow();
}
const XGEntry* XGDMatrixGetRow(void *handle, unsigned ridx, size_t* len){
return static_cast<DMatrix*>(handle)->GetRow(ridx, len);
}
// xgboost implementation
// Create a booster caching the given matrices; each matrix is finalized
// (CheckInit) before being cached.
void *XGBoosterCreate( void *dmats[], size_t len ){
std::vector<xgboost::regrank::DMatrix*> mats;
for( size_t i = 0; i < len; ++i ){
DMatrix *dtr = static_cast<DMatrix*>(dmats[i]);
dtr->CheckInit();
mats.push_back( dtr );
}
return new Booster( mats );
}
void XGBoosterFree( void *handle ){
delete static_cast<Booster*>(handle);
}
void XGBoosterSetParam( void *handle, const char *name, const char *value ){
static_cast<Booster*>(handle)->SetParam( name, value );
}
void XGBoosterUpdateOneIter( void *handle, void *dtrain ){
Booster *bst = static_cast<Booster*>(handle);
DMatrix *dtr = static_cast<DMatrix*>(dtrain);
bst->CheckInit(); dtr->CheckInit();
bst->UpdateOneIter( *dtr );
}
void XGBoosterBoostOneIter( void *handle, void *dtrain,
float *grad, float *hess, size_t len, int bst_group ){
Booster *bst = static_cast<Booster*>(handle);
DMatrix *dtr = static_cast<DMatrix*>(dtrain);
bst->CheckInit(); dtr->CheckInit();
bst->BoostOneIter( *dtr, grad, hess, len, bst_group );
}
// Evaluation results are written to stderr (not returned).
void XGBoosterEvalOneIter( void *handle, int iter, void *dmats[], const char *evnames[], size_t len ){
Booster *bst = static_cast<Booster*>(handle);
bst->CheckInit();
std::vector<std::string> names;
std::vector<const xgboost::regrank::DMatrix*> mats;
for( size_t i = 0; i < len; ++i ){
mats.push_back( static_cast<DMatrix*>(dmats[i]) );
names.push_back( std::string( evnames[i]) );
}
bst->EvalOneIter( iter, mats, names, stderr );
}
const float *XGBoosterPredict( void *handle, void *dmat, size_t *len, int bst_group ){
return static_cast<Booster*>(handle)->Pred( *static_cast<DMatrix*>(dmat), len, bst_group );
}
void XGBoosterLoadModel( void *handle, const char *fname ){
static_cast<Booster*>(handle)->LoadModel( fname );
}
void XGBoosterSaveModel( const void *handle, const char *fname ){
static_cast<const Booster*>(handle)->SaveModel( fname );
}
// Dump the model as text to `fname`; fmap may be "" for no feature map.
void XGBoosterDumpModel( void *handle, const char *fname, const char *fmap ){
using namespace xgboost::utils;
FILE *fo = FopenCheck( fname, "w" );
FeatMap featmap;
if( strlen(fmap) != 0 ){
featmap.LoadText( fmap );
}
static_cast<Booster*>(handle)->DumpModel( fo, featmap, false );
fclose( fo );
}
void XGBoosterUpdateInteract( void *handle, void *dtrain, const char *action ){
Booster *bst = static_cast<Booster*>(handle);
DMatrix *dtr = static_cast<DMatrix*>(dtrain);
bst->CheckInit(); dtr->CheckInit();
std::string act( action );
bst->UpdateInteract( act, *dtr );
}
};

View File

@@ -1,209 +0,0 @@
#ifndef XGBOOST_PYTHON_H
#define XGBOOST_PYTHON_H
/*!
* \file xgboost_python.h
* \author Tianqi Chen
* \brief python wrapper for xgboost, using ctypes,
* hides everything behind functions
* use c style interface
*/
#include "../booster/xgboost_data.h"
extern "C"{
/*! \brief type of row entry */
typedef xgboost::booster::FMatrixS::REntry XGEntry;
/*!
* \brief create a data matrix
* \return a new data matrix
*/
void* XGDMatrixCreate(void);
/*!
* \brief free space in data matrix
*/
void XGDMatrixFree(void *handle);
/*!
* \brief load a data matrix from text file or buffer(if exists)
* \param handle a instance of data matrix
* \param fname file name
* \param silent print statistics when loading
*/
void XGDMatrixLoad(void *handle, const char *fname, int silent);
/*!
* \brief load a data matrix into binary file
* \param handle a instance of data matrix
* \param fname file name
* \param silent print statistics when saving
*/
void XGDMatrixSaveBinary(void *handle, const char *fname, int silent);
/*!
* \brief set matrix content from csr format
* \param handle a instance of data matrix
* \param indptr pointer to row headers
* \param indices findex
* \param data fvalue
* \param nindptr number of rows in the matrix + 1
* \param nelem number of nonzero elements in the matrix
*/
void XGDMatrixParseCSR( void *handle,
const size_t *indptr,
const unsigned *indices,
const float *data,
size_t nindptr,
size_t nelem );
/*!
* \brief set matrix content from data content
* \param handle a instance of data matrix
* \param data pointer to the data space
* \param nrow number of rows
* \param ncol number columns
* \param missing which value to represent missing value
*/
void XGDMatrixParseMat( void *handle,
const float *data,
size_t nrow,
size_t ncol,
float missing );
/*!
* \brief set label of the training matrix
* \param handle a instance of data matrix
* \param label pointer to label
* \param len length of array
*/
void XGDMatrixSetLabel( void *handle, const float *label, size_t len );
/*!
* \brief set label of the training matrix
* \param handle a instance of data matrix
* \param group pointer to group size
* \param len length of array
*/
void XGDMatrixSetGroup( void *handle, const unsigned *group, size_t len );
/*!
* \brief set weight of each instacne
* \param handle a instance of data matrix
* \param weight data pointer to weights
* \param len length of array
*/
void XGDMatrixSetWeight( void *handle, const float *weight, size_t len );
/*!
* \brief get label set from matrix
* \param handle a instance of data matrix
* \param len used to set result length
* \return pointer to the label
*/
const float* XGDMatrixGetLabel( const void *handle, size_t* len );
/*!
* \brief get weight set from matrix
* \param handle a instance of data matrix
* \param len used to set result length
* \return pointer to the weight
*/
const float* XGDMatrixGetWeight( const void *handle, size_t* len );
/*!
* \brief clear all the records, including feature matrix and label
* \param handle a instance of data matrix
*/
void XGDMatrixClear(void *handle);
/*!
* \brief return number of rows
*/
size_t XGDMatrixNumRow(const void *handle);
/*!
* \brief add row
* \param handle a instance of data matrix
* \param data array of row content
* \param len length of array
*/
void XGDMatrixAddRow(void *handle, const XGEntry *data, size_t len);
/*!
* \brief get ridx-th row of sparse matrix
* \param handle handle
* \param ridx row index
* \param len used to set result length
* \return pointer to the row
*/
const XGEntry* XGDMatrixGetRow(void *handle, unsigned ridx, size_t* len);
// --- start XGBoost class
/*!
* \brief create xgboost learner
* \param dmats matrices that are set to be cached
* \return handle to the created booster
*/
void *XGBoosterCreate( void* dmats[], size_t len );
/*!
* \brief free obj in handle
* \param handle handle to be freed
*/
void XGBoosterFree( void* handle );
/*!
* \brief set parameters
* \param handle handle
* \param name parameter name
* \param val value of parameter
*/
void XGBoosterSetParam( void *handle, const char *name, const char *value );
/*!
* \brief update the model in one round using dtrain
* \param handle handle
* \param dtrain training data
*/
void XGBoosterUpdateOneIter( void *handle, void *dtrain );
/*!
* \brief update the model, by directly specify gradient and second order gradient,
* this can be used to replace UpdateOneIter, to support customized loss function
* \param handle handle
* \param dtrain training data
* \param grad gradient statistics
* \param hess second order gradient statistics
* \param len length of grad/hess array
* \param bst_group boost group we are working at, default = -1
*/
void XGBoosterBoostOneIter( void *handle, void *dtrain,
float *grad, float *hess, size_t len, int bst_group );
/*!
* \brief print evaluation statistics to stdout for xgboost
* \param handle handle
* \param iter current iteration rounds
* \param dmats pointers to data to be evaluated
* \param evnames pointers to names of each data
* \param len length of dmats
*/
void XGBoosterEvalOneIter( void *handle, int iter, void *dmats[], const char *evnames[], size_t len );
/*!
* \brief make prediction based on dmat
* \param handle handle
* \param dmat data matrix
* \param len used to store length of returning result
* \param bst_group booster group, if model contains multiple booster group, default = -1 means predict for all groups
*/
const float *XGBoosterPredict( void *handle, void *dmat, size_t *len, int bst_group );
/*!
* \brief load model from existing file
* \param handle handle
* \param fname file name
*/
void XGBoosterLoadModel( void *handle, const char *fname );
/*!
* \brief save model into existing file
* \param handle handle
* \param fname file name
*/
void XGBoosterSaveModel( const void *handle, const char *fname );
/*!
* \brief dump model into text file
* \param handle handle
* \param fname file name
* \param fmap name to fmap can be empty string
*/
void XGBoosterDumpModel( void *handle, const char *fname, const char *fmap );
/*!
* \brief interactively update model: beta
* \param handle handle
* \param dtrain training data
* \param action action name
*/
void XGBoosterUpdateInteract( void *handle, void *dtrain, const char* action );
};
#endif

240
python/xgboost_wrapper.cpp Normal file
View File

@@ -0,0 +1,240 @@
// implementations in ctypes
#include <cstdio>
#include <vector>
#include <string>
#include <cstring>
#include <algorithm>
#include "./xgboost_wrapper.h"
#include "../src/data.h"
#include "../src/learner/learner-inl.hpp"
#include "../src/io/io.h"
#include "../src/io/simple_dmatrix-inl.hpp"
using namespace xgboost;
using namespace xgboost::io;
namespace xgboost {
namespace wrapper {
// booster wrapper class
// booster wrapper class: adds C-friendly helpers and owns the string
// buffers whose raw pointers are handed across the C ABI.
class Booster: public learner::BoostLearner<FMatrixS> {
 public:
  explicit Booster(const std::vector<DataMatrix*>& mats) {
    this->silent = 1;
    this->SetCacheData(mats);
  }
  // Predict into the internal preds_ buffer; the returned pointer is
  // owned by the booster and valid until the next prediction.
  const float *Pred(const DataMatrix &dmat, size_t *len) {
    this->Predict(dmat, &this->preds_);
    *len = this->preds_.size();
    return &this->preds_[0];
  }
  // Boost one round from externally supplied gradient/hessian arrays
  // (used to implement custom objectives from python).
  inline void BoostOneIter(const DataMatrix &train,
                           float *grad, float *hess, size_t len) {
    this->gpair_.resize(len);
    const unsigned ndata = static_cast<unsigned>(len);
    // pack (grad, hess) pairs in parallel before handing off to the gbm
    #pragma omp parallel for schedule(static)
    for (unsigned j = 0; j < ndata; ++j) {
      gpair_[j] = bst_gpair(grad[j], hess[j]);
    }
    gbm_->DoBoost(gpair_, train.fmat, train.info.root_index);
  }
  // Dump the model and cache both the strings and their c_str pointers;
  // the returned array stays valid until the next dump on this booster.
  inline const char** GetModelDump(const utils::FeatMap& fmap, bool with_stats, size_t *len) {
    model_dump = this->DumpModel(fmap, with_stats);
    model_dump_cptr.resize(model_dump.size());
    for (size_t i = 0; i < model_dump.size(); ++i) {
      model_dump_cptr[i] = model_dump[i].c_str();
    }
    *len = model_dump.size();
    return &model_dump_cptr[0];
  }
  // temporal fields
  // temporal data to save evaluation dump
  std::string eval_str;
  // temporal space to save model dump
  std::vector<std::string> model_dump;
  std::vector<const char*> model_dump_cptr;
};
} // namespace wrapper
} // namespace xgboost
using namespace xgboost::wrapper;
extern "C"{
// Load a data matrix from file; forwards to io::LoadDataMatrix.
// NOTE(review): the meaning of the trailing `false` flag is defined in
// io.h — confirm against that declaration.
void* XGDMatrixCreateFromFile(const char *fname, int silent) {
  return LoadDataMatrix(fname, silent, false);
}
void* XGDMatrixCreateFromCSR(const size_t *indptr,
const unsigned *indices,
const float *data,
size_t nindptr,
size_t nelem) {
DMatrixSimple *p_mat = new DMatrixSimple();
DMatrixSimple &mat = *p_mat;
mat.row_ptr_.resize(nindptr);
memcpy(&mat.row_ptr_[0], indptr, sizeof(size_t)*nindptr);
mat.row_data_.resize(nelem);
for (size_t i = 0; i < nelem; ++ i) {
mat.row_data_[i] = SparseBatch::Entry(indices[i], data[i]);
mat.info.num_col = std::max(mat.info.num_col,
static_cast<size_t>(indices[i]+1));
}
mat.info.num_row = nindptr - 1;
return p_mat;
}
void* XGDMatrixCreateFromMat(const float *data,
size_t nrow,
size_t ncol,
float missing) {
DMatrixSimple *p_mat = new DMatrixSimple();
DMatrixSimple &mat = *p_mat;
mat.info.num_row = nrow;
mat.info.num_col = ncol;
for (size_t i = 0; i < nrow; ++i, data += ncol) {
size_t nelem = 0;
for (size_t j = 0; j < ncol; ++j) {
if (data[j] != missing) {
mat.row_data_.push_back(SparseBatch::Entry(j, data[j]));
++nelem;
}
}
mat.row_ptr_.push_back(mat.row_ptr_.back() + nelem);
}
return p_mat;
}
/*!
 * Create a new DMatrixSimple containing only the rows listed in idxset.
 * Labels, weights and root indices are sliced along with the rows.
 * Fixes two defects in the previous version:
 *  1) root indices were appended to ret.info.weights instead of
 *     ret.info.root_index, corrupting weights in the slice;
 *  2) the bounds Check ran AFTER batch[ridx] was accessed, so an
 *     out-of-range index was dereferenced before being rejected.
 */
void* XGDMatrixSliceDMatrix(void *handle,
                            const int *idxset,
                            size_t len) {
  DMatrixSimple tmp;
  DataMatrix &dsrc = *static_cast<DataMatrix*>(handle);
  // if the source is not already a DMatrixSimple, copy it into one so
  // that random row access below is simple and in-memory
  if (dsrc.magic != DMatrixSimple::kMagic) {
    tmp.CopyFrom(dsrc);
  }
  DataMatrix &src = (dsrc.magic == DMatrixSimple::kMagic ?
                     *static_cast<DMatrixSimple*>(handle): tmp);
  DMatrixSimple *p_ret = new DMatrixSimple();
  DMatrixSimple &ret = *p_ret;
  utils::Check(src.info.group_ptr.size() == 0,
               "slice does not support group structure");
  ret.Clear();
  ret.info.num_row = len;
  ret.info.num_col = src.info.num_col;
  utils::IIterator<SparseBatch> *iter = src.fmat.RowIterator();
  iter->BeforeFirst();
  utils::Assert(iter->Next(), "slice");
  const SparseBatch &batch = iter->Value();
  for (size_t i = 0; i < len; ++i) {
    const int ridx = idxset[i];
    // validate BEFORE touching the batch (was checked after the access)
    utils::Check(ridx >= 0 && static_cast<size_t>(ridx) < batch.size,
                 "slice index exceed number of rows");
    SparseBatch::Inst inst = batch[ridx];
    ret.row_data_.resize(ret.row_data_.size() + inst.length);
    memcpy(&ret.row_data_[ret.row_ptr_.back()], inst.data,
           sizeof(SparseBatch::Entry) * inst.length);
    ret.row_ptr_.push_back(ret.row_ptr_.back() + inst.length);
    if (src.info.labels.size() != 0) {
      ret.info.labels.push_back(src.info.labels[ridx]);
    }
    if (src.info.weights.size() != 0) {
      ret.info.weights.push_back(src.info.weights[ridx]);
    }
    if (src.info.root_index.size() != 0) {
      // was: ret.info.weights.push_back(...) — wrong target vector
      ret.info.root_index.push_back(src.info.root_index[ridx]);
    }
  }
  return p_ret;
}
// Release a matrix created by any XGDMatrixCreate* function.
void XGDMatrixFree(void *handle) {
  delete static_cast<DataMatrix*>(handle);
}
void XGDMatrixSaveBinary(void *handle, const char *fname, int silent) {
  SaveDataMatrix(*static_cast<DataMatrix*>(handle), fname, silent);
}
// Copy `len` labels into the matrix meta info (overwrites existing).
void XGDMatrixSetLabel(void *handle, const float *label, size_t len) {
  DataMatrix *pmat = static_cast<DataMatrix*>(handle);
  pmat->info.labels.resize(len);
  memcpy(&(pmat->info).labels[0], label, sizeof(float) * len);
}
// Copy `len` per-instance weights into the matrix meta info.
void XGDMatrixSetWeight(void *handle, const float *weight, size_t len) {
  DataMatrix *pmat = static_cast<DataMatrix*>(handle);
  pmat->info.weights.resize(len);
  memcpy(&(pmat->info).weights[0], weight, sizeof(float) * len);
}
// Convert per-query group sizes into cumulative group_ptr offsets.
void XGDMatrixSetGroup(void *handle, const unsigned *group, size_t len){
  DataMatrix *pmat = static_cast<DataMatrix*>(handle);
  pmat->info.group_ptr.resize(len + 1);
  pmat->info.group_ptr[0] = 0;
  for (size_t i = 0; i < len; ++ i) {
    pmat->info.group_ptr[i+1] = pmat->info.group_ptr[i]+group[i];
  }
}
// Return pointer into the matrix-owned label vector; length in *len.
const float* XGDMatrixGetLabel(const void *handle, size_t* len) {
  const DataMatrix *pmat = static_cast<const DataMatrix*>(handle);
  *len = pmat->info.labels.size();
  return &(pmat->info.labels[0]);
}
// Return pointer into the matrix-owned weight vector; length in *len.
const float* XGDMatrixGetWeight(const void *handle, size_t* len) {
  const DataMatrix *pmat = static_cast<const DataMatrix*>(handle);
  *len = pmat->info.weights.size();
  return &(pmat->info.weights[0]);
}
size_t XGDMatrixNumRow(const void *handle) {
  return static_cast<const DataMatrix*>(handle)->info.num_row;
}
// xgboost implementation
// Create a booster that caches the given matrices; the booster does NOT
// take ownership of the matrices, only of its own state.
void *XGBoosterCreate(void *dmats[], size_t len) {
  std::vector<DataMatrix*> mats;
  for (size_t i = 0; i < len; ++i) {
    DataMatrix *dtr = static_cast<DataMatrix*>(dmats[i]);
    mats.push_back(dtr);
  }
  return new Booster(mats);
}
void XGBoosterFree(void *handle) {
  delete static_cast<Booster*>(handle);
}
void XGBoosterSetParam(void *handle, const char *name, const char *value) {
  static_cast<Booster*>(handle)->SetParam(name, value);
}
// Run one boosting round on dtrain using the built-in objective.
void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain) {
  Booster *bst = static_cast<Booster*>(handle);
  DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
  bst->CheckInit(dtr);
  bst->UpdateOneIter(iter, *dtr);
}
// Run one boosting round from caller-supplied grad/hess arrays
// (customized objective path).
void XGBoosterBoostOneIter(void *handle, void *dtrain,
                           float *grad, float *hess, size_t len) {
  Booster *bst = static_cast<Booster*>(handle);
  DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
  bst->CheckInit(dtr);
  bst->BoostOneIter(*dtr, grad, hess, len);
}
const char* XGBoosterEvalOneIter(void *handle, int iter, void *dmats[], const char *evnames[], size_t len) {
Booster *bst = static_cast<Booster*>(handle);
std::vector<std::string> names;
std::vector<const DataMatrix*> mats;
for (size_t i = 0; i < len; ++i) {
mats.push_back(static_cast<DataMatrix*>(dmats[i]));
names.push_back(std::string(evnames[i]));
}
bst->eval_str = bst->EvalOneIter(iter, mats, names);
return bst->eval_str.c_str();
}
// Predict on dmat; result buffer is owned by the booster (see
// Booster::Pred) and valid until the next prediction call.
const float *XGBoosterPredict(void *handle, void *dmat, size_t *len) {
  return static_cast<Booster*>(handle)->Pred(*static_cast<DataMatrix*>(dmat), len);
}
void XGBoosterLoadModel(void *handle, const char *fname) {
  static_cast<Booster*>(handle)->LoadModel(fname);
}
void XGBoosterSaveModel( const void *handle, const char *fname) {
  static_cast<const Booster*>(handle)->SaveModel(fname);
}
const char** XGBoosterDumpModel(void *handle, const char *fmap, size_t *len){
using namespace xgboost::utils;
FeatMap featmap;
if(strlen(fmap) != 0) {
featmap.LoadText(fmap);
}
return static_cast<Booster*>(handle)->GetModelDump(featmap, false, len);
}
};

182
python/xgboost_wrapper.h Normal file
View File

@@ -0,0 +1,182 @@
#ifndef XGBOOST_WRAPPER_H_
#define XGBOOST_WRAPPER_H_
/*!
* \file xgboost_wrapper.h
* \author Tianqi Chen
* \brief a C style wrapper of xgboost
* can be used to create wrapper of other languages
*/
#include <cstdio>
extern "C" {
/*!
* \brief load a data matrix
* \return a loaded data matrix
*/
void* XGDMatrixCreateFromFile(const char *fname, int silent);
/*!
* \brief create a matrix content from csr format
* \param handle a instance of data matrix
* \param indptr pointer to row headers
* \param indices findex
* \param data fvalue
* \param nindptr number of rows in the matrix + 1
* \param nelem number of nonzero elements in the matrix
* \return created dmatrix
*/
void* XGDMatrixCreateFromCSR(const size_t *indptr,
const unsigned *indices,
const float *data,
size_t nindptr,
size_t nelem);
/*!
* \brief create matrix content from dense matrix
* \param handle a instance of data matrix
* \param data pointer to the data space
* \param nrow number of rows
* \param ncol number columns
* \param missing which value to represent missing value
* \return created dmatrix
*/
void* XGDMatrixCreateFromMat(const float *data,
size_t nrow,
size_t ncol,
float missing);
/*!
* \brief create a new dmatrix from sliced content of existing matrix
* \param handle instance of data matrix to be sliced
* \param idxset index set
* \param len length of index set
* \return a sliced new matrix
*/
void* XGDMatrixSliceDMatrix(void *handle,
const int *idxset,
size_t len);
/*!
* \brief free space in data matrix
*/
void XGDMatrixFree(void *handle);
/*!
* \brief load a data matrix into binary file
* \param handle a instance of data matrix
* \param fname file name
* \param silent print statistics when saving
*/
void XGDMatrixSaveBinary(void *handle, const char *fname, int silent);
/*!
* \brief set label of the training matrix
* \param handle a instance of data matrix
* \param label pointer to label
* \param len length of array
*/
void XGDMatrixSetLabel(void *handle, const float *label, size_t len);
/*!
* \brief set weight of each instance
* \param handle a instance of data matrix
* \param weight data pointer to weights
* \param len length of array
*/
void XGDMatrixSetWeight(void *handle, const float *weight, size_t len);
/*!
* \brief set label of the training matrix
* \param handle a instance of data matrix
* \param group pointer to group size
* \param len length of array
*/
void XGDMatrixSetGroup(void *handle, const unsigned *group, size_t len);
/*!
* \brief get label set from matrix
* \param handle a instance of data matrix
* \param len used to set result length
* \return pointer to the label
*/
const float* XGDMatrixGetLabel(const void *handle, size_t* out_len);
/*!
* \brief get weight set from matrix
* \param handle a instance of data matrix
* \param len used to set result length
* \return pointer to the weight
*/
const float* XGDMatrixGetWeight(const void *handle, size_t* out_len);
/*!
* \brief return number of rows
*/
size_t XGDMatrixNumRow(const void *handle);
// --- start XGBoost class
/*!
* \brief create xgboost learner
* \param dmats matrices that are set to be cached
* \param len length of dmats
*/
void *XGBoosterCreate(void* dmats[], size_t len);
/*!
* \brief free obj in handle
* \param handle handle to be freed
*/
void XGBoosterFree(void* handle);
/*!
* \brief set parameters
* \param handle handle
* \param name parameter name
* \param val value of parameter
*/
void XGBoosterSetParam(void *handle, const char *name, const char *value);
/*!
* \brief update the model in one round using dtrain
* \param handle handle
* \param iter current iteration rounds
* \param dtrain training data
*/
void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain);
/*!
* \brief update the model, by directly specify gradient and second order gradient,
* this can be used to replace UpdateOneIter, to support customized loss function
* \param handle handle
* \param dtrain training data
* \param grad gradient statistics
* \param hess second order gradient statistics
* \param len length of grad/hess array
*/
void XGBoosterBoostOneIter(void *handle, void *dtrain,
float *grad, float *hess, size_t len);
/*!
* \brief get evaluation statistics for xgboost
* \param handle handle
* \param iter current iteration rounds
* \param dmats pointers to data to be evaluated
* \param evnames pointers to names of each data
* \param len length of dmats
* \return the string containing evaluation statistics
*/
const char *XGBoosterEvalOneIter(void *handle, int iter, void *dmats[],
const char *evnames[], size_t len);
/*!
* \brief make prediction based on dmat
* \param handle handle
* \param dmat data matrix
* \param len used to store length of returning result
*/
const float *XGBoosterPredict(void *handle, void *dmat, size_t *len);
/*!
* \brief load model from existing file
* \param handle handle
* \param fname file name
*/
void XGBoosterLoadModel(void *handle, const char *fname);
/*!
* \brief save model into existing file
* \param handle handle
* \param fname file name
*/
void XGBoosterSaveModel(const void *handle, const char *fname);
/*!
* \brief dump model, return array of strings representing model dump
* \param handle handle
* \param fmap name to fmap can be empty string
* \param out_len length of output array
* \return char *data[], representing dump of each model
*/
const char** XGBoosterDumpModel(void *handle, const char *fmap,
size_t *out_len);
};
#endif // XGBOOST_WRAPPER_H_