diff --git a/python/xgboost.py b/python/xgboost.py index 63326b0f5..8f64d5b0d 100644 --- a/python/xgboost.py +++ b/python/xgboost.py @@ -1,15 +1,20 @@ # module for xgboost import ctypes -import numpy +# optionally have scipy sparse, though not necessary import scipy.sparse as scp -# load in xgboost library -xglib = ctypes.cdll.LoadLibrary('./libxgboostpy.so') - # entry type of sparse matrix class REntry(ctypes.Structure): _fields_ = [("findex", ctypes.c_uint), ("fvalue", ctypes.c_float) ] +# load in xgboost library +xglib = ctypes.cdll.LoadLibrary('./libxgboostpy.so') + +xglib.XGDMatrixCreate.restype = ctypes.c_void_p +xglib.XGDMatrixNumRow.restype = ctypes.c_ulong +xglib.XGDMatrixGetLabel.restype = ctypes.POINTER( ctypes.c_float ) +xglib.XGDMatrixGetRow.restype = ctypes.POINTER( REntry ) + # data matrix used in xgboost class DMatrix: # constructor @@ -40,27 +45,37 @@ class DMatrix: len(csr.indptr), len(csr.data) ) # destructor def __del__(self): - xglib.XGDMatrixFree(self.handle) + xglib.XGDMatrixFree(self.handle) # load data from file - def load(self, fname): - xglib.XGDMatrixLoad(self.handle, ctypes.c_char_p(fname), 1) + def load(self, fname, silent=True): + xglib.XGDMatrixLoad(self.handle, ctypes.c_char_p(fname), int(silent)) + # save data to binary file + def save_binary(self, fname, silent=True): + xglib.XGDMatrixSaveBinary(self.handle, ctypes.c_char_p(fname), int(silent)) # set label of dmatrix def set_label(self, label): xglib.XGDMatrixSetLabel(self.handle, (ctypes.c_float*len(label))(*label), len(label) ); # get label from dmatrix def get_label(self): - GetLabel = xglib.XGDMatrixGetLabel - GetLabel.restype = ctypes.POINTER( ctypes.c_float ) length = ctypes.c_ulong() - labels = GetLabel(self.handle, ctypes.byref(length)); + labels = xglib.XGDMatrixGetLabel(self.handle, ctypes.byref(length)); return [ labels[i] for i in xrange(length.value) ] + # clear everything + def clear(self): + xglib.XGDMatrixClear(self.handle) + def num_row(self): + return 
xglib.XGDMatrixNumRow(self.handle) # append a row to DMatrix - def add_row(self, row): - xglib.XGDMatrixAddRow(self.handle, (REntry*len(row))(*row), len(row) ); - + def add_row(self, row, label): + xglib.XGDMatrixAddRow(self.handle, (REntry*len(row))(*row), len(row), label ) + # get n-th row from DMatrix + def __getitem__(self, ridx): + length = ctypes.c_ulong() + row = xglib.XGDMatrixGetRow(self.handle, ridx, ctypes.byref(length) ); + return [ (int(row[i].findex),row[i].fvalue) for i in xrange(length.value) ] + + mat = DMatrix('xx.buffer') -lb = mat.get_label() -print len(lb) -mat.set_label(lb) -mat.add_row( [(1,2), (3,4)] ) +print mat.num_row() +mat.clear() diff --git a/python/xgboost_python.cpp b/python/xgboost_python.cpp index 71caff1d0..8fb664417 100644 --- a/python/xgboost_python.cpp +++ b/python/xgboost_python.cpp @@ -18,12 +18,27 @@ namespace xgboost{ this->CacheLoad(fname, silent); init_col_ = this->data.HaveColAccess(); } + inline void Clear( void ){ + this->data.Clear(); + this->info.labels.clear(); + this->info.weights.clear(); + this->info.group_ptr.clear(); + } + inline size_t NumRow( void ) const{ + return this->data.NumRow(); + } inline void AddRow( const XGEntry *data, size_t len ){ xgboost::booster::FMatrixS &mat = this->data; mat.row_data_.resize( mat.row_ptr_.back() + len ); memcpy( &mat.row_data_[mat.row_ptr_.back()], data, sizeof(XGEntry)*len ); mat.row_ptr_.push_back( mat.row_ptr_.back() + len ); } + inline const XGEntry* GetRow(unsigned ridx, size_t* len) const{ + const xgboost::booster::FMatrixS &mat = this->data; + + *len = mat.row_ptr_[ridx+1] - mat.row_ptr_[ridx]; + return &mat.row_data_[ mat.row_ptr_[ridx] ]; + } inline void ParseCSR( const size_t *indptr, const unsigned *indices, const float *data, @@ -68,9 +83,6 @@ extern "C"{ void XGDMatrixSaveBinary( void *handle, const char *fname, int silent ){ static_cast(handle)->SaveBinary(fname, silent!=0); } - void XGDMatrixAddRow( void *handle, const XGEntry *data, size_t len ){ - 
static_cast(handle)->AddRow(data, len); - } void XGDMatrixParseCSR( void *handle, const size_t *indptr, const unsigned *indices, @@ -85,5 +97,17 @@ extern "C"{ const float* XGDMatrixGetLabel( const void *handle, size_t* len ){ return static_cast(handle)->GetLabel(len); } + void XGDMatrixClear(void *handle){ + static_cast(handle)->Clear(); + } + void XGDMatrixAddRow( void *handle, const XGEntry *data, size_t len ){ + static_cast(handle)->AddRow(data, len); + } + size_t XGDMatrixNumRow(const void *handle){ + return static_cast(handle)->NumRow(); + } + const XGEntry* XGDMatrixGetRow(void *handle, unsigned ridx, size_t* len){ + return static_cast(handle)->GetRow(ridx, len); + } }; diff --git a/python/xgboost_python.h b/python/xgboost_python.h index 2869c7aeb..313b4d817 100644 --- a/python/xgboost_python.h +++ b/python/xgboost_python.h @@ -60,8 +60,18 @@ extern "C"{ * \brief get label set from matrix * \param handle a instance of data matrix * \param len used to set result length + * \return pointer to the labels */ const float* XGDMatrixGetLabel( const void *handle, size_t* len ); + /*! + * \brief clear all the records, including feature matrix and label + * \param handle a instance of data matrix + */ + void XGDMatrixClear(void *handle); + /*! + * \brief return number of rows + */ + size_t XGDMatrixNumRow(const void *handle); /*! * \brief add row * \param handle a instance of data matrix @@ -70,15 +80,62 @@ extern "C"{ */ void XGDMatrixAddRow(void *handle, const XGEntry *data, size_t len); /*! - * \brief create a booster - */ - void* XGBoostCreate(void); - - /*! - * \brief create a booster - */ - void* XGBoost(void); + * \brief get ridx-th row of sparse matrix + * \param handle handle + * \param ridx row index + * \param len used to set result length + * \return pointer to the row + */ + const XGEntry* XGDMatrixGetRow(void *handle, unsigned ridx, size_t* len); + // --- start XGBoost class + /*! 
+ * \brief create xgboost learner + * \param dmats matrices that are set to be cached + * \return the created booster + */ + void *CreateXGBooster( void**dmats, size_t len ); + /*! + * \brief set parameters + * \param handle handle + * \param name parameter name + * \param value value of parameter + */ + void XGBoosterSetParam( void *handle, const char *name, const char *value ); + /*! + * \brief update the model in one round using dtrain + * \param handle handle + * \param dtrain training data + */ + void XGBoosterUpdateOneIter( void *handle, void *dtrain ); + /*! + * \brief print evaluation statistics to stdout for xgboost + * \param handle handle + * \param iter current iteration rounds + * \param dmats pointers to data to be evaluated + * \param evnames pointers to names of each data + * \param len length of dmats + */ + void XGBoosterEvalOneIter( void *handle, int iter, void *dmats[], const char *evnames[], size_t len ); + /*! + * \brief make prediction based on dmat + * \param handle handle + * \param dmat data matrix + * \param len used to store length of returning result + */ + const float *XGBoosterPredict( void *handle, void *dmat, size_t *len ); + /*! + * \brief load model from existing file + * \param handle handle + * \param fname file name + */ + void XGBoosterLoadModel( void *handle, const char *fname ); + /*! + * \brief save model into existing file + * \param handle handle + * \param fname file name + */ + void XGBoosterSaveModel( void *handle, const char *fname ); }; #endif