finish matrix

This commit is contained in:
tqchen 2014-05-03 17:12:25 -07:00
parent 5bab27cfa6
commit 20de7f8f97
3 changed files with 124 additions and 28 deletions

View File

@ -1,15 +1,20 @@
# module for xgboost
import ctypes
import numpy
# optionally have scipy sparse, though not necessary
import scipy.sparse as scp
# load in xgboost library
xglib = ctypes.cdll.LoadLibrary('./libxgboostpy.so')
# entry type of sparse matrix
class REntry(ctypes.Structure):
    """Entry of a sparse matrix: an unsigned int `findex` paired with a float `fvalue`."""
    _fields_ = [
        ("findex", ctypes.c_uint),
        ("fvalue", ctypes.c_float),
    ]
# load in xgboost library (the path is relative, so it is resolved against the current working directory)
xglib = ctypes.cdll.LoadLibrary('./libxgboostpy.so')
# declare the non-int return types; ctypes treats a foreign function as returning C int unless restype is set
# NOTE(review): no argtypes are declared, so plain Python ints are passed as C int — confirm handles survive this on 64-bit builds
xglib.XGDMatrixCreate.restype = ctypes.c_void_p
xglib.XGDMatrixNumRow.restype = ctypes.c_ulong
xglib.XGDMatrixGetLabel.restype = ctypes.POINTER( ctypes.c_float )
xglib.XGDMatrixGetRow.restype = ctypes.POINTER( REntry )
# data matrix used in xgboost
class DMatrix:
# constructor
@ -40,27 +45,37 @@ class DMatrix:
len(csr.indptr), len(csr.data) )
# destructor
def __del__(self):
    """Release the native matrix referenced by self.handle."""
    # Free exactly once: a second XGDMatrixFree would hand an already
    # released handle back to the library.
    xglib.XGDMatrixFree(self.handle)
# load data from file
def load(self, fname, silent=True):
    """Load matrix content from the file `fname` into this DMatrix.

    `silent` is forwarded to XGDMatrixLoad as an int flag. Its default of
    True yields the flag value 1, the same value the earlier one-argument
    form of this method always passed, so `load(fname)` keeps working.
    """
    xglib.XGDMatrixLoad(self.handle, ctypes.c_char_p(fname), int(silent))
# save data to binary file
def save_binary(self, fname, silent=True):
    """Write this matrix to `fname` through XGDMatrixSaveBinary; `silent` is passed as an int flag."""
    path = ctypes.c_char_p(fname)
    quiet_flag = int(silent)
    xglib.XGDMatrixSaveBinary(self.handle, path, quiet_flag)
# set label of dmatrix
def set_label(self, label):
    """Copy the sequence `label` into a C float array and hand it to XGDMatrixSetLabel."""
    count = len(label)
    c_labels = (ctypes.c_float * count)(*label)
    xglib.XGDMatrixSetLabel(self.handle, c_labels, count)
# get label from dmatrix
def get_label(self):
    """Return the labels of this matrix as a Python list of floats."""
    # XGDMatrixGetLabel.restype is declared once next to the library load,
    # so it is neither re-assigned here nor is the native call repeated.
    length = ctypes.c_ulong()
    labels = xglib.XGDMatrixGetLabel(self.handle, ctypes.byref(length))
    return [ labels[i] for i in xrange(length.value) ]
# clear everything
def clear(self):
    """Ask the native library to clear this matrix (delegates to XGDMatrixClear)."""
    handle = self.handle
    xglib.XGDMatrixClear(handle)
def num_row(self):
    """Return the row count reported by XGDMatrixNumRow for this matrix."""
    nrow = xglib.XGDMatrixNumRow(self.handle)
    return nrow
# append a row to DMatrix
def add_row(self, row, label=None):
    """Append one sparse row to the matrix.

    `row` is a sequence of entries convertible to REntry, e.g.
    (findex, fvalue) tuples. `label` is optional so that both call forms,
    add_row(row) and add_row(row, label), are accepted.
    """
    entries = (REntry * len(row))(*row)
    if label is None:
        xglib.XGDMatrixAddRow(self.handle, entries, len(row))
    else:
        # NOTE(review): the XGDMatrixAddRow prototype visible in the C sources
        # takes (handle, data, len) only - confirm the library accepts a label.
        xglib.XGDMatrixAddRow(self.handle, entries, len(row), label)
# get n-th row from DMatrix
def __getitem__(self, ridx):
    """Return row `ridx` as a list of (findex, fvalue) tuples, with findex converted to int."""
    nentry = ctypes.c_ulong()
    entries = xglib.XGDMatrixGetRow(self.handle, ridx, ctypes.byref(nentry))
    result = []
    for k in xrange(nentry.value):
        result.append((int(entries[k].findex), entries[k].fvalue))
    return result
# ad-hoc smoke run: build a matrix from 'xx.buffer', round-trip its labels,
# append a row, print the row count and clear the matrix
mat = DMatrix('xx.buffer')
lb = mat.get_label()
print len(lb)
mat.set_label(lb)
# NOTE(review): called without a label - confirm add_row accepts a single argument
mat.add_row( [(1,2), (3,4)] )
print mat.num_row()
mat.clear()

View File

@ -18,12 +18,27 @@ namespace xgboost{
this->CacheLoad(fname, silent);
init_col_ = this->data.HaveColAccess();
}
inline void Clear( void ){
this->data.Clear();
this->info.labels.clear();
this->info.weights.clear();
this->info.group_ptr.clear();
}
inline size_t NumRow( void ) const{
return this->data.NumRow();
}
/*!
 * \brief append one row at the end of the feature matrix
 * \param data pointer to len entries; they are copied into the matrix
 * \param len number of entries in the row, 0 appends an empty row
 */
inline void AddRow( const XGEntry *data, size_t len ){
    xgboost::booster::FMatrixS &mat = this->data;
    const size_t begin = mat.row_ptr_.back();
    mat.row_data_.resize( begin + len );
    // An empty row has nothing to copy. Skipping the copy also avoids forming
    // &row_data_[begin] when begin == row_data_.size() (out of range) and
    // avoids calling memcpy with a possibly null source pointer.
    if( len != 0 ){
        memcpy( &mat.row_data_[begin], data, sizeof(XGEntry)*len );
    }
    mat.row_ptr_.push_back( begin + len );
}
/*!
 * \brief access the ridx-th row of the feature matrix
 * \param ridx row index; not range-checked here
 * \param len receives the number of entries in the row
 * \return pointer to the first entry of the row
 */
inline const XGEntry* GetRow(unsigned ridx, size_t* len) const{
    const xgboost::booster::FMatrixS &fmat = this->data;
    const XGEntry *row_begin = &fmat.row_data_[ fmat.row_ptr_[ridx] ];
    *len = fmat.row_ptr_[ridx+1] - fmat.row_ptr_[ridx];
    return row_begin;
}
inline void ParseCSR( const size_t *indptr,
const unsigned *indices,
const float *data,
@ -68,9 +83,6 @@ extern "C"{
void XGDMatrixSaveBinary( void *handle, const char *fname, int silent ){
static_cast<DMatrix*>(handle)->SaveBinary(fname, silent!=0);
}
// C entry point: forwards to DMatrix::AddRow on the matrix behind handle
// NOTE(review): an identical XGDMatrixAddRow definition appears further down in this view;
// this looks like the pre-move copy - confirm only one of them is compiled
void XGDMatrixAddRow( void *handle, const XGEntry *data, size_t len ){
static_cast<DMatrix*>(handle)->AddRow(data, len);
}
void XGDMatrixParseCSR( void *handle,
const size_t *indptr,
const unsigned *indices,
@ -85,5 +97,17 @@ extern "C"{
const float* XGDMatrixGetLabel( const void *handle, size_t* len ){
return static_cast<const DMatrix*>(handle)->GetLabel(len);
}
void XGDMatrixClear(void *handle){
static_cast<DMatrix*>(handle)->Clear();
}
void XGDMatrixAddRow( void *handle, const XGEntry *data, size_t len ){
static_cast<DMatrix*>(handle)->AddRow(data, len);
}
size_t XGDMatrixNumRow(const void *handle){
return static_cast<const DMatrix*>(handle)->NumRow();
}
// C entry point: forwards to DMatrix::GetRow; len receives the row length
const XGEntry* XGDMatrixGetRow(void *handle, unsigned ridx, size_t* len){
    DMatrix *dmat = static_cast<DMatrix*>(handle);
    return dmat->GetRow(ridx, len);
}
};

View File

@ -60,8 +60,18 @@ extern "C"{
* \brief get label set from matrix
* \param handle a instance of data matrix
* \param len used to set result length
* \return pointer to the labels
*/
const float* XGDMatrixGetLabel( const void *handle, size_t* len );
/*!
* \brief clear all the records, including feature matrix and label
* \param handle a instance of data matrix
*/
void XGDMatrixClear(void *handle);
/*!
* \brief return number of rows
*/
size_t XGDMatrixNumRow(const void *handle);
/*!
* \brief add row
* \param handle a instance of data matrix
@ -70,15 +80,62 @@ extern "C"{
*/
void XGDMatrixAddRow(void *handle, const XGEntry *data, size_t len);
/*!
* \brief create a booster
*/
void* XGBoostCreate(void);
/*!
* \brief create a booster
*/
void* XGBoost(void);
* \brief get ridx-th row of sparse matrix
* \param handle handle
* \param ridx row index
* \param len used to set result length
* \return pointer to the row
*/
const XGEntry* XGDMatrixGetRow(void *handle, unsigned ridx, size_t* len);
// --- start XGBoost class
/*!
* \brief create xgboost learner
* \param dmats matrices that are set to be cached
* \param len length of dmats
* \return handle to the created booster
*/
void *CreateXGBooster( void**dmats, size_t len );
/*!
* \brief set parameters
* \param handle handle
* \param name parameter name
* \param val value of parameter
*/
void XGBoosterSetParam( void *handle, const char *name, const char *value );
/*!
* \brief update the model in one round using dtrain
* \param handle handle
* \param dtrain training data
*/
void XGBoosterUpdateOneIter( void *handle, void *dtrain );
/*!
* \brief print evaluation statistics to stdout for xgboost
* \param handle handle
* \param iter current iteration rounds
* \param dmats pointers to data to be evaluated
* \param evnames pointers to names of each data
* \param len length of dmats
*/
void XGBoosterEvalOneIter( void *handle, int iter, void *dmats[], const char *evnames[], size_t len );
/*!
* \brief make prediction based on dmat
* \param handle handle
* \param dmat data matrix
* \param len used to store length of returning result
*/
const float *XGBoosterPredict( void *handle, void *dmat, size_t *len );
/*!
* \brief load model from existing file
* \param handle handle
* \param fname file name
*/
void XGBoosterLoadModel( void *handle, const char *fname );
/*!
* \brief save model into existing file
* \param handle handle
* \param fname file name
*/
void XGBoosterSaveModel( void *handle, const char *fname );
};
#endif