fix numpy convert

This commit is contained in:
tqchen 2014-05-15 20:28:34 -07:00
parent a7f3d7edd7
commit 2be3f6ece0
5 changed files with 64 additions and 6 deletions

View File

@ -1,4 +1,4 @@
Copyright (c) 2014 Tianqi Chen Copyright (c) 2014 by Tianqi Chen and Contributors
Licensed under the Apache License, Version 2.0 (the "License"); Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.

View File

@ -68,7 +68,9 @@ for l in open('agaricus.txt.train'):
i += 1 i += 1
csr = scipy.sparse.csr_matrix( (dat, (row,col)) ) csr = scipy.sparse.csr_matrix( (dat, (row,col)) )
print 'haha'
dtrain = xgb.DMatrix( csr ) dtrain = xgb.DMatrix( csr )
print 'set label'
dtrain.set_label(labels) dtrain.set_label(labels)
evallist = [(dtest,'eval'), (dtrain,'train')] evallist = [(dtest,'eval'), (dtrain,'train')]
bst = xgb.train( param, dtrain, num_round, evallist ) bst = xgb.train( param, dtrain, num_round, evallist )

View File

@ -33,15 +33,16 @@ def ctypes2numpy( cptr, length ):
# data matrix used in xgboost # data matrix used in xgboost
class DMatrix: class DMatrix:
# constructor # constructor
def __init__(self, data=None, label=None): def __init__(self, data=None, label=None, missing=0.0):
self.handle = xglib.XGDMatrixCreate() self.handle = xglib.XGDMatrixCreate()
if data == None: if data == None:
return return
if isinstance(data,str): if isinstance(data,str):
xglib.XGDMatrixLoad(self.handle, ctypes.c_char_p(data), 1) xglib.XGDMatrixLoad(self.handle, ctypes.c_char_p(data), 1)
elif isinstance(data,scp.csr_matrix): elif isinstance(data,scp.csr_matrix):
self.__init_from_csr(data) self.__init_from_csr(data)
elif isinstance(data, numpy.ndarray) and len(data.shape) == 2:
self.__init_from_npy2d(data, missing)
else: else:
try: try:
csr = scp.csr_matrix(data) csr = scp.csr_matrix(data)
@ -59,6 +60,12 @@ class DMatrix:
( ctypes.c_uint * len(csr.indices) )(*csr.indices), ( ctypes.c_uint * len(csr.indices) )(*csr.indices),
( ctypes.c_float * len(csr.data) )(*csr.data), ( ctypes.c_float * len(csr.data) )(*csr.data),
len(csr.indptr), len(csr.data) ) len(csr.indptr), len(csr.data) )
# load the matrix content from a dense 2-D numpy array
def __init_from_npy2d(self,mat,missing):
    """Fill this DMatrix from a two-dimensional numpy array.

    mat     -- 2-D numpy array; it is flattened and copied into a float32
               buffer before being handed to the native library
    missing -- value passed to the library as the marker for absent entries
    """
    # keep the flattened copy in a local so the buffer stays referenced
    # for the whole duration of the native call
    flat = numpy.array( mat.reshape(mat.size), dtype='float32' )
    flat_ptr = flat.ctypes.data_as(ctypes.POINTER(ctypes.c_float))
    nrow = mat.shape[0]
    ncol = mat.shape[1]
    xglib.XGDMatrixParseMat( self.handle, flat_ptr, nrow, ncol, ctypes.c_float(missing) )
# destructor # destructor
def __del__(self): def __del__(self):
xglib.XGDMatrixFree(self.handle) xglib.XGDMatrixFree(self.handle)
@ -103,7 +110,7 @@ class DMatrix:
class Booster: class Booster:
"""learner class """ """learner class """
def __init__(self, params, cache=[]): def __init__(self, params={}, cache=[]):
""" constructor, param: """ """ constructor, param: """
for d in cache: for d in cache:
assert isinstance(d,DMatrix) assert isinstance(d,DMatrix)

View File

@ -52,6 +52,28 @@ namespace xgboost{
for( size_t i = 0; i < nelem; ++ i ){ for( size_t i = 0; i < nelem; ++ i ){
mat.row_data_[i] = XGEntry(indices[i], data[i]); mat.row_data_[i] = XGEntry(indices[i], data[i]);
} }
this->data.InitData();
this->init_col_ = true;
}
/*!
 * \brief rebuild the feature matrix from a dense buffer
 * \param data pointer to nrow * ncol floats, stored one row after another
 * \param nrow number of rows
 * \param ncol number of columns
 * \param missing value that marks an absent entry; cells equal to it are not stored.
 *        If missing is NaN, the NaN cells are the ones that get dropped
 */
inline void ParseMat( const float *data,
                      size_t nrow,
                      size_t ncol,
                      float missing ){
    xgboost::booster::FMatrixS &mat = this->data;
    mat.Clear();
    // NaN compares unequal to everything, itself included, so a plain
    // "value != missing" test would keep every cell when missing is NaN
    const bool nan_missing = ( missing != missing );
    for( size_t i = 0; i < nrow; ++i, data += ncol ){
        size_t nelem = 0;
        for( size_t j = 0; j < ncol; ++j ){
            const float value = data[j];
            const bool is_missing = nan_missing ? ( value != value ) : ( value == missing );
            if( is_missing ) continue;
            mat.row_data_.push_back( XGEntry(j, value) );
            ++ nelem;
        }
        // NOTE(review): back() assumes Clear() leaves a starting offset in row_ptr_ - confirm
        mat.row_ptr_.push_back( mat.row_ptr_.back() + nelem );
    }
    this->data.InitData();
    this->init_col_ = true;
}
inline void SetLabel( const float *label, size_t len ){ inline void SetLabel( const float *label, size_t len ){
this->info.labels.resize( len ); this->info.labels.resize( len );
@ -163,6 +185,13 @@ extern "C"{
size_t nelem ){ size_t nelem ){
static_cast<DMatrix*>(handle)->ParseCSR(indptr, indices, data, nindptr, nelem); static_cast<DMatrix*>(handle)->ParseCSR(indptr, indices, data, nindptr, nelem);
} }
void XGDMatrixParseMat( void *handle,
const float *data,
size_t nrow,
size_t ncol,
float missing ){
static_cast<DMatrix*>(handle)->ParseMat(data, nrow, ncol, missing);
}
void XGDMatrixSetLabel( void *handle, const float *label, size_t len ){ void XGDMatrixSetLabel( void *handle, const float *label, size_t len ){
static_cast<DMatrix*>(handle)->SetLabel(label,len); static_cast<DMatrix*>(handle)->SetLabel(label,len);
} }

View File

@ -49,6 +49,19 @@ extern "C"{
const float *data, const float *data,
size_t nindptr, size_t nindptr,
size_t nelem ); size_t nelem );
/*!
* \brief set matrix content from a dense data buffer
* \param handle an instance of data matrix
* \param data pointer to the data space, nrow * ncol floats laid out row by row
* \param nrow number of rows
* \param ncol number of columns
* \param missing value used to represent a missing entry; cells equal to it are skipped
*/
void XGDMatrixParseMat( void *handle,
const float *data,
size_t nrow,
size_t ncol,
float missing );
/*! /*!
* \brief set label of the training matrix * \brief set label of the training matrix
* \param handle a instance of data matrix * \param handle a instance of data matrix
@ -74,9 +87,16 @@ extern "C"{
* \brief get label set from matrix * \brief get label set from matrix
* \param handle a instance of data matrix * \param handle a instance of data matrix
* \param len used to set result length * \param len used to set result length
* \return pointer to the row * \return pointer to the label
*/ */
const float* XGDMatrixGetLabel( const void *handle, size_t* len ); const float* XGDMatrixGetLabel( const void *handle, size_t* len );
/*!
* \brief get the weight set from the matrix
* \param handle an instance of data matrix
* \param len used to set the result length
* \return pointer to the weight
*/
const float* XGDMatrixGetWeight( const void *handle, size_t* len );
/*! /*!
* \brief clear all the records, including feature matrix and label * \brief clear all the records, including feature matrix and label
* \param handle a instance of data matrix * \param handle a instance of data matrix