fix numpy convert
This commit is contained in:
parent
a7f3d7edd7
commit
2be3f6ece0
2
LICENSE
2
LICENSE
@ -1,4 +1,4 @@
|
||||
Copyright (c) 2014 Tianqi Chen
|
||||
Copyright (c) 2014 by Tianqi Chen and Contributors
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
|
||||
@ -68,7 +68,9 @@ for l in open('agaricus.txt.train'):
|
||||
i += 1
|
||||
|
||||
csr = scipy.sparse.csr_matrix( (dat, (row,col)) )
|
||||
print 'haha'
|
||||
dtrain = xgb.DMatrix( csr )
|
||||
print 'set label'
|
||||
dtrain.set_label(labels)
|
||||
evallist = [(dtest,'eval'), (dtrain,'train')]
|
||||
bst = xgb.train( param, dtrain, num_round, evallist )
|
||||
|
||||
@ -33,15 +33,16 @@ def ctypes2numpy( cptr, length ):
|
||||
# data matrix used in xgboost
|
||||
class DMatrix:
|
||||
# constructor
|
||||
def __init__(self, data=None, label=None):
|
||||
def __init__(self, data=None, label=None, missing=0.0):
|
||||
self.handle = xglib.XGDMatrixCreate()
|
||||
if data == None:
|
||||
return
|
||||
if isinstance(data,str):
|
||||
xglib.XGDMatrixLoad(self.handle, ctypes.c_char_p(data), 1)
|
||||
|
||||
elif isinstance(data,scp.csr_matrix):
|
||||
self.__init_from_csr(data)
|
||||
elif isinstance(data, numpy.ndarray) and len(data.shape) == 2:
|
||||
self.__init_from_npy2d(data, missing)
|
||||
else:
|
||||
try:
|
||||
csr = scp.csr_matrix(data)
|
||||
@ -59,6 +60,12 @@ class DMatrix:
|
||||
( ctypes.c_uint * len(csr.indices) )(*csr.indices),
|
||||
( ctypes.c_float * len(csr.data) )(*csr.data),
|
||||
len(csr.indptr), len(csr.data) )
|
||||
# build the matrix content from a 2-D numpy array
#   mat:     2-D numpy.ndarray; it is flattened row by row before being handed to the library
#   missing: passed through to XGDMatrixParseMat as the value that marks a missing entry
def __init_from_npy2d(self,mat,missing):
    # copy into a flat float32 buffer so the native side can read it as const float*;
    # the local reference keeps the buffer alive for the duration of the call
    data = numpy.array( mat.reshape(mat.size), dtype='float32' )
    # nrow/ncol are declared as size_t on the C side; bare Python ints are converted to
    # C int by ctypes unless argtypes is configured, so pass them with the exact width
    xglib.XGDMatrixParseMat( self.handle,
                             data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
                             ctypes.c_size_t(mat.shape[0]),
                             ctypes.c_size_t(mat.shape[1]),
                             ctypes.c_float(missing) )
|
||||
# destructor
|
||||
def __del__(self):
|
||||
xglib.XGDMatrixFree(self.handle)
|
||||
@ -103,7 +110,7 @@ class DMatrix:
|
||||
|
||||
class Booster:
|
||||
"""learner class """
|
||||
def __init__(self, params, cache=[]):
|
||||
def __init__(self, params={}, cache=[]):
|
||||
""" constructor, param: """
|
||||
for d in cache:
|
||||
assert isinstance(d,DMatrix)
|
||||
|
||||
@ -52,6 +52,28 @@ namespace xgboost{
|
||||
for( size_t i = 0; i < nelem; ++ i ){
|
||||
mat.row_data_[i] = XGEntry(indices[i], data[i]);
|
||||
}
|
||||
this->data.InitData();
|
||||
this->init_col_ = true;
|
||||
}
|
||||
|
||||
inline void ParseMat( const float *data,
|
||||
size_t nrow,
|
||||
size_t ncol,
|
||||
float missing ){
|
||||
xgboost::booster::FMatrixS &mat = this->data;
|
||||
mat.Clear();
|
||||
for( size_t i = 0; i < nrow; ++i, data += ncol ){
|
||||
size_t nelem = 0;
|
||||
for( size_t j = 0; j < ncol; ++j ){
|
||||
if( data[j] != missing ){
|
||||
mat.row_data_.push_back( XGEntry(j, data[j]) );
|
||||
++ nelem;
|
||||
}
|
||||
}
|
||||
mat.row_ptr_.push_back( mat.row_ptr_.back() + nelem );
|
||||
}
|
||||
this->data.InitData();
|
||||
this->init_col_ = true;
|
||||
}
|
||||
inline void SetLabel( const float *label, size_t len ){
|
||||
this->info.labels.resize( len );
|
||||
@ -163,6 +185,13 @@ extern "C"{
|
||||
size_t nelem ){
|
||||
static_cast<DMatrix*>(handle)->ParseCSR(indptr, indices, data, nindptr, nelem);
|
||||
}
|
||||
void XGDMatrixParseMat( void *handle,
|
||||
const float *data,
|
||||
size_t nrow,
|
||||
size_t ncol,
|
||||
float missing ){
|
||||
static_cast<DMatrix*>(handle)->ParseMat(data, nrow, ncol, missing);
|
||||
}
|
||||
void XGDMatrixSetLabel( void *handle, const float *label, size_t len ){
|
||||
static_cast<DMatrix*>(handle)->SetLabel(label,len);
|
||||
}
|
||||
|
||||
@ -49,6 +49,19 @@ extern "C"{
|
||||
const float *data,
|
||||
size_t nindptr,
|
||||
size_t nelem );
|
||||
/*!
|
||||
* \brief set matrix content from data content
|
||||
* \param handle an instance of data matrix
|
||||
* \param data pointer to the data space
|
||||
* \param nrow number of rows
|
||||
* \param ncol number of columns
|
||||
* \param missing the value used to represent a missing entry
|
||||
*/
|
||||
void XGDMatrixParseMat( void *handle,
|
||||
const float *data,
|
||||
size_t nrow,
|
||||
size_t ncol,
|
||||
float missing );
|
||||
/*!
|
||||
* \brief set label of the training matrix
|
||||
* \param handle an instance of data matrix
|
||||
@ -74,9 +87,16 @@ extern "C"{
|
||||
* \brief get label set from matrix
|
||||
* \param handle an instance of data matrix
|
||||
* \param len used to set result length
|
||||
* \return pointer to the row
|
||||
* \return pointer to the label
|
||||
*/
|
||||
const float* XGDMatrixGetLabel( const void *handle, size_t* len );
|
||||
/*!
|
||||
* \brief get weight set from matrix
|
||||
* \param handle an instance of data matrix
|
||||
* \param len used to set result length
|
||||
* \return pointer to the weight
|
||||
*/
|
||||
const float* XGDMatrixGetWeight( const void *handle, size_t* len );
|
||||
/*!
|
||||
* \brief clear all the records, including feature matrix and label
|
||||
* \param handle an instance of data matrix
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user