fix numpy convert
This commit is contained in:
parent
a7f3d7edd7
commit
2be3f6ece0
2
LICENSE
2
LICENSE
@ -1,4 +1,4 @@
|
|||||||
Copyright (c) 2014 Tianqi Chen
|
Copyright (c) 2014 by Tianqi Chen and Contributors
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
you may not use this file except in compliance with the License.
|
you may not use this file except in compliance with the License.
|
||||||
|
|||||||
@ -68,7 +68,9 @@ for l in open('agaricus.txt.train'):
|
|||||||
i += 1
|
i += 1
|
||||||
|
|
||||||
csr = scipy.sparse.csr_matrix( (dat, (row,col)) )
|
csr = scipy.sparse.csr_matrix( (dat, (row,col)) )
|
||||||
|
print 'haha'
|
||||||
dtrain = xgb.DMatrix( csr )
|
dtrain = xgb.DMatrix( csr )
|
||||||
|
print 'set label'
|
||||||
dtrain.set_label(labels)
|
dtrain.set_label(labels)
|
||||||
evallist = [(dtest,'eval'), (dtrain,'train')]
|
evallist = [(dtest,'eval'), (dtrain,'train')]
|
||||||
bst = xgb.train( param, dtrain, num_round, evallist )
|
bst = xgb.train( param, dtrain, num_round, evallist )
|
||||||
|
|||||||
@ -33,15 +33,16 @@ def ctypes2numpy( cptr, length ):
|
|||||||
# data matrix used in xgboost
|
# data matrix used in xgboost
|
||||||
class DMatrix:
|
class DMatrix:
|
||||||
# constructor
|
# constructor
|
||||||
def __init__(self, data=None, label=None):
|
def __init__(self, data=None, label=None, missing=0.0):
|
||||||
self.handle = xglib.XGDMatrixCreate()
|
self.handle = xglib.XGDMatrixCreate()
|
||||||
if data == None:
|
if data == None:
|
||||||
return
|
return
|
||||||
if isinstance(data,str):
|
if isinstance(data,str):
|
||||||
xglib.XGDMatrixLoad(self.handle, ctypes.c_char_p(data), 1)
|
xglib.XGDMatrixLoad(self.handle, ctypes.c_char_p(data), 1)
|
||||||
|
|
||||||
elif isinstance(data,scp.csr_matrix):
|
elif isinstance(data,scp.csr_matrix):
|
||||||
self.__init_from_csr(data)
|
self.__init_from_csr(data)
|
||||||
|
elif isinstance(data, numpy.ndarray) and len(data.shape) == 2:
|
||||||
|
self.__init_from_npy2d(data, missing)
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
csr = scp.csr_matrix(data)
|
csr = scp.csr_matrix(data)
|
||||||
@ -59,6 +60,12 @@ class DMatrix:
|
|||||||
( ctypes.c_uint * len(csr.indices) )(*csr.indices),
|
( ctypes.c_uint * len(csr.indices) )(*csr.indices),
|
||||||
( ctypes.c_float * len(csr.data) )(*csr.data),
|
( ctypes.c_float * len(csr.data) )(*csr.data),
|
||||||
len(csr.indptr), len(csr.data) )
|
len(csr.indptr), len(csr.data) )
|
||||||
|
# convert data from numpy matrix
|
||||||
|
def __init_from_npy2d(self,mat,missing):
    """Fill this matrix from a dense 2-D numpy array.

    mat:     2-D numpy.ndarray; its values are copied into a float32 buffer
    missing: value handed to the native parser as the marker for
             missing entries
    """
    # reshape(mat.size) flattens with numpy's default C (row-major) order and
    # numpy.array(...) makes a float32 copy; `data` stays referenced for the
    # whole native call, so the pointer below remains valid
    data = numpy.array( mat.reshape(mat.size), dtype='float32' )
    # the row/column counts are wrapped in c_size_t explicitly: a bare Python
    # int is marshalled by ctypes as a C int, which does not match the
    # size_t parameters of XGDMatrixParseMat
    xglib.XGDMatrixParseMat( self.handle,
                             data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
                             ctypes.c_size_t(mat.shape[0]),
                             ctypes.c_size_t(mat.shape[1]),
                             ctypes.c_float(missing) )
|
||||||
# destructor
|
# destructor
|
||||||
def __del__(self):
|
def __del__(self):
|
||||||
xglib.XGDMatrixFree(self.handle)
|
xglib.XGDMatrixFree(self.handle)
|
||||||
@ -103,7 +110,7 @@ class DMatrix:
|
|||||||
|
|
||||||
class Booster:
|
class Booster:
|
||||||
"""learner class """
|
"""learner class """
|
||||||
def __init__(self, params, cache=[]):
|
def __init__(self, params={}, cache=[]):
|
||||||
""" constructor, param: """
|
""" constructor, param: """
|
||||||
for d in cache:
|
for d in cache:
|
||||||
assert isinstance(d,DMatrix)
|
assert isinstance(d,DMatrix)
|
||||||
|
|||||||
@ -52,6 +52,28 @@ namespace xgboost{
|
|||||||
for( size_t i = 0; i < nelem; ++ i ){
|
for( size_t i = 0; i < nelem; ++ i ){
|
||||||
mat.row_data_[i] = XGEntry(indices[i], data[i]);
|
mat.row_data_[i] = XGEntry(indices[i], data[i]);
|
||||||
}
|
}
|
||||||
|
this->data.InitData();
|
||||||
|
this->init_col_ = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void ParseMat( const float *data,
|
||||||
|
size_t nrow,
|
||||||
|
size_t ncol,
|
||||||
|
float missing ){
|
||||||
|
xgboost::booster::FMatrixS &mat = this->data;
|
||||||
|
mat.Clear();
|
||||||
|
for( size_t i = 0; i < nrow; ++i, data += ncol ){
|
||||||
|
size_t nelem = 0;
|
||||||
|
for( size_t j = 0; j < ncol; ++j ){
|
||||||
|
if( data[j] != missing ){
|
||||||
|
mat.row_data_.push_back( XGEntry(j, data[j]) );
|
||||||
|
++ nelem;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
mat.row_ptr_.push_back( mat.row_ptr_.back() + nelem );
|
||||||
|
}
|
||||||
|
this->data.InitData();
|
||||||
|
this->init_col_ = true;
|
||||||
}
|
}
|
||||||
inline void SetLabel( const float *label, size_t len ){
|
inline void SetLabel( const float *label, size_t len ){
|
||||||
this->info.labels.resize( len );
|
this->info.labels.resize( len );
|
||||||
@ -163,6 +185,13 @@ extern "C"{
|
|||||||
size_t nelem ){
|
size_t nelem ){
|
||||||
static_cast<DMatrix*>(handle)->ParseCSR(indptr, indices, data, nindptr, nelem);
|
static_cast<DMatrix*>(handle)->ParseCSR(indptr, indices, data, nindptr, nelem);
|
||||||
}
|
}
|
||||||
|
void XGDMatrixParseMat( void *handle,
|
||||||
|
const float *data,
|
||||||
|
size_t nrow,
|
||||||
|
size_t ncol,
|
||||||
|
float missing ){
|
||||||
|
static_cast<DMatrix*>(handle)->ParseMat(data, nrow, ncol, missing);
|
||||||
|
}
|
||||||
void XGDMatrixSetLabel( void *handle, const float *label, size_t len ){
|
void XGDMatrixSetLabel( void *handle, const float *label, size_t len ){
|
||||||
static_cast<DMatrix*>(handle)->SetLabel(label,len);
|
static_cast<DMatrix*>(handle)->SetLabel(label,len);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -49,6 +49,19 @@ extern "C"{
|
|||||||
const float *data,
|
const float *data,
|
||||||
size_t nindptr,
|
size_t nindptr,
|
||||||
size_t nelem );
|
size_t nelem );
|
||||||
|
/*!
|
||||||
|
* \brief set matrix content from data content
|
||||||
|
* \param handle an instance of data matrix
|
||||||
|
* \param data pointer to the data space
|
||||||
|
* \param nrow number of rows
|
||||||
|
* \param ncol number of columns
|
||||||
|
* \param missing value used to represent a missing entry
|
||||||
|
*/
|
||||||
|
void XGDMatrixParseMat( void *handle,
|
||||||
|
const float *data,
|
||||||
|
size_t nrow,
|
||||||
|
size_t ncol,
|
||||||
|
float missing );
|
||||||
/*!
|
/*!
|
||||||
* \brief set label of the training matrix
|
* \brief set label of the training matrix
|
||||||
* \param handle a instance of data matrix
|
* \param handle a instance of data matrix
|
||||||
@ -74,9 +87,16 @@ extern "C"{
|
|||||||
* \brief get label set from matrix
|
* \brief get label set from matrix
|
||||||
* \param handle a instance of data matrix
|
* \param handle a instance of data matrix
|
||||||
* \param len used to set result length
|
* \param len used to set result length
|
||||||
* \return pointer to the row
|
* \return pointer to the label
|
||||||
*/
|
*/
|
||||||
const float* XGDMatrixGetLabel( const void *handle, size_t* len );
|
const float* XGDMatrixGetLabel( const void *handle, size_t* len );
|
||||||
|
/*!
|
||||||
|
* \brief get weight set from matrix
|
||||||
|
* \param handle an instance of data matrix
|
||||||
|
* \param len used to set result length
|
||||||
|
* \return pointer to the weight
|
||||||
|
*/
|
||||||
|
const float* XGDMatrixGetWeight( const void *handle, size_t* len );
|
||||||
/*!
|
/*!
|
||||||
* \brief clear all the records, including feature matrix and label
|
* \brief clear all the records, including feature matrix and label
|
||||||
* \param handle a instance of data matrix
|
* \param handle a instance of data matrix
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user