add create from csc
This commit is contained in:
@@ -22,6 +22,7 @@ xglib = ctypes.cdll.LoadLibrary(XGBOOST_PATH)
|
||||
# DMatrix functions
|
||||
xglib.XGDMatrixCreateFromFile.restype = ctypes.c_void_p
|
||||
xglib.XGDMatrixCreateFromCSR.restype = ctypes.c_void_p
|
||||
xglib.XGDMatrixCreateFromCSC.restype = ctypes.c_void_p
|
||||
xglib.XGDMatrixCreateFromMat.restype = ctypes.c_void_p
|
||||
xglib.XGDMatrixSliceDMatrix.restype = ctypes.c_void_p
|
||||
xglib.XGDMatrixGetFloatInfo.restype = ctypes.POINTER(ctypes.c_float)
|
||||
@@ -66,6 +67,8 @@ class DMatrix:
|
||||
xglib.XGDMatrixCreateFromFile(ctypes.c_char_p(data.encode('utf-8')), 0))
|
||||
elif isinstance(data, scp.csr_matrix):
|
||||
self.__init_from_csr(data)
|
||||
elif isinstance(data, scp.csc_matrix):
|
||||
self.__init_from_csc(data)
|
||||
elif isinstance(data, numpy.ndarray) and len(data.shape) == 2:
|
||||
self.__init_from_npy2d(data, missing)
|
||||
else:
|
||||
@@ -88,6 +91,15 @@ class DMatrix:
|
||||
(ctypes.c_float * len(csr.data))(*csr.data),
|
||||
len(csr.indptr), len(csr.data)))
|
||||
|
||||
def __init_from_csc(self, csc):
    """Create the DMatrix handle from a CSC (column-compressed) sparse matrix.

    csc: the matrix to convert; the caller dispatches here for
         scp.csc_matrix instances. Its indptr, indices and data arrays
         are copied into freshly built ctypes arrays.

    Stores the pointer returned by XGDMatrixCreateFromCSC in self.handle.
    """
    # every stored value must come with exactly one index entry
    assert len(csc.indices) == len(csc.data)
    # The two lengths are wrapped in c_ulong (the same type used for the
    # indptr array elements) instead of being passed as bare Python ints:
    # without declared argtypes ctypes converts a plain int to a C int,
    # which does not match the unsigned-long-sized parameters of the C side.
    self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateFromCSC(
        (ctypes.c_ulong * len(csc.indptr))(*csc.indptr),
        (ctypes.c_uint * len(csc.indices))(*csc.indices),
        (ctypes.c_float * len(csc.data))(*csc.data),
        ctypes.c_ulong(len(csc.indptr)),
        ctypes.c_ulong(len(csc.data))))
|
||||
|
||||
def __init_from_npy2d(self,mat,missing):
|
||||
"""convert data from numpy matrix"""
|
||||
data = numpy.array(mat.reshape(mat.size), dtype='float32')
|
||||
|
||||
@@ -14,6 +14,7 @@ using namespace std;
|
||||
#include "../src/learner/learner-inl.hpp"
|
||||
#include "../src/io/io.h"
|
||||
#include "../src/utils/utils.h"
|
||||
#include "../src/utils/matrix_csr.h"
|
||||
#include "../src/io/simple_dmatrix-inl.hpp"
|
||||
|
||||
using namespace xgboost;
|
||||
@@ -102,6 +103,31 @@ extern "C"{
|
||||
mat.info.info.num_row = nindptr - 1;
|
||||
return p_mat;
|
||||
}
|
||||
// Build a DMatrixSimple from CSC (column-compressed) input.
//   col_ptr : nindptr offsets; column i owns entries [col_ptr[i], col_ptr[i+1])
//   indices : per-entry index, used below as the destination row id
//   data    : per-entry value
//   nindptr : length of col_ptr (the column count is nindptr - 1)
//   nelem   : not read by this function
// Returns the newly allocated matrix as an opaque pointer; it is not freed here.
XGB_DLL void* XGDMatrixCreateFromCSC(const bst_ulong *col_ptr,
                                     const unsigned *indices,
                                     const float *data,
                                     bst_ulong nindptr,
                                     bst_ulong nelem) {
  DMatrixSimple *p_mat = new DMatrixSimple();
  DMatrixSimple &mat = *p_mat;
  utils::SparseCSRMBuilder<RowBatch::Entry, false> builder(mat.row_ptr_, mat.row_data_);
  builder.InitBudget();
  bst_ulong ncol = nindptr - 1;
  // Pass 1: register one budget unit per entry under its row id
  // (presumably sizing each row before storage is allocated).
  for (bst_ulong i = 0; i < ncol; ++i) {
    // j has the same type as the col_ptr elements: an `unsigned` counter
    // would truncate offsets that do not fit in it and could then never
    // reach col_ptr[i+1].
    for (bst_ulong j = col_ptr[i]; j < col_ptr[i+1]; ++j) {
      builder.AddBudget(indices[j]);
    }
  }
  builder.InitStorage();
  // Pass 2: push each entry into row indices[j], tagged with its column i.
  for (bst_ulong i = 0; i < ncol; ++i) {
    for (bst_ulong j = col_ptr[i]; j < col_ptr[i+1]; ++j) {
      builder.PushElem(indices[j], RowBatch::Entry(static_cast<bst_uint>(i), data[j]));
    }
  }
  // Row count comes from the row pointer array that was just built.
  mat.info.info.num_row = mat.row_ptr_.size() - 1;
  mat.info.info.num_col = static_cast<size_t>(ncol);
  return p_mat;
}
|
||||
void* XGDMatrixCreateFromMat(const float *data,
|
||||
bst_ulong nrow,
|
||||
bst_ulong ncol,
|
||||
|
||||
@@ -22,7 +22,7 @@ extern "C" {
|
||||
* \return a loaded data matrix
|
||||
*/
|
||||
XGB_DLL void* XGDMatrixCreateFromFile(const char *fname, int silent);
|
||||
/*!
|
||||
/*!
|
||||
* \brief create a matrix content from csr format
|
||||
* \param indptr pointer to row headers
|
||||
* \param indices findex
|
||||
@@ -36,6 +36,20 @@ extern "C" {
|
||||
const float *data,
|
||||
bst_ulong nindptr,
|
||||
bst_ulong nelem);
|
||||
/*!
 * \brief create a matrix content from CSC (column-compressed) format
 * \param col_ptr pointer to col headers; entries of column i lie in
 *                [col_ptr[i], col_ptr[i+1])
 * \param indices row index of each nonzero entry
 * \param data fvalue
 * \param nindptr number of columns in the matrix + 1 (length of col_ptr)
 * \param nelem number of nonzero elements in the matrix
 * \return created dmatrix
 */
|
||||
XGB_DLL void* XGDMatrixCreateFromCSC(const bst_ulong *col_ptr,
|
||||
const unsigned *indices,
|
||||
const float *data,
|
||||
bst_ulong nindptr,
|
||||
bst_ulong nelem);
|
||||
/*!
|
||||
* \brief create matrix content from dense matrix
|
||||
* \param data pointer to the data space
|
||||
|
||||
Reference in New Issue
Block a user