This commit is contained in:
tqchen
2015-04-16 17:03:18 -07:00
parent 22abf4e295
commit ddb7e538df
6 changed files with 11 additions and 50 deletions

View File

@@ -87,7 +87,7 @@ def c_array(ctype, values):
class DMatrix(object):
def __init__(self, data, label=None, missing=0.0, weight=None, cache_file=None):
def __init__(self, data, label=None, missing=0.0, weight=None):
"""
Data matrix used in XGBoost.
@@ -102,24 +102,13 @@ class DMatrix(object):
Value in the data which needs to be present as a missing value.
weight : list or numpy 1-D array (optional)
Weight for each instance.
cache_file : string (optional)
Path prefix for the binary cache of the input data. When this is set,
several binary cache files with the prefix cache_file will be created and
xgboost will try to use external memory as much as possible,
which generally saves memory during computation.
"""
# force into void_p, mac need to pass things in as void_p
if data is None:
self.handle = None
return
if cache_file is not None:
if not isinstance(data, string_types):
raise Exception('cache_file must be used together with input file name')
if not isinstance(cache_file, string_types):
raise Exception('cache_file must be string')
self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateCache(c_str(data), c_str(cache_file), 0))
elif isinstance(data, string_types):
if isinstance(data, string_types):
self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateFromFile(c_str(data), 0))
elif isinstance(data, scipy.sparse.csr_matrix):
self._init_from_csr(data)

View File

@@ -114,11 +114,6 @@ extern "C"{
/*!
* \brief C entry point that creates a data matrix from a file.
* \param fname file name, passed through unchanged to LoadDataMatrix
* \param silent integer flag; any non-zero value is forwarded as true
* \return whatever LoadDataMatrix returns, exposed as an untyped void*
*/
void* XGDMatrixCreateFromFile(const char *fname, int silent) {
// `silent != 0` turns the C int flag into a bool; the two trailing `false`
// arguments are LoadDataMatrix options whose meaning is not visible here
return LoadDataMatrix(fname, silent != 0, false, false);
}
/*!
* \brief C entry point that creates a data matrix from a file together with
*  a cache file name.
* \param fname file name, passed through unchanged to LoadDataMatrix
* \param cache_file cache file name, forwarded as the fifth LoadDataMatrix argument
* \param silent integer flag; any non-zero value is forwarded as true
* \return whatever LoadDataMatrix returns, exposed as an untyped void*
*/
void* XGDMatrixCreateCache(const char *fname,
const char *cache_file,
int silent) {
// identical to the XGDMatrixCreateFromFile call except for the extra
// cache_file argument; the two `false` options are not explained in this view
return LoadDataMatrix(fname, silent != 0, false, false, cache_file);
}
void* XGDMatrixCreateFromCSR(const bst_ulong *indptr,
const unsigned *indices,
const float *data,

View File

@@ -24,17 +24,6 @@ extern "C" {
* \return a loaded data matrix
*/
XGB_DLL void* XGDMatrixCreateFromFile(const char *fname, int silent);
/*!
* \brief load a cached DMatrix; it is backed by several cache files
*  and usually costs less memory
* \param fname the name of the file, can be a cached buffer or text
* \param cache_file the name of the cache file
* \param silent whether to print messages during loading
* \return a loaded data matrix
*/
XGB_DLL void* XGDMatrixCreateCache(const char *fname,
const char *cache_file,
int silent);
/*!
* \brief create a matrix content from csr format
* \param indptr pointer to row headers