This commit is contained in:
tqchen 2015-04-16 17:03:18 -07:00
parent 22abf4e295
commit ddb7e538df
6 changed files with 11 additions and 50 deletions

View File

@ -76,17 +76,6 @@ extern "C" {
_WrapperEnd();
return ret;
}
/*!
 * \brief R-callable wrapper around XGDMatrixCreateCache
 * \param fname R value converted through asChar/CHAR and forwarded as the input file name
 * \param cache_file R value converted through asChar/CHAR and forwarded as the cache file name
 * \param silent R value converted through asInteger and forwarded as the silent flag
 * \return an external pointer holding the handle returned by XGDMatrixCreateCache,
 *  with _DMatrixFinalizer registered as its C finalizer
 */
SEXP XGDMatrixCreateCache_R(SEXP fname, SEXP cache_file, SEXP silent) {
  _WrapperBegin();
  // unpack the R arguments into plain C values before calling the C API
  const char *data_path = CHAR(asChar(fname));
  const char *cache_path = CHAR(asChar(cache_file));
  const int silent_flag = asInteger(silent);
  void *dmat = XGDMatrixCreateCache(data_path, cache_path, silent_flag);
  // keep the freshly created external pointer protected while the finalizer is attached
  SEXP ext_ptr = PROTECT(R_MakeExternalPtr(dmat, R_NilValue, R_NilValue));
  R_RegisterCFinalizerEx(ext_ptr, _DMatrixFinalizer, TRUE);
  UNPROTECT(1);
  _WrapperEnd();
  return ext_ptr;
}
SEXP XGDMatrixCreateFromMat_R(SEXP mat,
SEXP missing) {
_WrapperBegin();

View File

@ -24,15 +24,6 @@ extern "C" {
* \return a loaded data matrix
*/
SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent);
/*!
 * \brief load a cached DMatrix; this is backed by several cache files
 *  and usually costs less memory
 * \param fname the name of the file, can be a cached buffer or text
 * \param cache_file the name of the cache file
 * \param silent whether to print messages during loading
 * \return a loaded data matrix
 */
SEXP XGDMatrixCreateCache_R(SEXP fname, SEXP cache_file, SEXP silent);
/*!
* \brief create matrix content from dense matrix
* This assumes the matrix is stored in column major format

View File

@ -16,6 +16,7 @@ DataMatrix* LoadDataMatrix(const char *fname,
bool loadsplit,
const char *cache_file) {
std::string fname_ = fname;
const char *dlm = strchr(fname, '#');
if (dlm != NULL) {
utils::Check(strchr(dlm + 1, '#') == NULL,
@ -51,6 +52,13 @@ DataMatrix* LoadDataMatrix(const char *fname,
dmat->CacheLoad(fname, silent, savebuffer);
return dmat;
} else {
std::string cache_fname = cache_file;
if (loadsplit) {
std::ostringstream os;
os << cache_file << ".r" << rabit::GetRank();
cache_fname = os.str();
cache_file = cache_fname.c_str();
}
FILE *fi = fopen64(cache_file, "rb");
if (fi != NULL) {
DMatrixPage *dmat = new DMatrixPage();

View File

@ -87,7 +87,7 @@ def c_array(ctype, values):
class DMatrix(object):
def __init__(self, data, label=None, missing=0.0, weight=None, cache_file=None):
def __init__(self, data, label=None, missing=0.0, weight=None):
"""
Data matrix used in XGBoost.
@ -102,24 +102,13 @@ class DMatrix(object):
Value in the data which needs to be present as a missing value.
weight : list or numpy 1-D array (optional)
Weight for each instance.
cache_file: string
Path to the binary cache of input data, when this is enabled,
several binary cache files with the prefix cache_file will be created,
xgboost will try to use external memory as much as possible,
thus save memory during computation in general
"""
# force into void_p, mac need to pass things in as void_p
if data is None:
self.handle = None
return
if cache_file is not None:
if not isinstance(data, string_types):
raise Exception('cache_file must be used together with input file name')
if not isinstance(cache_file, string_types):
raise Exception('cache_file must be string')
self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateCache(c_str(data), c_str(cache_file), 0))
elif isinstance(data, string_types):
if isinstance(data, string_types):
self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateFromFile(c_str(data), 0))
elif isinstance(data, scipy.sparse.csr_matrix):
self._init_from_csr(data)

View File

@ -114,11 +114,6 @@ extern "C"{
/*!
 * \brief load a data matrix from a file by delegating to LoadDataMatrix
 * \param fname the name of the file to load
 * \param silent any nonzero value is forwarded as a true silent flag
 * \return the pointer returned by LoadDataMatrix, as an opaque handle
 */
void* XGDMatrixCreateFromFile(const char *fname, int silent) {
  const bool silent_flag = (silent != 0);
  // the two remaining flags are fixed to false for this entry point
  return LoadDataMatrix(fname, silent_flag, false, false);
}
/*!
 * \brief load a cache-backed data matrix by delegating to LoadDataMatrix
 * \param fname the name of the file to load
 * \param cache_file the cache file name, forwarded as the last argument
 * \param silent any nonzero value is forwarded as a true silent flag
 * \return the pointer returned by LoadDataMatrix, as an opaque handle
 */
void* XGDMatrixCreateCache(const char *fname,
                           const char *cache_file,
                           int silent) {
  const bool silent_flag = (silent != 0);
  // same call as XGDMatrixCreateFromFile, plus the cache file name
  return LoadDataMatrix(fname, silent_flag, false, false, cache_file);
}
void* XGDMatrixCreateFromCSR(const bst_ulong *indptr,
const unsigned *indices,
const float *data,

View File

@ -24,17 +24,6 @@ extern "C" {
* \return a loaded data matrix
*/
XGB_DLL void* XGDMatrixCreateFromFile(const char *fname, int silent);
/*!
 * \brief load a cached DMatrix; this is backed by several cache files
 *  and usually costs less memory
 * \param fname the name of the file, can be a cached buffer or text
 * \param cache_file the name of the cache file
 * \param silent whether to print messages during loading
 * \return a loaded data matrix
 */
XGB_DLL void* XGDMatrixCreateCache(const char *fname,
const char *cache_file,
int silent);
/*!
* \brief create a matrix content from csr format
* \param indptr pointer to row headers