OK
This commit is contained in:
parent
22abf4e295
commit
ddb7e538df
@ -76,17 +76,6 @@ extern "C" {
|
||||
_WrapperEnd();
|
||||
return ret;
|
||||
}
|
||||
SEXP XGDMatrixCreateCache_R(SEXP fname, SEXP cache_file, SEXP silent) {
|
||||
_WrapperBegin();
|
||||
void *handle = XGDMatrixCreateCache(CHAR(asChar(fname)),
|
||||
CHAR(asChar(cache_file)),
|
||||
asInteger(silent));
|
||||
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
|
||||
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
|
||||
UNPROTECT(1);
|
||||
_WrapperEnd();
|
||||
return ret;
|
||||
}
|
||||
SEXP XGDMatrixCreateFromMat_R(SEXP mat,
|
||||
SEXP missing) {
|
||||
_WrapperBegin();
|
||||
|
||||
@ -24,15 +24,6 @@ extern "C" {
|
||||
* \return a loaded data matrix
|
||||
*/
|
||||
SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent);
|
||||
/*!
|
||||
* \brief load a cached DMatrix, this is backed by several cache_files
|
||||
* and usually cost less memory
|
||||
* \param fname the name of the file, can be a cached buffer or text
|
||||
* \param cache_file the name of cached file
|
||||
* \param silent whether print messages during loading
|
||||
* \return a loaded data matrix
|
||||
*/
|
||||
SEXP XGDMatrixCreateCache_R(SEXP fname, SEXP cache_file, SEXP silent);
|
||||
/*!
|
||||
* \brief create matrix content from dense matrix
|
||||
* This assumes the matrix is stored in column major format
|
||||
|
||||
@ -16,6 +16,7 @@ DataMatrix* LoadDataMatrix(const char *fname,
|
||||
bool loadsplit,
|
||||
const char *cache_file) {
|
||||
std::string fname_ = fname;
|
||||
|
||||
const char *dlm = strchr(fname, '#');
|
||||
if (dlm != NULL) {
|
||||
utils::Check(strchr(dlm + 1, '#') == NULL,
|
||||
@ -26,7 +27,7 @@ DataMatrix* LoadDataMatrix(const char *fname,
|
||||
fname = fname_.c_str();
|
||||
cache_file = dlm +1;
|
||||
}
|
||||
|
||||
|
||||
if (cache_file == NULL) {
|
||||
if (!std::strcmp(fname, "stdin") ||
|
||||
!std::strncmp(fname, "s3://", 5) ||
|
||||
@ -51,6 +52,13 @@ DataMatrix* LoadDataMatrix(const char *fname,
|
||||
dmat->CacheLoad(fname, silent, savebuffer);
|
||||
return dmat;
|
||||
} else {
|
||||
std::string cache_fname = cache_file;
|
||||
if (loadsplit) {
|
||||
std::ostringstream os;
|
||||
os << cache_file << ".r" << rabit::GetRank();
|
||||
cache_fname = os.str();
|
||||
cache_file = cache_fname.c_str();
|
||||
}
|
||||
FILE *fi = fopen64(cache_file, "rb");
|
||||
if (fi != NULL) {
|
||||
DMatrixPage *dmat = new DMatrixPage();
|
||||
|
||||
@ -87,7 +87,7 @@ def c_array(ctype, values):
|
||||
|
||||
|
||||
class DMatrix(object):
|
||||
def __init__(self, data, label=None, missing=0.0, weight=None, cache_file=None):
|
||||
def __init__(self, data, label=None, missing=0.0, weight=None):
|
||||
"""
|
||||
Data matrix used in XGBoost.
|
||||
|
||||
@ -102,24 +102,13 @@ class DMatrix(object):
|
||||
Value in the data which needs to be present as a missing value.
|
||||
weight : list or numpy 1-D array (optional)
|
||||
Weight for each instance.
|
||||
cache_file: string
|
||||
Path to the binary cache of input data, when this is enabled,
|
||||
several binary cache files with the prefix cache_file will be created,
|
||||
xgboost will try to use external memory as much as possible,
|
||||
thus save memory during computation in general
|
||||
"""
|
||||
|
||||
# force into void_p, mac need to pass things in as void_p
|
||||
if data is None:
|
||||
self.handle = None
|
||||
return
|
||||
if cache_file is not None:
|
||||
if not isinstance(data, string_types):
|
||||
raise Exception('cache_file must be used together with input file name')
|
||||
if not isinstance(cache_file, string_types):
|
||||
raise Exception('cache_file must be string')
|
||||
self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateCache(c_str(data), c_str(cache_file), 0))
|
||||
elif isinstance(data, string_types):
|
||||
if isinstance(data, string_types):
|
||||
self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateFromFile(c_str(data), 0))
|
||||
elif isinstance(data, scipy.sparse.csr_matrix):
|
||||
self._init_from_csr(data)
|
||||
|
||||
@ -114,11 +114,6 @@ extern "C"{
|
||||
void* XGDMatrixCreateFromFile(const char *fname, int silent) {
|
||||
return LoadDataMatrix(fname, silent != 0, false, false);
|
||||
}
|
||||
void* XGDMatrixCreateCache(const char *fname,
|
||||
const char *cache_file,
|
||||
int silent) {
|
||||
return LoadDataMatrix(fname, silent != 0, false, false, cache_file);
|
||||
}
|
||||
void* XGDMatrixCreateFromCSR(const bst_ulong *indptr,
|
||||
const unsigned *indices,
|
||||
const float *data,
|
||||
|
||||
@ -24,17 +24,6 @@ extern "C" {
|
||||
* \return a loaded data matrix
|
||||
*/
|
||||
XGB_DLL void* XGDMatrixCreateFromFile(const char *fname, int silent);
|
||||
/*!
|
||||
* \brief load a cached DMatrix, this is backed by several cache_files
|
||||
* and usually cost less memory
|
||||
* \param fname the name of the file, can be a cached buffer or text
|
||||
* \param cache_file the name of cached file
|
||||
* \param silent whether print messages during loading
|
||||
* \return a loaded data matrix
|
||||
*/
|
||||
XGB_DLL void* XGDMatrixCreateCache(const char *fname,
|
||||
const char *cache_file,
|
||||
int silent);
|
||||
/*!
|
||||
* \brief create a matrix content from csr format
|
||||
* \param indptr pointer to row headers
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user