OK
This commit is contained in:
parent
22abf4e295
commit
ddb7e538df
@ -76,17 +76,6 @@ extern "C" {
|
|||||||
_WrapperEnd();
|
_WrapperEnd();
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
SEXP XGDMatrixCreateCache_R(SEXP fname, SEXP cache_file, SEXP silent) {
|
|
||||||
_WrapperBegin();
|
|
||||||
void *handle = XGDMatrixCreateCache(CHAR(asChar(fname)),
|
|
||||||
CHAR(asChar(cache_file)),
|
|
||||||
asInteger(silent));
|
|
||||||
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
|
|
||||||
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
|
|
||||||
UNPROTECT(1);
|
|
||||||
_WrapperEnd();
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
SEXP XGDMatrixCreateFromMat_R(SEXP mat,
|
SEXP XGDMatrixCreateFromMat_R(SEXP mat,
|
||||||
SEXP missing) {
|
SEXP missing) {
|
||||||
_WrapperBegin();
|
_WrapperBegin();
|
||||||
|
|||||||
@ -24,15 +24,6 @@ extern "C" {
|
|||||||
* \return a loaded data matrix
|
* \return a loaded data matrix
|
||||||
*/
|
*/
|
||||||
SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent);
|
SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent);
|
||||||
/*!
|
|
||||||
* \brief load a cached DMatrix, this is backed by several cache_files
|
|
||||||
* and usually cost less memory
|
|
||||||
* \param fname the name of the file, can be a cached buffer or text
|
|
||||||
* \param cache_file the name of cached file
|
|
||||||
* \param silent whether print messages during loading
|
|
||||||
* \return a loaded data matrix
|
|
||||||
*/
|
|
||||||
SEXP XGDMatrixCreateCache_R(SEXP fname, SEXP cache_file, SEXP silent);
|
|
||||||
/*!
|
/*!
|
||||||
* \brief create matrix content from dense matrix
|
* \brief create matrix content from dense matrix
|
||||||
* This assumes the matrix is stored in column major format
|
* This assumes the matrix is stored in column major format
|
||||||
|
|||||||
@ -16,6 +16,7 @@ DataMatrix* LoadDataMatrix(const char *fname,
|
|||||||
bool loadsplit,
|
bool loadsplit,
|
||||||
const char *cache_file) {
|
const char *cache_file) {
|
||||||
std::string fname_ = fname;
|
std::string fname_ = fname;
|
||||||
|
|
||||||
const char *dlm = strchr(fname, '#');
|
const char *dlm = strchr(fname, '#');
|
||||||
if (dlm != NULL) {
|
if (dlm != NULL) {
|
||||||
utils::Check(strchr(dlm + 1, '#') == NULL,
|
utils::Check(strchr(dlm + 1, '#') == NULL,
|
||||||
@ -51,6 +52,13 @@ DataMatrix* LoadDataMatrix(const char *fname,
|
|||||||
dmat->CacheLoad(fname, silent, savebuffer);
|
dmat->CacheLoad(fname, silent, savebuffer);
|
||||||
return dmat;
|
return dmat;
|
||||||
} else {
|
} else {
|
||||||
|
std::string cache_fname = cache_file;
|
||||||
|
if (loadsplit) {
|
||||||
|
std::ostringstream os;
|
||||||
|
os << cache_file << ".r" << rabit::GetRank();
|
||||||
|
cache_fname = os.str();
|
||||||
|
cache_file = cache_fname.c_str();
|
||||||
|
}
|
||||||
FILE *fi = fopen64(cache_file, "rb");
|
FILE *fi = fopen64(cache_file, "rb");
|
||||||
if (fi != NULL) {
|
if (fi != NULL) {
|
||||||
DMatrixPage *dmat = new DMatrixPage();
|
DMatrixPage *dmat = new DMatrixPage();
|
||||||
|
|||||||
@ -87,7 +87,7 @@ def c_array(ctype, values):
|
|||||||
|
|
||||||
|
|
||||||
class DMatrix(object):
|
class DMatrix(object):
|
||||||
def __init__(self, data, label=None, missing=0.0, weight=None, cache_file=None):
|
def __init__(self, data, label=None, missing=0.0, weight=None):
|
||||||
"""
|
"""
|
||||||
Data matrix used in XGBoost.
|
Data matrix used in XGBoost.
|
||||||
|
|
||||||
@ -102,24 +102,13 @@ class DMatrix(object):
|
|||||||
Value in the data which needs to be present as a missing value.
|
Value in the data which needs to be present as a missing value.
|
||||||
weight : list or numpy 1-D array (optional)
|
weight : list or numpy 1-D array (optional)
|
||||||
Weight for each instance.
|
Weight for each instance.
|
||||||
cache_file: string
|
|
||||||
Path to the binary cache of input data, when this is enabled,
|
|
||||||
several binary cache files with the prefix cache_file will be created,
|
|
||||||
xgboost will try to use external memory as much as possible,
|
|
||||||
thus save memory during computation in general
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# force into void_p, mac need to pass things in as void_p
|
# force into void_p, mac need to pass things in as void_p
|
||||||
if data is None:
|
if data is None:
|
||||||
self.handle = None
|
self.handle = None
|
||||||
return
|
return
|
||||||
if cache_file is not None:
|
if isinstance(data, string_types):
|
||||||
if not isinstance(data, string_types):
|
|
||||||
raise Exception('cache_file must be used together with input file name')
|
|
||||||
if not isinstance(cache_file, string_types):
|
|
||||||
raise Exception('cache_file must be string')
|
|
||||||
self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateCache(c_str(data), c_str(cache_file), 0))
|
|
||||||
elif isinstance(data, string_types):
|
|
||||||
self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateFromFile(c_str(data), 0))
|
self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateFromFile(c_str(data), 0))
|
||||||
elif isinstance(data, scipy.sparse.csr_matrix):
|
elif isinstance(data, scipy.sparse.csr_matrix):
|
||||||
self._init_from_csr(data)
|
self._init_from_csr(data)
|
||||||
|
|||||||
@ -114,11 +114,6 @@ extern "C"{
|
|||||||
void* XGDMatrixCreateFromFile(const char *fname, int silent) {
|
void* XGDMatrixCreateFromFile(const char *fname, int silent) {
|
||||||
return LoadDataMatrix(fname, silent != 0, false, false);
|
return LoadDataMatrix(fname, silent != 0, false, false);
|
||||||
}
|
}
|
||||||
void* XGDMatrixCreateCache(const char *fname,
|
|
||||||
const char *cache_file,
|
|
||||||
int silent) {
|
|
||||||
return LoadDataMatrix(fname, silent != 0, false, false, cache_file);
|
|
||||||
}
|
|
||||||
void* XGDMatrixCreateFromCSR(const bst_ulong *indptr,
|
void* XGDMatrixCreateFromCSR(const bst_ulong *indptr,
|
||||||
const unsigned *indices,
|
const unsigned *indices,
|
||||||
const float *data,
|
const float *data,
|
||||||
|
|||||||
@ -24,17 +24,6 @@ extern "C" {
|
|||||||
* \return a loaded data matrix
|
* \return a loaded data matrix
|
||||||
*/
|
*/
|
||||||
XGB_DLL void* XGDMatrixCreateFromFile(const char *fname, int silent);
|
XGB_DLL void* XGDMatrixCreateFromFile(const char *fname, int silent);
|
||||||
/*!
|
|
||||||
* \brief load a cached DMatrix, this is backed by several cache_files
|
|
||||||
* and usually cost less memory
|
|
||||||
* \param fname the name of the file, can be a cached buffer or text
|
|
||||||
* \param cache_file the name of cached file
|
|
||||||
* \param silent whether print messages during loading
|
|
||||||
* \return a loaded data matrix
|
|
||||||
*/
|
|
||||||
XGB_DLL void* XGDMatrixCreateCache(const char *fname,
|
|
||||||
const char *cache_file,
|
|
||||||
int silent);
|
|
||||||
/*!
|
/*!
|
||||||
* \brief create a matrix content from csr format
|
* \brief create a matrix content from csr format
|
||||||
* \param indptr pointer to row headers
|
* \param indptr pointer to row headers
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user