From ddb7e538dfa2edef6ea24637f9028dc2e64822f3 Mon Sep 17 00:00:00 2001 From: tqchen Date: Thu, 16 Apr 2015 17:03:18 -0700 Subject: [PATCH] OK --- R-package/src/xgboost_R.cpp | 11 ----------- R-package/src/xgboost_R.h | 9 --------- src/io/io.cpp | 10 +++++++++- wrapper/xgboost.py | 15 ++------------- wrapper/xgboost_wrapper.cpp | 5 ----- wrapper/xgboost_wrapper.h | 11 ----------- 6 files changed, 11 insertions(+), 50 deletions(-) diff --git a/R-package/src/xgboost_R.cpp b/R-package/src/xgboost_R.cpp index f67462564..a2ca9536f 100644 --- a/R-package/src/xgboost_R.cpp +++ b/R-package/src/xgboost_R.cpp @@ -76,17 +76,6 @@ extern "C" { _WrapperEnd(); return ret; } - SEXP XGDMatrixCreateCache_R(SEXP fname, SEXP cache_file, SEXP silent) { - _WrapperBegin(); - void *handle = XGDMatrixCreateCache(CHAR(asChar(fname)), - CHAR(asChar(cache_file)), - asInteger(silent)); - SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); - R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); - UNPROTECT(1); - _WrapperEnd(); - return ret; - } SEXP XGDMatrixCreateFromMat_R(SEXP mat, SEXP missing) { _WrapperBegin(); diff --git a/R-package/src/xgboost_R.h b/R-package/src/xgboost_R.h index 1314cef15..61b84a80e 100644 --- a/R-package/src/xgboost_R.h +++ b/R-package/src/xgboost_R.h @@ -24,15 +24,6 @@ extern "C" { * \return a loaded data matrix */ SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent); - /*! - * \brief load a cached DMatrix, this is backed by several cache_files - * and usually cost less memory - * \param fname the name of the file, can be a cached buffer or text - * \param cache_file the name of cached file - * \param silent whether print messages during loading - * \return a loaded data matrix - */ - SEXP XGDMatrixCreateCache_R(SEXP fname, SEXP cache_file, SEXP silent); /*! * \brief create matrix content from dense matrix * This assumes the matrix is stored in column major format diff --git a/src/io/io.cpp b/src/io/io.cpp index 81ccf9489..967abf15b 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -16,6 +16,7 @@ DataMatrix* LoadDataMatrix(const char *fname, bool loadsplit, const char *cache_file) { std::string fname_ = fname; + const char *dlm = strchr(fname, '#'); if (dlm != NULL) { utils::Check(strchr(dlm + 1, '#') == NULL, @@ -26,7 +27,7 @@ DataMatrix* LoadDataMatrix(const char *fname, fname = fname_.c_str(); cache_file = dlm +1; } - + if (cache_file == NULL) { if (!std::strcmp(fname, "stdin") || !std::strncmp(fname, "s3://", 5) || @@ -51,6 +52,13 @@ DataMatrix* LoadDataMatrix(const char *fname, dmat->CacheLoad(fname, silent, savebuffer); return dmat; } else { + std::string cache_fname = cache_file; + if (loadsplit) { + std::ostringstream os; + os << cache_file << ".r" << rabit::GetRank(); + cache_fname = os.str(); + cache_file = cache_fname.c_str(); + } FILE *fi = fopen64(cache_file, "rb"); if (fi != NULL) { DMatrixPage *dmat = new DMatrixPage(); diff --git a/wrapper/xgboost.py b/wrapper/xgboost.py index bfab05deb..5bb6377c5 100644 --- a/wrapper/xgboost.py +++ b/wrapper/xgboost.py @@ -87,7 +87,7 @@ def c_array(ctype, values): class DMatrix(object): - def __init__(self, data, label=None, missing=0.0, weight=None, cache_file=None): + def __init__(self, data, label=None, missing=0.0, weight=None): """ Data matrix used in XGBoost. @@ -102,24 +102,13 @@ class DMatrix(object): Value in the data which needs to be present as a missing value. weight : list or numpy 1-D array (optional) Weight for each instance. - cache_file: string - Path to the binary cache of input data, when this is enabled, - several binary cache files with the prefix cache_file will be created, - xgboost will try to use external memory as much as possible, - thus save memory during computation in general """ # force into void_p, mac need to pass things in as void_p if data is None: self.handle = None return - if cache_file is not None: - if not isinstance(data, string_types): - raise Exception('cache_file must be used together with input file name') - if not isinstance(cache_file, string_types): - raise Exception('cache_file must be string') - self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateCache(c_str(data), c_str(cache_file), 0)) - elif isinstance(data, string_types): + if isinstance(data, string_types): self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateFromFile(c_str(data), 0)) elif isinstance(data, scipy.sparse.csr_matrix): self._init_from_csr(data) diff --git a/wrapper/xgboost_wrapper.cpp b/wrapper/xgboost_wrapper.cpp index 45fc05082..dec266ff6 100644 --- a/wrapper/xgboost_wrapper.cpp +++ b/wrapper/xgboost_wrapper.cpp @@ -114,11 +114,6 @@ extern "C"{ void* XGDMatrixCreateFromFile(const char *fname, int silent) { return LoadDataMatrix(fname, silent != 0, false, false); } - void* XGDMatrixCreateCache(const char *fname, - const char *cache_file, - int silent) { - return LoadDataMatrix(fname, silent != 0, false, false, cache_file); - } void* XGDMatrixCreateFromCSR(const bst_ulong *indptr, const unsigned *indices, const float *data, diff --git a/wrapper/xgboost_wrapper.h b/wrapper/xgboost_wrapper.h index 66d1dfbc0..d51eb284f 100644 --- a/wrapper/xgboost_wrapper.h +++ b/wrapper/xgboost_wrapper.h @@ -24,17 +24,6 @@ extern "C" { * \return a loaded data matrix */ XGB_DLL void* XGDMatrixCreateFromFile(const char *fname, int silent); - /*! - * \brief load a cached DMatrix, this is backed by several cache_files - * and usually cost less memory - * \param fname the name of the file, can be a cached buffer or text - * \param cache_file the name of cached file - * \param silent whether print messages during loading - * \return a loaded data matrix - */ - XGB_DLL void* XGDMatrixCreateCache(const char *fname, - const char *cache_file, - int silent); /*! * \brief create a matrix content from csr format * \param indptr pointer to row headers