add dump statistics

This commit is contained in:
tqchen 2014-12-28 17:45:37 -08:00
parent 0c7e090c19
commit 6b96737811
6 changed files with 24 additions and 12 deletions

View File

@ -10,6 +10,10 @@
#' See demo/ for walkthrough example in R, and #' See demo/ for walkthrough example in R, and
#' \url{https://github.com/tqchen/xgboost/blob/master/demo/data/featmap.txt} #' \url{https://github.com/tqchen/xgboost/blob/master/demo/data/featmap.txt}
#' for example Format. #' for example Format.
#' @param with.stats whether to dump statistics of splits.
#' When this option is on, the model dump comes with two additional statistics:
#' gain is the approximate loss function gain we get in each split;
#' cover is the sum of second-order gradients in each node.
#' #'
#' @examples #' @examples
#' data(agaricus.train, package='xgboost') #' data(agaricus.train, package='xgboost')
@ -21,13 +25,13 @@
#' xgb.dump(bst, 'xgb.model.dump') #' xgb.dump(bst, 'xgb.model.dump')
#' @export #' @export
#' #'
xgb.dump <- function(model, fname, fmap = "") { xgb.dump <- function(model, fname, fmap = "", with.stats=FALSE) {
if (class(model) != "xgb.Booster") { if (class(model) != "xgb.Booster") {
stop("xgb.dump: first argument must be type xgb.Booster") stop("xgb.dump: first argument must be type xgb.Booster")
} }
if (typeof(fname) != "character") { if (typeof(fname) != "character") {
stop("xgb.dump: second argument must be type character") stop("xgb.dump: second argument must be type character")
} }
.Call("XGBoosterDumpModel_R", model, fname, fmap, PACKAGE = "xgboost") .Call("XGBoosterDumpModel_R", model, fname, fmap, as.integer(with.stats), PACKAGE = "xgboost")
return(TRUE) return(TRUE)
} }

View File

@ -241,10 +241,10 @@ extern "C" {
for (int i = 0; i < len; ++i) { for (int i = 0; i < len; ++i) {
vec_sptr.push_back(vec_names[i].c_str()); vec_sptr.push_back(vec_names[i].c_str());
} }
_WrapperEnd();
return mkString(XGBoosterEvalOneIter(R_ExternalPtrAddr(handle), return mkString(XGBoosterEvalOneIter(R_ExternalPtrAddr(handle),
asInteger(iter), asInteger(iter),
BeginPtr(vec_dmats), BeginPtr(vec_sptr), len)); BeginPtr(vec_dmats), BeginPtr(vec_sptr), len));
_WrapperEnd();
} }
SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin, SEXP ntree_limit) { SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin, SEXP ntree_limit) {
_WrapperBegin(); _WrapperBegin();
@ -272,11 +272,13 @@ extern "C" {
XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))); XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)));
_WrapperEnd(); _WrapperEnd();
} }
void XGBoosterDumpModel_R(SEXP handle, SEXP fname, SEXP fmap) { void XGBoosterDumpModel_R(SEXP handle, SEXP fname,
SEXP fmap, SEXP with_stats) {
_WrapperBegin(); _WrapperBegin();
bst_ulong olen; bst_ulong olen;
const char **res = XGBoosterDumpModel(R_ExternalPtrAddr(handle), const char **res = XGBoosterDumpModel(R_ExternalPtrAddr(handle),
CHAR(asChar(fmap)), CHAR(asChar(fmap)),
asInteger(with_stats),
&olen); &olen);
FILE *fo = utils::FopenCheck(CHAR(asChar(fname)), "w"); FILE *fo = utils::FopenCheck(CHAR(asChar(fname)), "w");
for (size_t i = 0; i < olen; ++i) { for (size_t i = 0; i < olen; ++i) {

View File

@ -132,7 +132,8 @@ extern "C" {
* \param handle handle * \param handle handle
* \param fname file name of model that can be dumped into * \param fname file name of model that can be dumped into
* \param fmap name to fmap can be empty string * \param fmap name to fmap can be empty string
* \param with_stats whether to dump statistics of splits
*/ */
void XGBoosterDumpModel_R(SEXP handle, SEXP fname, SEXP fmap); void XGBoosterDumpModel_R(SEXP handle, SEXP fname, SEXP fmap, SEXP with_stats);
} }
#endif // XGBOOST_WRAPPER_R_H_ #endif // XGBOOST_WRAPPER_R_H_

View File

@ -368,13 +368,15 @@ class Booster:
None None
""" """
xglib.XGBoosterLoadModel( self.handle, ctypes.c_char_p(fname.encode('utf-8')) ) xglib.XGBoosterLoadModel( self.handle, ctypes.c_char_p(fname.encode('utf-8')) )
def dump_model(self, fo, fmap=''): def dump_model(self, fo, fmap='', with_stats = False):
"""dump model into text file """dump model into text file
Args: Args:
fo: string fo: string
file name to be dumped file name to be dumped
fmap: string, optional fmap: string, optional
file name of feature map names file name of feature map names
with_stats: bool, optional
whether to output statistics of the splits
Returns: Returns:
None None
""" """
@ -383,16 +385,18 @@ class Booster:
need_close = True need_close = True
else: else:
need_close = False need_close = False
ret = self.get_dump(fmap) ret = self.get_dump(fmap, with_stats)
for i in range(len(ret)): for i in range(len(ret)):
fo.write('booster[%d]:\n' %i) fo.write('booster[%d]:\n' %i)
fo.write( ret[i] ) fo.write( ret[i] )
if need_close: if need_close:
fo.close() fo.close()
def get_dump(self, fmap=''): def get_dump(self, fmap='', with_stats=False):
"""get dump of model as list of strings """ """get dump of model as list of strings """
length = ctypes.c_ulong() length = ctypes.c_ulong()
sarr = xglib.XGBoosterDumpModel(self.handle, ctypes.c_char_p(fmap.encode('utf-8')), ctypes.byref(length)) sarr = xglib.XGBoosterDumpModel(self.handle,
ctypes.c_char_p(fmap.encode('utf-8')),
int(with_stats), ctypes.byref(length))
res = [] res = []
for i in range(length.value): for i in range(length.value):
res.append( str(sarr[i]) ) res.append( str(sarr[i]) )

View File

@ -293,11 +293,11 @@ extern "C"{
void XGBoosterSaveModel(const void *handle, const char *fname) { void XGBoosterSaveModel(const void *handle, const char *fname) {
static_cast<const Booster*>(handle)->SaveModel(fname); static_cast<const Booster*>(handle)->SaveModel(fname);
} }
const char** XGBoosterDumpModel(void *handle, const char *fmap, bst_ulong *len){ const char** XGBoosterDumpModel(void *handle, const char *fmap, int with_stats, bst_ulong *len){
utils::FeatMap featmap; utils::FeatMap featmap;
if (strlen(fmap) != 0) { if (strlen(fmap) != 0) {
featmap.LoadText(fmap); featmap.LoadText(fmap);
} }
return static_cast<Booster*>(handle)->GetModelDump(featmap, false, len); return static_cast<Booster*>(handle)->GetModelDump(featmap, with_stats != 0, len);
} }
} }

View File

@ -200,11 +200,12 @@ extern "C" {
* \brief dump model, return array of strings representing model dump * \brief dump model, return array of strings representing model dump
* \param handle handle * \param handle handle
* \param fmap name to fmap can be empty string * \param fmap name to fmap can be empty string
* \param with_stats whether to dump with statistics
* \param out_len length of output array * \param out_len length of output array
* \return char *data[], representing dump of each model * \return char *data[], representing dump of each model
*/ */
XGB_DLL const char **XGBoosterDumpModel(void *handle, const char *fmap, XGB_DLL const char **XGBoosterDumpModel(void *handle, const char *fmap,
bst_ulong *out_len); int with_stats, bst_ulong *out_len);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif