From a06f01e8ec3da501f86ba212bf19b3c516b20aad Mon Sep 17 00:00:00 2001 From: hetong Date: Sat, 30 Aug 2014 15:14:36 -0700 Subject: [PATCH 01/64] improve document format --- R-package/R/getinfo.xgb.DMatrix.R | 9 ++++++--- R-package/R/slice.xgb.DMatrix.R | 11 +++++++---- R-package/R/xgb.DMatrix.save.R | 2 +- R-package/man/getinfo.Rd | 6 ++++++ R-package/man/slice.Rd | 6 ++++++ R-package/man/xgb.DMatrix.save.Rd | 2 +- 6 files changed, 27 insertions(+), 9 deletions(-) diff --git a/R-package/R/getinfo.xgb.DMatrix.R b/R-package/R/getinfo.xgb.DMatrix.R index 5b438049c..3a79fd2fb 100644 --- a/R-package/R/getinfo.xgb.DMatrix.R +++ b/R-package/R/getinfo.xgb.DMatrix.R @@ -4,20 +4,23 @@ setClass('xgb.DMatrix') #' #' Get information of an xgb.DMatrix object #' -#' @param object Object of class "xgb.DMatrix" -#' @param name the name of the field to get -#' #' @examples #' data(iris) #' iris[,5] <- as.numeric(iris[,5]) #' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) #' labels <- getinfo(dtrain, "label") +#' @rdname getinfo #' @export #' getinfo <- function(object, ...){ UseMethod("getinfo") } +#' @param object Object of class "xgb.DMatrix" +#' @param name the name of the field to get +#' @param ... other parameters +#' @rdname getinfo +#' @method getinfo xgb.DMatrix setMethod("getinfo", signature = "xgb.DMatrix", definition = function(object, name) { if (typeof(name) != "character") { diff --git a/R-package/R/slice.xgb.DMatrix.R b/R-package/R/slice.xgb.DMatrix.R index 0c56829fa..8a93efc4d 100644 --- a/R-package/R/slice.xgb.DMatrix.R +++ b/R-package/R/slice.xgb.DMatrix.R @@ -6,22 +6,25 @@ setClass('xgb.DMatrix') #' Get a new DMatrix containing the specified rows of #' orginal xgb.DMatrix object #' -#' @param object Object of class "xgb.DMatrix" -#' @param idxset a integer vector of indices of rows needed -#' #' @examples #' data(iris) #' iris[,5] <- as.numeric(iris[,5]) #' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) #' dsub <- slice(dtrain, 1:3) +#' @rdname slice #' @export #' slice <- function(object, ...){ UseMethod("slice") } +#' @param object Object of class "xgb.DMatrix" +#' @param idxset a integer vector of indices of rows needed +#' @param ... other parameters +#' @rdname slice +#' @method slice xgb.DMatrix setMethod("slice", signature = "xgb.DMatrix", - definition = function(object, idxset) { + definition = function(object, idxset, ...) { if (class(object) != "xgb.DMatrix") { stop("slice: first argument dtrain must be xgb.DMatrix") } diff --git a/R-package/R/xgb.DMatrix.save.R b/R-package/R/xgb.DMatrix.save.R index b108c2dad..4fcb71301 100644 --- a/R-package/R/xgb.DMatrix.save.R +++ b/R-package/R/xgb.DMatrix.save.R @@ -2,7 +2,7 @@ #' #' Save xgb.DMatrix object to binary file #' -#' @param model the model object. +#' @param DMatrix the model object. #' @param fname the name of the binary file. #' #' @examples diff --git a/R-package/man/getinfo.Rd b/R-package/man/getinfo.Rd index 4f63b5e92..05a25c152 100644 --- a/R-package/man/getinfo.Rd +++ b/R-package/man/getinfo.Rd @@ -1,14 +1,20 @@ % Generated by roxygen2 (4.0.1): do not edit by hand +\docType{methods} \name{getinfo} \alias{getinfo} +\alias{getinfo,xgb.DMatrix-method} \title{Get information of an xgb.DMatrix object} \usage{ getinfo(object, ...) + +\S4method{getinfo}{xgb.DMatrix}(object, name) } \arguments{ \item{object}{Object of class "xgb.DMatrix"} \item{name}{the name of the field to get} + +\item{...}{other parameters} } \description{ Get information of an xgb.DMatrix object diff --git a/R-package/man/slice.Rd b/R-package/man/slice.Rd index 06d79f6c4..7acb14a32 100644 --- a/R-package/man/slice.Rd +++ b/R-package/man/slice.Rd @@ -1,15 +1,21 @@ % Generated by roxygen2 (4.0.1): do not edit by hand +\docType{methods} \name{slice} \alias{slice} +\alias{slice,xgb.DMatrix-method} \title{Get a new DMatrix containing the specified rows of orginal xgb.DMatrix object} \usage{ slice(object, ...) + +\S4method{slice}{xgb.DMatrix}(object, idxset, ...) } \arguments{ \item{object}{Object of class "xgb.DMatrix"} \item{idxset}{a integer vector of indices of rows needed} + +\item{...}{other parameters} } \description{ Get a new DMatrix containing the specified rows of diff --git a/R-package/man/xgb.DMatrix.save.Rd b/R-package/man/xgb.DMatrix.save.Rd index d4932fa42..e5e70501d 100644 --- a/R-package/man/xgb.DMatrix.save.Rd +++ b/R-package/man/xgb.DMatrix.save.Rd @@ -6,7 +6,7 @@ xgb.DMatrix.save(DMatrix, fname) } \arguments{ -\item{model}{the model object.} +\item{DMatrix}{the model object.} \item{fname}{the name of the binary file.} } From 1abdcaa11d4a266eb851fad6b40c8028dd13d42f Mon Sep 17 00:00:00 2001 From: hetong Date: Sat, 30 Aug 2014 15:17:17 -0700 Subject: [PATCH 02/64] eliminate warnings and notes from R CMD check --- R-package/DESCRIPTION | 4 ++-- R-package/demo/00Index | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 7d60143bd..bf9227c7c 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -12,5 +12,5 @@ BugReports: https://github.com/tqchen/xgboost/issues Depends: R (>= 2.0.2) Imports: - Matrix (>= 1.1-0), - methods + Matrix (>= 1.1-0) + diff --git a/R-package/demo/00Index b/R-package/demo/00Index index 2ca4abd32..9e5dddf29 100644 --- a/R-package/demo/00Index +++ b/R-package/demo/00Index @@ -1 +1 @@ -demo R code for xgboost usages on agaricus data +demo R code for xgboost usages on agaricus data From 3e854194284556e850e8418a86142c90df3830d1 Mon Sep 17 00:00:00 2001 From: hetong Date: Sat, 30 Aug 2014 15:34:36 -0700 Subject: [PATCH 03/64] add back import of methdos --- R-package/DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index bf9227c7c..8897a989b 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -12,5 +12,5 @@ BugReports: https://github.com/tqchen/xgboost/issues Depends: R (>= 2.0.2) Imports: - Matrix (>= 1.1-0) + Matrix (>= 1.1-0), methods From 9b618acba2f9ad002cafba6e8e28926e98f289de Mon Sep 17 00:00:00 2001 From: hetong Date: Sat, 30 Aug 2014 15:42:57 -0700 Subject: [PATCH 04/64] add import methods in NAMESPACE --- R-package/DESCRIPTION | 4 ++-- R-package/NAMESPACE | 1 + R-package/R/utils.R | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 8897a989b..7d60143bd 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -12,5 +12,5 @@ BugReports: https://github.com/tqchen/xgboost/issues Depends: R (>= 2.0.2) Imports: - Matrix (>= 1.1-0), methods - + Matrix (>= 1.1-0), + methods diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE index 3fc74663e..4a7cb9465 100644 --- a/R-package/NAMESPACE +++ b/R-package/NAMESPACE @@ -10,5 +10,6 @@ export(xgb.save) export(xgb.train) export(xgboost) exportMethods(predict) +import(methods) importClassesFrom(Matrix,dgCMatrix) importClassesFrom(Matrix,dgeMatrix) diff --git a/R-package/R/utils.R b/R-package/R/utils.R index b3fb39748..1ae2dbe71 100644 --- a/R-package/R/utils.R +++ b/R-package/R/utils.R @@ -1,4 +1,5 @@ #' @importClassesFrom Matrix dgCMatrix dgeMatrix +#' @import methods # depends on matrix .onLoad <- function(libname, pkgname) { From 257c864274c08bf481c6e48758bafb29da64a3a8 Mon Sep 17 00:00:00 2001 From: hetong Date: Sat, 30 Aug 2014 16:26:26 -0700 Subject: [PATCH 05/64] remove pdf file From 9739a1c8066b04e1f599d3670b22a15abcd0da8b Mon Sep 17 00:00:00 2001 From: Tong He Date: Sat, 30 Aug 2014 18:17:20 -0700 Subject: [PATCH 06/64] Update DESCRIPTION --- R-package/DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 7d60143bd..cae315aeb 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -4,7 +4,7 @@ Title: R wrapper of xgboost Version: 0.3-0 Date: 2014-08-23 Author: Tianqi Chen, Tong He -Maintainer: Tianqi Chen , Tong He +Maintainer: Tong He Description: xgboost License: file LICENSE URL: https://github.com/tqchen/xgboost From 9c0389981a554714e687890ab33fd2048dd6b01e Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 30 Aug 2014 18:49:30 -0700 Subject: [PATCH 07/64] fix print problem, fix Tong's email format --- R-package/DESCRIPTION | 2 +- R-package/src/Makevars | 4 ++-- R-package/src/Makevars.win | 5 ++--- R-package/src/xgboost_R.cpp | 3 +++ src/io/simple_dmatrix-inl.hpp | 34 +++++++++++++++++----------------- src/learner/dmatrix.h | 6 +++--- src/learner/evaluation-inl.hpp | 2 +- src/learner/learner-inl.hpp | 2 +- src/tree/updater_prune-inl.hpp | 4 ++-- src/utils/utils.h | 30 ++++++++++++++++++++++-------- 10 files changed, 54 insertions(+), 38 deletions(-) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index cae315aeb..9e9f0be23 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -4,7 +4,7 @@ Title: R wrapper of xgboost Version: 0.3-0 Date: 2014-08-23 Author: Tianqi Chen, Tong He -Maintainer: Tong He +Maintainer: Tong He Description: xgboost License: file LICENSE URL: https://github.com/tqchen/xgboost diff --git a/R-package/src/Makevars b/R-package/src/Makevars index 969571290..8569a802e 100644 --- a/R-package/src/Makevars +++ b/R-package/src/Makevars @@ -4,8 +4,8 @@ PKGROOT=../../ CXX=`R CMD config CXX` TCFLAGS=`R CMD config CFLAGS` # expose these flags to R CMD SHLIB -PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_ERROR_ -I$(PKGROOT) $(SHLIB_OPENMP_CFLAGS) -XGBFLAG= $(TCFLAGS) -DXGBOOST_CUSTOMIZE_ERROR_ -fPIC $(SHLIB_OPENMP_CFLAGS) +PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -I$(PKGROOT) $(SHLIB_OPENMP_CFLAGS) +XGBFLAG= $(TCFLAGS) -DXGBOOST_CUSTOMIZE_MSG_ -fPIC $(SHLIB_OPENMP_CFLAGS) PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) ifeq ($(no_omp),1) diff --git a/R-package/src/Makevars.win b/R-package/src/Makevars.win index d06076def..391c9b6d9 100644 --- a/R-package/src/Makevars.win +++ b/R-package/src/Makevars.win @@ -4,8 +4,8 @@ PKGROOT=../../ CXX=`Rcmd config CXX` TCFLAGS=`Rcmd config CFLAGS` # expose these flags to R CMD SHLIB -PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_ERROR_ -I$(PKGROOT) $(SHLIB_OPENMP_CFLAGS) -XGBFLAG= -O3 -DXGBOOST_CUSTOMIZE_ERROR_ -fPIC $(SHLIB_OPENMP_CFLAGS) +PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -I$(PKGROOT) $(SHLIB_OPENMP_CFLAGS) +XGBFLAG= -O3 -DXGBOOST_CUSTOMIZE_MSG_ -fPIC $(SHLIB_OPENMP_CFLAGS) PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) ifeq ($(no_omp),1) @@ -29,4 +29,3 @@ $(CXXOBJ) : clean: rm -rf *.so *.o *~ *.dll - \ No newline at end of file diff --git a/R-package/src/xgboost_R.cpp b/R-package/src/xgboost_R.cpp index b358ef4ae..20ff69854 100644 --- a/R-package/src/xgboost_R.cpp +++ b/R-package/src/xgboost_R.cpp @@ -18,6 +18,9 @@ void HandleAssertError(const char *msg) { void HandleCheckError(const char *msg) { error("%s", msg); } +void HandlePrint(const char *msg) { + Rprintf("%s", msg); +} } // namespace utils } // namespace xgboost diff --git a/src/io/simple_dmatrix-inl.hpp b/src/io/simple_dmatrix-inl.hpp index df8bd9fee..36c6c8fd7 100644 --- a/src/io/simple_dmatrix-inl.hpp +++ b/src/io/simple_dmatrix-inl.hpp @@ -104,10 +104,10 @@ class DMatrixSimple : public DataMatrix { this->AddRow(feats); if (!silent) { - printf("%lux%lu matrix with %lu entries is loaded from %s\n", - static_cast(info.num_row()), - static_cast(info.num_col()), - static_cast(row_data_.size()), fname); + utils::Printf("%lux%lu matrix with %lu entries is loaded from %s\n", + static_cast(info.num_row()), + static_cast(info.num_col()), + static_cast(row_data_.size()), fname); } fclose(file); // try to load in additional file @@ -156,17 +156,17 @@ class DMatrixSimple : public DataMatrix { fmat_->LoadColAccess(fs); if (!silent) { - printf("%lux%lu matrix with %lu entries is loaded", - static_cast(info.num_row()), - static_cast(info.num_col()), - static_cast(row_data_.size())); + utils::Printf("%lux%lu matrix with %lu entries is loaded", + static_cast(info.num_row()), + static_cast(info.num_col()), + static_cast(row_data_.size())); if (fname != NULL) { - printf(" from %s\n", fname); + utils::Printf(" from %s\n", fname); } else { - printf("\n"); + utils::Printf("\n"); } if (info.group_ptr.size() != 0) { - printf("data contains %u groups\n", (unsigned)info.group_ptr.size()-1); + utils::Printf("data contains %u groups\n", (unsigned)info.group_ptr.size()-1); } } } @@ -186,13 +186,13 @@ class DMatrixSimple : public DataMatrix { fs.Close(); if (!silent) { - printf("%lux%lu matrix with %lu entries is saved to %s\n", - static_cast(info.num_row()), - static_cast(info.num_col()), - static_cast(row_data_.size()), fname); + utils::Printf("%lux%lu matrix with %lu entries is saved to %s\n", + static_cast(info.num_row()), + static_cast(info.num_col()), + static_cast(row_data_.size()), fname); if (info.group_ptr.size() != 0) { - printf("data contains %u groups\n", - static_cast(info.group_ptr.size()-1)); + utils::Printf("data contains %u groups\n", + static_cast(info.group_ptr.size()-1)); } } } diff --git a/src/learner/dmatrix.h b/src/learner/dmatrix.h index 542b6f6f5..791b3467d 100644 --- a/src/learner/dmatrix.h +++ b/src/learner/dmatrix.h @@ -98,8 +98,8 @@ struct MetaInfo { group_ptr.push_back(group_ptr.back()+nline); } if (!silent) { - printf("%u groups are loaded from %s\n", - static_cast(group_ptr.size()-1), fname); + utils::Printf("%u groups are loaded from %s\n", + static_cast(group_ptr.size()-1), fname); } fclose(fi); return true; @@ -133,7 +133,7 @@ struct MetaInfo { weights.push_back(wt); } if (!silent) { - printf("loading %s from %s\n", field, fname); + utils::Printf("loading %s from %s\n", field, fname); } fclose(fi); return true; diff --git a/src/learner/evaluation-inl.hpp b/src/learner/evaluation-inl.hpp index 3058cf06b..4f34a7ac4 100644 --- a/src/learner/evaluation-inl.hpp +++ b/src/learner/evaluation-inl.hpp @@ -183,7 +183,7 @@ struct EvalAMS : public IEvaluator { } } if (ntop == ndata) { - fprintf(stderr, "\tams-ratio=%g", static_cast(thresindex) / ndata); + utils::Printf("\tams-ratio=%g", static_cast(thresindex) / ndata); return static_cast(tams); } else { return static_cast(sqrt(2*((s_tp+b_fp+br) * log(1.0 + s_tp/(b_fp+br)) - s_tp))); diff --git a/src/learner/learner-inl.hpp b/src/learner/learner-inl.hpp index 8e7bce0a8..f252abedf 100644 --- a/src/learner/learner-inl.hpp +++ b/src/learner/learner-inl.hpp @@ -70,7 +70,7 @@ class BoostLearner { static_cast(buffer_size)); this->SetParam("num_pbuffer", str_temp); if (!silent) { - printf("buffer_size=%ld\n", static_cast(buffer_size)); + utils::Printf("buffer_size=%ld\n", static_cast(buffer_size)); } } /*! diff --git a/src/tree/updater_prune-inl.hpp b/src/tree/updater_prune-inl.hpp index e3d5be11a..98fdf5ee4 100644 --- a/src/tree/updater_prune-inl.hpp +++ b/src/tree/updater_prune-inl.hpp @@ -63,8 +63,8 @@ class TreePruner: public IUpdater { } } if (silent == 0) { - printf("tree prunning end, %d roots, %d extra nodes, %d pruned nodes ,max_depth=%d\n", - tree.param.num_roots, tree.num_extra_nodes(), npruned, tree.MaxDepth()); + utils::Printf("tree prunning end, %d roots, %d extra nodes, %d pruned nodes ,max_depth=%d\n", + tree.param.num_roots, tree.num_extra_nodes(), npruned, tree.MaxDepth()); } } diff --git a/src/utils/utils.h b/src/utils/utils.h index 501895224..625bc2d48 100644 --- a/src/utils/utils.h +++ b/src/utils/utils.h @@ -48,9 +48,9 @@ namespace xgboost { /*! \brief namespace for helper utils of the project */ namespace utils { /*! \brief error message buffer length */ -const int kErrorBuffer = 1 << 12; +const int kPrintBuffer = 1 << 12; -#ifndef XGBOOST_CUSTOMIZE_ERROR_ +#ifndef XGBOOST_CUSTOMIZE_MSG_ /*! * \brief handling of Assert error, caused by in-apropriate input * \param msg error message @@ -67,19 +67,33 @@ inline void HandleCheckError(const char *msg) { fprintf(stderr, "%s\n", msg); exit(-1); } +inline void HandlePrint(const char *msg) { + printf("%s", msg); +} #else // include declarations, some one must implement this void HandleAssertError(const char *msg); void HandleCheckError(const char *msg); +void HandlePrint(const char *msg); #endif +/*! \brief printf, print message to the console */ +inline void Printf(const char *fmt, ...) { + std::string msg(kPrintBuffer, '\0'); + va_list args; + va_start(args, fmt); + vsnprintf(&msg[0], kPrintBuffer, fmt, args); + va_end(args); + HandlePrint(msg.c_str()); +} + /*! \brief assert an condition is true, use this to handle debug information */ inline void Assert(bool exp, const char *fmt, ...) { if (!exp) { - std::string msg(kErrorBuffer, '\0'); + std::string msg(kPrintBuffer, '\0'); va_list args; va_start(args, fmt); - vsnprintf(&msg[0], kErrorBuffer, fmt, args); + vsnprintf(&msg[0], kPrintBuffer, fmt, args); va_end(args); HandleAssertError(msg.c_str()); } @@ -88,10 +102,10 @@ inline void Assert(bool exp, const char *fmt, ...) { /*!\brief same as assert, but this is intended to be used as message for user*/ inline void Check(bool exp, const char *fmt, ...) { if (!exp) { - std::string msg(kErrorBuffer, '\0'); + std::string msg(kPrintBuffer, '\0'); va_list args; va_start(args, fmt); - vsnprintf(&msg[0], kErrorBuffer, fmt, args); + vsnprintf(&msg[0], kPrintBuffer, fmt, args); va_end(args); HandleCheckError(msg.c_str()); } @@ -100,10 +114,10 @@ inline void Check(bool exp, const char *fmt, ...) { /*! \brief report error message, same as check */ inline void Error(const char *fmt, ...) { { - std::string msg(kErrorBuffer, '\0'); + std::string msg(kPrintBuffer, '\0'); va_list args; va_start(args, fmt); - vsnprintf(&msg[0], kErrorBuffer, fmt, args); + vsnprintf(&msg[0], kPrintBuffer, fmt, args); va_end(args); HandleCheckError(msg.c_str()); } From 273816a3b45f1430094d17e3069b99c93b1c3edc Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 30 Aug 2014 18:58:32 -0700 Subject: [PATCH 08/64] chg data --- Makefile | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/Makefile b/Makefile index 5599f3ab4..552954453 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,7 @@ BIN = xgboost OBJ = updater.o gbm.o io.o SLIB = wrapper/libxgboostwrapper.so -.PHONY: clean all python +.PHONY: clean all python Rpack all: $(BIN) $(OBJ) $(SLIB) @@ -40,19 +40,21 @@ $(OBJ) : install: cp -f -r $(BIN) $(INSTALL_PATH) -R-package.tar.gz: - rm -rf xgboost-R - cp -r R-package xgboost-R - rm -rf xgboost-R/src/*.o xgboost-R/src/*.so xgboost-R/src/*.dll - cp -r src xgboost-R/src/src - mkdir xgboost-R/src/wrapper - cp wrapper/xgboost_wrapper.h xgboost-R/src/wrapper - cp wrapper/xgboost_wrapper.cpp xgboost-R/src/wrapper - cp ./LICENSE xgboost-R - cat R-package/src/Makevars|sed '2s/.*/PKGROOT=./' > xgboost-R/src/Makevars - cat R-package/src/Makevars.win|sed '2s/.*/PKGROOT=./' > xgboost-R/src/Makevars.win - tar czf $@ xgboost-R - rm -rf xgboost-R +Rpack: + rm -rf xgboost xgboost*.tar.gz + cp -r R-package xgboost + rm -rf xgboost/src/*.o xgboost/src/*.so xgboost/src/*.dll + rm -rf xgboost/demo/*.model xgboost/demo/*.buffer + cp -r src xgboost/src/src + mkdir xgboost/src/wrapper + cp wrapper/xgboost_wrapper.h xgboost/src/wrapper + cp wrapper/xgboost_wrapper.cpp xgboost/src/wrapper + cp ./LICENSE xgboost + cat R-package/src/Makevars|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars + cat R-package/src/Makevars.win|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars.win + R CMD build xgboost + rm -rf xgboost + R CMD check --as-cran xgboost*.tar.gz clean: $(RM) $(OBJ) $(BIN) $(SLIB) *.o *~ */*~ */*/*~ From 104d1d61c79e1727fc395004c4ea3e6265d40541 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 30 Aug 2014 20:06:31 -0700 Subject: [PATCH 09/64] add license name --- R-package/DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 9e9f0be23..a5be60f30 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -6,7 +6,7 @@ Date: 2014-08-23 Author: Tianqi Chen, Tong He Maintainer: Tong He Description: xgboost -License: file LICENSE +License: Apache License (== 2.0) | file LICENSE URL: https://github.com/tqchen/xgboost BugReports: https://github.com/tqchen/xgboost/issues Depends: From f2c8093ba672fc09228b854076c87ab270b196a8 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 30 Aug 2014 20:22:36 -0700 Subject: [PATCH 10/64] check in description --- R-package/DESCRIPTION | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index a5be60f30..39a14a517 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -1,11 +1,17 @@ Package: xgboost Type: Package -Title: R wrapper of xgboost +Title: eXtreme Gradient Boosting Version: 0.3-0 Date: 2014-08-23 Author: Tianqi Chen, Tong He Maintainer: Tong He -Description: xgboost +Description: xgboost is short for eXtreme Gradient Boosting package. + It is an efficient and scalable implementation of gradient boosting framework. + The package includes efficient linear model solver and tree learning algorithm. + The package can automatically do parallel computation with OpenMP, and it can be + more than 10 times faster than existing gradient boosting packages such as gbm. + It supports various objective functions, including regression, classification and ranking. + The package is made to be extendible, so that user are also allowed to define there own objectives easily. License: Apache License (== 2.0) | file LICENSE URL: https://github.com/tqchen/xgboost BugReports: https://github.com/tqchen/xgboost/issues From 629799df0b9d2cb08ac41050818e7e31a34bf1c8 Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Sat, 30 Aug 2014 20:24:23 -0700 Subject: [PATCH 11/64] Update DESCRIPTION --- R-package/DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 39a14a517..0de13b49d 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -5,7 +5,7 @@ Version: 0.3-0 Date: 2014-08-23 Author: Tianqi Chen, Tong He Maintainer: Tong He -Description: xgboost is short for eXtreme Gradient Boosting package. +Description: This package is a R wrapper of xgboost, which is short for eXtreme Gradient Boosting. It is an efficient and scalable implementation of gradient boosting framework. The package includes efficient linear model solver and tree learning algorithm. The package can automatically do parallel computation with OpenMP, and it can be From b153ffe45167133a5a13f42e39c2993ca4784411 Mon Sep 17 00:00:00 2001 From: Tong He Date: Sat, 30 Aug 2014 20:46:21 -0700 Subject: [PATCH 12/64] Update DESCRIPTION --- R-package/DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 0de13b49d..576c0fbef 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -12,7 +12,7 @@ Description: This package is a R wrapper of xgboost, which is short for eXtreme more than 10 times faster than existing gradient boosting packages such as gbm. It supports various objective functions, including regression, classification and ranking. The package is made to be extendible, so that user are also allowed to define there own objectives easily. -License: Apache License (== 2.0) | file LICENSE +License: Apache License (== 2.0) + file LICENSE URL: https://github.com/tqchen/xgboost BugReports: https://github.com/tqchen/xgboost/issues Depends: From 22a38d844050a1877f4e0deece3743d1b8a19285 Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 30 Aug 2014 21:04:47 -0700 Subject: [PATCH 13/64] move demo to inst/examples --- R-package/DESCRIPTION | 2 +- R-package/demo/00Index | 1 - R-package/{demo => inst/examples}/agaricus.txt.test | 0 R-package/{demo => inst/examples}/agaricus.txt.train | 0 R-package/{demo => inst/examples}/demo.R | 0 R-package/{demo => inst/examples}/featmap.txt | 0 6 files changed, 1 insertion(+), 2 deletions(-) delete mode 100644 R-package/demo/00Index rename R-package/{demo => inst/examples}/agaricus.txt.test (100%) rename R-package/{demo => inst/examples}/agaricus.txt.train (100%) rename R-package/{demo => inst/examples}/demo.R (100%) rename R-package/{demo => inst/examples}/featmap.txt (100%) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 576c0fbef..47c36f1ef 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -3,7 +3,7 @@ Type: Package Title: eXtreme Gradient Boosting Version: 0.3-0 Date: 2014-08-23 -Author: Tianqi Chen, Tong He +Author: Tianqi Chen , Tong He Maintainer: Tong He Description: This package is a R wrapper of xgboost, which is short for eXtreme Gradient Boosting. It is an efficient and scalable implementation of gradient boosting framework. diff --git a/R-package/demo/00Index b/R-package/demo/00Index deleted file mode 100644 index 9e5dddf29..000000000 --- a/R-package/demo/00Index +++ /dev/null @@ -1 +0,0 @@ -demo R code for xgboost usages on agaricus data diff --git a/R-package/demo/agaricus.txt.test b/R-package/inst/examples/agaricus.txt.test similarity index 100% rename from R-package/demo/agaricus.txt.test rename to R-package/inst/examples/agaricus.txt.test diff --git a/R-package/demo/agaricus.txt.train b/R-package/inst/examples/agaricus.txt.train similarity index 100% rename from R-package/demo/agaricus.txt.train rename to R-package/inst/examples/agaricus.txt.train diff --git a/R-package/demo/demo.R b/R-package/inst/examples/demo.R similarity index 100% rename from R-package/demo/demo.R rename to R-package/inst/examples/demo.R diff --git a/R-package/demo/featmap.txt b/R-package/inst/examples/featmap.txt similarity index 100% rename from R-package/demo/featmap.txt rename to R-package/inst/examples/featmap.txt From b123fbbcf9f5dced9980b7df5f888d2587bc559f Mon Sep 17 00:00:00 2001 From: hetong Date: Sat, 30 Aug 2014 22:24:25 -0700 Subject: [PATCH 14/64] final revision before CRAN --- R-package/DESCRIPTION | 2 +- R-package/README.md | 2 +- R-package/vignettes/xgboost.Rnw | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 47c36f1ef..0a7df6a4b 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -12,7 +12,7 @@ Description: This package is a R wrapper of xgboost, which is short for eXtreme more than 10 times faster than existing gradient boosting packages such as gbm. It supports various objective functions, including regression, classification and ranking. The package is made to be extendible, so that user are also allowed to define there own objectives easily. -License: Apache License (== 2.0) + file LICENSE +License: Apache License (== 2.0) | file LICENSE URL: https://github.com/tqchen/xgboost BugReports: https://github.com/tqchen/xgboost/issues Depends: diff --git a/R-package/README.md b/R-package/README.md index 0fcf04981..07b124b8e 100644 --- a/R-package/README.md +++ b/R-package/README.md @@ -7,4 +7,4 @@ require(devtools) install_github('xgboost','tqchen',subdir='R-package') ``` -Please visit [demo](https://github.com/tqchen/xgboost/blob/master/R-package/demo/demo.R) for more details. +Please visit [demo](https://github.com/tqchen/xgboost/blob/master/R-package/inst/examples/demo.R) for more details. diff --git a/R-package/vignettes/xgboost.Rnw b/R-package/vignettes/xgboost.Rnw index ed4447d57..19254abaf 100644 --- a/R-package/vignettes/xgboost.Rnw +++ b/R-package/vignettes/xgboost.Rnw @@ -173,7 +173,7 @@ objective function. We also have \verb@slice@ for row extraction. It is useful in cross-validation. -For a walkthrough demo, please see \verb@R-package/demo/demo.R@ for further +For a walkthrough demo, please see \verb@R-package/inst/examples/demo.R@ for further details. \section{The Higgs Boson competition} From fabe2f39e26512345666e82381dd51fcd8317088 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 31 Aug 2014 08:36:17 -0700 Subject: [PATCH 15/64] more clean makefile --- R-package/src/Makevars | 13 +++++-------- R-package/src/Makevars.win | 13 +++++-------- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/R-package/src/Makevars b/R-package/src/Makevars index 8569a802e..995dc4379 100644 --- a/R-package/src/Makevars +++ b/R-package/src/Makevars @@ -1,17 +1,14 @@ # package root PKGROOT=../../ # _*_ mode: Makefile; _*_ -CXX=`R CMD config CXX` -TCFLAGS=`R CMD config CFLAGS` +CXX=`"${R_HOME}/bin/R" CMD config CXX` +CFLAGS=`"${R_HOME}/bin/R" CMD config CFLAGS` +CXXFLAGS=`"${R_HOME}/bin/R" CMD config CXXFLAGS` # expose these flags to R CMD SHLIB PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -I$(PKGROOT) $(SHLIB_OPENMP_CFLAGS) -XGBFLAG= $(TCFLAGS) -DXGBOOST_CUSTOMIZE_MSG_ -fPIC $(SHLIB_OPENMP_CFLAGS) +XGBFLAG= $(CXXFLAGS) -DXGBOOST_CUSTOMIZE_MSG_ -fPIC $(SHLIB_OPENMP_CFLAGS) PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) -ifeq ($(no_omp),1) - PKG_CPPFLAGS += -DDISABLE_OPENMP -endif - CXXOBJ= xgboost_wrapper.o xgboost_io.o xgboost_gbm.o xgboost_updater.o OBJECTS= xgboost_R.o $(CXXOBJ) @@ -25,7 +22,7 @@ xgboost_gbm.o: $(PKGROOT)/src/gbm/gbm.cpp xgboost_updater.o: $(PKGROOT)/src/tree/updater.cpp $(CXXOBJ) : - $(CXX) -c $(XGBFLAG) -o $@ $(firstword $(filter %.cpp %.c, $^) ) + $(CXX) -c $(XGBFLAG) -o $@ $+ clean: rm -rf *.so *.o *~ *.dll diff --git a/R-package/src/Makevars.win b/R-package/src/Makevars.win index 391c9b6d9..e4bd42d37 100644 --- a/R-package/src/Makevars.win +++ b/R-package/src/Makevars.win @@ -1,17 +1,14 @@ # package root PKGROOT=../../ # _*_ mode: Makefile; _*_ -CXX=`Rcmd config CXX` -TCFLAGS=`Rcmd config CFLAGS` +CXX=`"${R_HOME}/bin/Rcmd" config CXX` +CFLAGS=`"${R_HOME}/bin/Rcmd" config CFLAGS` +CXXFLAGS=`"${R_HOME}/bin/Rcmd" config CXXFLAGS` # expose these flags to R CMD SHLIB PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -I$(PKGROOT) $(SHLIB_OPENMP_CFLAGS) -XGBFLAG= -O3 -DXGBOOST_CUSTOMIZE_MSG_ -fPIC $(SHLIB_OPENMP_CFLAGS) +XGBFLAG= $(CXXFLAGS) -DXGBOOST_CUSTOMIZE_MSG_ -fPIC $(SHLIB_OPENMP_CFLAGS) PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) -ifeq ($(no_omp),1) - PKG_CPPFLAGS += -DDISABLE_OPENMP -endif - CXXOBJ= xgboost_wrapper.o xgboost_io.o xgboost_gbm.o xgboost_updater.o OBJECTS= xgboost_R.o $(CXXOBJ) @@ -25,7 +22,7 @@ xgboost_gbm.o: $(PKGROOT)/src/gbm/gbm.cpp xgboost_updater.o: $(PKGROOT)/src/tree/updater.cpp $(CXXOBJ) : - $(CXX) -c $(XGBFLAG) -o $@ $(firstword $(filter %.cpp %.c, $^) ) + $(CXX) -c $(XGBFLAG) -o $@ $+ clean: rm -rf *.so *.o *~ *.dll From d4aacbf8cfb6cbfd7dd4e1d43a88e9bc3c9ac42e Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Sun, 31 Aug 2014 09:08:17 -0700 Subject: [PATCH 16/64] add ignore --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index f1f9400ab..a29d910e5 100644 --- a/.gitignore +++ b/.gitignore @@ -6,7 +6,7 @@ # Compiled Dynamic libraries *.so *.dylib - +*.page # Compiled Static libraries *.lai *.la From 26c61dc0a32b0142caeac751f75ccf77aa8a75ac Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 31 Aug 2014 09:12:58 -0700 Subject: [PATCH 17/64] remove useless flag --- R-package/src/Makevars | 1 - R-package/src/Makevars.win | 1 - 2 files changed, 2 deletions(-) diff --git a/R-package/src/Makevars b/R-package/src/Makevars index 995dc4379..a13d40a31 100644 --- a/R-package/src/Makevars +++ b/R-package/src/Makevars @@ -2,7 +2,6 @@ PKGROOT=../../ # _*_ mode: Makefile; _*_ CXX=`"${R_HOME}/bin/R" CMD config CXX` -CFLAGS=`"${R_HOME}/bin/R" CMD config CFLAGS` CXXFLAGS=`"${R_HOME}/bin/R" CMD config CXXFLAGS` # expose these flags to R CMD SHLIB PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -I$(PKGROOT) $(SHLIB_OPENMP_CFLAGS) diff --git a/R-package/src/Makevars.win b/R-package/src/Makevars.win index e4bd42d37..00ed5017c 100644 --- a/R-package/src/Makevars.win +++ b/R-package/src/Makevars.win @@ -2,7 +2,6 @@ PKGROOT=../../ # _*_ mode: Makefile; _*_ CXX=`"${R_HOME}/bin/Rcmd" config CXX` -CFLAGS=`"${R_HOME}/bin/Rcmd" config CFLAGS` CXXFLAGS=`"${R_HOME}/bin/Rcmd" config CXXFLAGS` # expose these flags to R CMD SHLIB PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -I$(PKGROOT) $(SHLIB_OPENMP_CFLAGS) From e83090a579c40895a3e6acdfb892111391d7bf4d Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 31 Aug 2014 09:17:49 -0700 Subject: [PATCH 18/64] change flagname to pass check --- R-package/src/Makevars | 4 ++-- R-package/src/Makevars.win | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R-package/src/Makevars b/R-package/src/Makevars index a13d40a31..d9f794589 100644 --- a/R-package/src/Makevars +++ b/R-package/src/Makevars @@ -2,10 +2,10 @@ PKGROOT=../../ # _*_ mode: Makefile; _*_ CXX=`"${R_HOME}/bin/R" CMD config CXX` -CXXFLAGS=`"${R_HOME}/bin/R" CMD config CXXFLAGS` +TCXXFLAGS=`"${R_HOME}/bin/R" CMD config CXXFLAGS` # expose these flags to R CMD SHLIB PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -I$(PKGROOT) $(SHLIB_OPENMP_CFLAGS) -XGBFLAG= $(CXXFLAGS) -DXGBOOST_CUSTOMIZE_MSG_ -fPIC $(SHLIB_OPENMP_CFLAGS) +XGBFLAG= $(TCXXFLAGS) -DXGBOOST_CUSTOMIZE_MSG_ -fPIC $(SHLIB_OPENMP_CFLAGS) PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) CXXOBJ= xgboost_wrapper.o xgboost_io.o xgboost_gbm.o xgboost_updater.o diff --git a/R-package/src/Makevars.win b/R-package/src/Makevars.win index 00ed5017c..2ba962895 100644 --- a/R-package/src/Makevars.win +++ b/R-package/src/Makevars.win @@ -2,10 +2,10 @@ PKGROOT=../../ # _*_ mode: Makefile; _*_ CXX=`"${R_HOME}/bin/Rcmd" config CXX` -CXXFLAGS=`"${R_HOME}/bin/Rcmd" config CXXFLAGS` +TCXXFLAGS=`"${R_HOME}/bin/Rcmd" config CXXFLAGS` # expose these flags to R CMD SHLIB PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -I$(PKGROOT) $(SHLIB_OPENMP_CFLAGS) -XGBFLAG= $(CXXFLAGS) -DXGBOOST_CUSTOMIZE_MSG_ -fPIC $(SHLIB_OPENMP_CFLAGS) +XGBFLAG= $(TCXXFLAGS) -DXGBOOST_CUSTOMIZE_MSG_ -fPIC $(SHLIB_OPENMP_CFLAGS) PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) CXXOBJ= xgboost_wrapper.o xgboost_io.o xgboost_gbm.o xgboost_updater.o From 4d5ec01cd38cad3cbb77eb2d4b2459be7406d612 Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Sun, 31 Aug 2014 09:25:25 -0700 Subject: [PATCH 19/64] change windows --- R-package/src/Makevars.win | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R-package/src/Makevars.win b/R-package/src/Makevars.win index 2ba962895..4b1c9ba8f 100644 --- a/R-package/src/Makevars.win +++ b/R-package/src/Makevars.win @@ -2,10 +2,11 @@ PKGROOT=../../ # _*_ mode: Makefile; _*_ CXX=`"${R_HOME}/bin/Rcmd" config CXX` -TCXXFLAGS=`"${R_HOME}/bin/Rcmd" config CXXFLAGS` +# overwrite CXXFLAGS, in order to force evaluate it in windows +CXXFLAGS=`"${R_HOME}/bin/Rcmd" config CXXFLAGS` # expose these flags to R CMD SHLIB PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -I$(PKGROOT) $(SHLIB_OPENMP_CFLAGS) -XGBFLAG= $(TCXXFLAGS) -DXGBOOST_CUSTOMIZE_MSG_ -fPIC $(SHLIB_OPENMP_CFLAGS) +XGBFLAG= $(CXXFLAGS) -DXGBOOST_CUSTOMIZE_MSG_ -fPIC $(SHLIB_OPENMP_CFLAGS) PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) CXXOBJ= xgboost_wrapper.o xgboost_io.o xgboost_gbm.o xgboost_updater.o From 37499245eae5bb52364966d7b79d6b50ca8e09d4 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 31 Aug 2014 10:26:20 -0700 Subject: [PATCH 20/64] remove GNUism --- R-package/src/Makevars | 14 +++++++++----- R-package/src/Makevars.win | 14 +++++++++----- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/R-package/src/Makevars b/R-package/src/Makevars index d9f794589..cb51bde7b 100644 --- a/R-package/src/Makevars +++ b/R-package/src/Makevars @@ -16,12 +16,16 @@ all: $(SHLIB) $(SHLIB): $(OBJECTS) xgboost_wrapper.o: $(PKGROOT)/wrapper/xgboost_wrapper.cpp -xgboost_io.o: $(PKGROOT)/src/io/io.cpp -xgboost_gbm.o: $(PKGROOT)/src/gbm/gbm.cpp -xgboost_updater.o: $(PKGROOT)/src/tree/updater.cpp + $(CXX) -c $(XGBFLAG) -o xgboost_wrapper.o $(PKGROOT)/wrapper/xgboost_wrapper.cpp -$(CXXOBJ) : - $(CXX) -c $(XGBFLAG) -o $@ $+ +xgboost_io.o: $(PKGROOT)/src/io/io.cpp + $(CXX) -c $(XGBFLAG) -o xgboost_io.o $(PKGROOT)/src/io/io.cpp + +xgboost_gbm.o: $(PKGROOT)/src/gbm/gbm.cpp + $(CXX) -c $(XGBFLAG) -o xgboost_gbm.o $(PKGROOT)/src/gbm/gbm.cpp + +xgboost_updater.o: $(PKGROOT)/src/tree/updater.cpp + $(CXX) -c $(XGBFLAG) -o xgboost_updater.o $(PKGROOT)/src/tree/updater.cpp clean: rm -rf *.so *.o *~ *.dll diff --git a/R-package/src/Makevars.win b/R-package/src/Makevars.win index 4b1c9ba8f..14bc501fd 100644 --- a/R-package/src/Makevars.win +++ b/R-package/src/Makevars.win @@ -17,12 +17,16 @@ all: $(SHLIB) $(SHLIB): $(OBJECTS) xgboost_wrapper.o: $(PKGROOT)/wrapper/xgboost_wrapper.cpp -xgboost_io.o: $(PKGROOT)/src/io/io.cpp -xgboost_gbm.o: $(PKGROOT)/src/gbm/gbm.cpp -xgboost_updater.o: $(PKGROOT)/src/tree/updater.cpp + $(CXX) -c $(XGBFLAG) -o xgboost_wrapper.o $(PKGROOT)/wrapper/xgboost_wrapper.cpp -$(CXXOBJ) : - $(CXX) -c $(XGBFLAG) -o $@ $+ +xgboost_io.o: $(PKGROOT)/src/io/io.cpp + $(CXX) -c $(XGBFLAG) -o xgboost_io.o $(PKGROOT)/src/io/io.cpp + +xgboost_gbm.o: $(PKGROOT)/src/gbm/gbm.cpp + $(CXX) -c $(XGBFLAG) -o xgboost_gbm.o $(PKGROOT)/src/gbm/gbm.cpp + +xgboost_updater.o: $(PKGROOT)/src/tree/updater.cpp + $(CXX) -c $(XGBFLAG) -o xgboost_updater.o $(PKGROOT)/src/tree/updater.cpp clean: rm -rf *.so *.o *~ *.dll From 172423ca0cf2da0c4c46e1890c73b255d2530686 Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Sun, 31 Aug 2014 12:19:44 -0700 Subject: [PATCH 21/64] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ba4b08bfd..f7947b906 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ Version ====== * This version xgboost-0.3, the code has been refactored from 0.2x to be cleaner and more flexibility * This version of xgboost is not compatible with 0.2x, due to huge amount of changes in code structure - - This means the model and buffer file of previous version can not be loaded in xgboost-unity + - This means the model and buffer file of previous version can not be loaded in xgboost-3.0 * For legacy 0.2x code, refer to [Here](https://github.com/tqchen/xgboost/releases/tag/v0.22) * Change log in [CHANGES.md](CHANGES.md) From 1ed40e2b46bbf1fe5245254f6216e480e1529c88 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 31 Aug 2014 13:13:11 -0700 Subject: [PATCH 22/64] more strict makefile --- R-package/src/Makevars | 27 ++++++++------------------- R-package/src/Makevars.win | 28 ++++++++-------------------- 2 files changed, 16 insertions(+), 39 deletions(-) diff --git a/R-package/src/Makevars b/R-package/src/Makevars index cb51bde7b..dce5fff7b 100644 --- a/R-package/src/Makevars +++ b/R-package/src/Makevars @@ -1,32 +1,21 @@ # package root PKGROOT=../../ # _*_ mode: Makefile; _*_ -CXX=`"${R_HOME}/bin/R" CMD config CXX` -TCXXFLAGS=`"${R_HOME}/bin/R" CMD config CXXFLAGS` -# expose these flags to R CMD SHLIB -PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -I$(PKGROOT) $(SHLIB_OPENMP_CFLAGS) -XGBFLAG= $(TCXXFLAGS) -DXGBOOST_CUSTOMIZE_MSG_ -fPIC $(SHLIB_OPENMP_CFLAGS) +PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -I$(PKGROOT) +PKG_CXXFLAGS= $(SHLIB_OPENMP_CFLAGS) PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) -CXXOBJ= xgboost_wrapper.o xgboost_io.o xgboost_gbm.o xgboost_updater.o +CXXOBJ= $(PKGROOT)/wrapper/xgboost_wrapper.o\ + $(PKGROOT)/src/io/io.o\ + $(PKGROOT)/src/gbm/gbm.o\ + $(PKGROOT)/src/tree/updater.o + OBJECTS= xgboost_R.o $(CXXOBJ) .PHONY: all clean all: $(SHLIB) $(SHLIB): $(OBJECTS) -xgboost_wrapper.o: $(PKGROOT)/wrapper/xgboost_wrapper.cpp - $(CXX) -c $(XGBFLAG) -o xgboost_wrapper.o $(PKGROOT)/wrapper/xgboost_wrapper.cpp - -xgboost_io.o: $(PKGROOT)/src/io/io.cpp - $(CXX) -c $(XGBFLAG) -o xgboost_io.o $(PKGROOT)/src/io/io.cpp - -xgboost_gbm.o: $(PKGROOT)/src/gbm/gbm.cpp - $(CXX) -c $(XGBFLAG) -o xgboost_gbm.o $(PKGROOT)/src/gbm/gbm.cpp - -xgboost_updater.o: $(PKGROOT)/src/tree/updater.cpp - $(CXX) -c $(XGBFLAG) -o xgboost_updater.o $(PKGROOT)/src/tree/updater.cpp - clean: - rm -rf *.so *.o *~ *.dll + rm -rf *.so *.o *~ *.dll $(OBJECTS) diff --git a/R-package/src/Makevars.win b/R-package/src/Makevars.win index 14bc501fd..67de745c5 100644 --- a/R-package/src/Makevars.win +++ b/R-package/src/Makevars.win @@ -1,32 +1,20 @@ # package root PKGROOT=../../ # _*_ mode: Makefile; _*_ -CXX=`"${R_HOME}/bin/Rcmd" config CXX` -# overwrite CXXFLAGS, in order to force evaluate it in windows -CXXFLAGS=`"${R_HOME}/bin/Rcmd" config CXXFLAGS` -# expose these flags to R CMD SHLIB -PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -I$(PKGROOT) $(SHLIB_OPENMP_CFLAGS) -XGBFLAG= $(CXXFLAGS) -DXGBOOST_CUSTOMIZE_MSG_ -fPIC $(SHLIB_OPENMP_CFLAGS) +PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -I$(PKGROOT) +PKG_CXXFLAGS= $(SHLIB_OPENMP_CFLAGS) PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) -CXXOBJ= xgboost_wrapper.o xgboost_io.o xgboost_gbm.o xgboost_updater.o +CXXOBJ= $(PKGROOT)/wrapper/xgboost_wrapper.o\ + $(PKGROOT)/src/io/io.o\ + $(PKGROOT)/src/gbm/gbm.o\ + $(PKGROOT)/src/tree/updater.o + OBJECTS= xgboost_R.o $(CXXOBJ) .PHONY: all clean all: $(SHLIB) $(SHLIB): $(OBJECTS) -xgboost_wrapper.o: $(PKGROOT)/wrapper/xgboost_wrapper.cpp - $(CXX) -c $(XGBFLAG) -o xgboost_wrapper.o $(PKGROOT)/wrapper/xgboost_wrapper.cpp - -xgboost_io.o: $(PKGROOT)/src/io/io.cpp - $(CXX) -c $(XGBFLAG) -o xgboost_io.o $(PKGROOT)/src/io/io.cpp - -xgboost_gbm.o: $(PKGROOT)/src/gbm/gbm.cpp - $(CXX) -c $(XGBFLAG) -o xgboost_gbm.o $(PKGROOT)/src/gbm/gbm.cpp - -xgboost_updater.o: $(PKGROOT)/src/tree/updater.cpp - $(CXX) -c $(XGBFLAG) -o xgboost_updater.o $(PKGROOT)/src/tree/updater.cpp - clean: - rm -rf *.so *.o *~ *.dll + rm -rf *.so *.o *~ *.dll $(OBJECTS) From 12d503cec895b16cb4273bd89507a04f1701a308 Mon Sep 17 00:00:00 2001 From: Tong He Date: Sun, 31 Aug 2014 13:39:49 -0700 Subject: [PATCH 23/64] Update DESCRIPTION --- R-package/DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 0a7df6a4b..eb154609c 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -11,7 +11,7 @@ Description: This package is a R wrapper of xgboost, which is short for eXtreme The package can automatically do parallel computation with OpenMP, and it can be more than 10 times faster than existing gradient boosting packages such as gbm. It supports various objective functions, including regression, classification and ranking. - The package is made to be extendible, so that user are also allowed to define there own objectives easily. + The package is made to be extensible, so that user are also allowed to define there own objectives easily. License: Apache License (== 2.0) | file LICENSE URL: https://github.com/tqchen/xgboost BugReports: https://github.com/tqchen/xgboost/issues From 168f78623f7502df3831e62ac1617091a7e511ed Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 31 Aug 2014 14:07:44 -0700 Subject: [PATCH 24/64] allow standalone random --- R-package/R/utils.R | 1 - R-package/src/Makevars | 2 +- R-package/src/xgboost_R.cpp | 55 ++++++++++++++++++++++++++++++++++++- R-package/src/xgboost_R.h | 1 + src/utils/random.h | 49 ++++++++++++++------------------- 5 files changed, 77 insertions(+), 31 deletions(-) diff --git a/R-package/R/utils.R b/R-package/R/utils.R index 1ae2dbe71..015026fe6 100644 --- a/R-package/R/utils.R +++ b/R-package/R/utils.R @@ -49,7 +49,6 @@ xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) { } } handle <- .Call("XGBoosterCreate_R", cachelist, PACKAGE = "xgboost") - .Call("XGBoosterSetParam_R", handle, "seed", "0", PACKAGE = "xgboost") if (length(params) != 0) { for (i in 1:length(params)) { p <- params[i] diff --git a/R-package/src/Makevars b/R-package/src/Makevars index dce5fff7b..d75c51d08 100644 --- a/R-package/src/Makevars +++ b/R-package/src/Makevars @@ -1,7 +1,7 @@ # package root PKGROOT=../../ # _*_ mode: Makefile; _*_ -PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -I$(PKGROOT) +PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -DXGBOOST_CUSTOMIZE_PRNG_ -I$(PKGROOT) PKG_CXXFLAGS= $(SHLIB_OPENMP_CFLAGS) PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) diff --git a/R-package/src/xgboost_R.cpp b/R-package/src/xgboost_R.cpp index 20ff69854..70d2e71de 100644 --- a/R-package/src/xgboost_R.cpp +++ b/R-package/src/xgboost_R.cpp @@ -1,8 +1,8 @@ +#include "xgboost_R.h" #include #include #include #include -#include "xgboost_R.h" #include "wrapper/xgboost_wrapper.h" #include "src/utils/utils.h" #include "src/utils/omp.h" @@ -22,8 +22,28 @@ void HandlePrint(const char *msg) { Rprintf("%s", msg); } } // namespace utils +namespace random { +void Seed(unsigned seed) { + warning("parameter seed is ignored, please set random seed using set.seed"); +} +double Uniform(void) { + return unif_rand(); +} +double Normal(void) { + return norm_rand(); +} +} // namespace random } // namespace xgboost +// call before wrapper starts +inline void _WrapperBegin(void) { + GetRNGstate(); +} +// call after wrapper starts +inline void _WrapperEnd(void) { + PutRNGstate(); +} + extern "C" { void _DMatrixFinalizer(SEXP ext) { if (R_ExternalPtrAddr(ext) == NULL) return; @@ -31,14 +51,17 @@ extern "C" { R_ClearExternalPtr(ext); } SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) { + _WrapperBegin(); void *handle = XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent)); SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); UNPROTECT(1); + _WrapperEnd(); return ret; } SEXP XGDMatrixCreateFromMat_R(SEXP mat, SEXP missing) { + _WrapperBegin(); SEXP dim = getAttrib(mat, R_DimSymbol); int nrow = INTEGER(dim)[0]; int ncol = INTEGER(dim)[1]; @@ -54,11 +77,13 @@ extern "C" { SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); UNPROTECT(1); + _WrapperEnd(); return ret; } SEXP XGDMatrixCreateFromCSC_R(SEXP indptr, SEXP indices, SEXP data) { + _WrapperBegin(); const int *col_ptr = INTEGER(indptr); const int *row_index = INTEGER(indices); const double *col_data = REAL(data); @@ -92,9 +117,11 @@ extern "C" { SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); UNPROTECT(1); + _WrapperEnd(); return ret; } SEXP XGDMatrixSliceDMatrix_R(SEXP handle, SEXP idxset) { + _WrapperBegin(); int len = length(idxset); std::vector idxvec(len); for (int i = 0; i < len; ++i) { @@ -104,13 +131,17 @@ extern "C" { SEXP ret = PROTECT(R_MakeExternalPtr(res, R_NilValue, R_NilValue)); R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); UNPROTECT(1); + _WrapperEnd(); return ret; } void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent) { + _WrapperBegin(); XGDMatrixSaveBinary(R_ExternalPtrAddr(handle), CHAR(asChar(fname)), asInteger(silent)); + _WrapperEnd(); } void XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array) { + _WrapperBegin(); int len = length(array); const char *name = CHAR(asChar(field)); if (!strcmp("group", name)) { @@ -120,6 +151,7 @@ extern "C" { vec[i] = static_cast(INTEGER(array)[i]); } XGDMatrixSetGroup(R_ExternalPtrAddr(handle), &vec[0], len); + _WrapperEnd(); return; } { @@ -132,8 +164,10 @@ extern "C" { CHAR(asChar(field)), &vec[0], len); } + _WrapperEnd(); } SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) { + _WrapperBegin(); bst_ulong olen; const float *res = XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle), CHAR(asChar(field)), &olen); @@ -142,6 +176,7 @@ extern "C" { REAL(ret)[i] = res[i]; } UNPROTECT(1); + _WrapperEnd(); return ret; } // functions related to booster @@ -151,6 +186,7 @@ extern "C" { R_ClearExternalPtr(ext); } SEXP XGBoosterCreate_R(SEXP dmats) { + _WrapperBegin(); int len = length(dmats); std::vector dvec; for (int i = 0; i < len; ++i){ @@ -160,19 +196,25 @@ extern "C" { SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE); UNPROTECT(1); + _WrapperEnd(); return ret; } void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val) { + _WrapperBegin(); XGBoosterSetParam(R_ExternalPtrAddr(handle), CHAR(asChar(name)), CHAR(asChar(val))); + _WrapperEnd(); } void XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain) { + _WrapperBegin(); XGBoosterUpdateOneIter(R_ExternalPtrAddr(handle), asInteger(iter), R_ExternalPtrAddr(dtrain)); + _WrapperEnd(); } void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess) { + _WrapperBegin(); utils::Check(length(grad) == length(hess), "gradient and hess must have same length"); int len = length(grad); std::vector tgrad(len), thess(len); @@ -184,8 +226,10 @@ extern "C" { XGBoosterBoostOneIter(R_ExternalPtrAddr(handle), R_ExternalPtrAddr(dtrain), &tgrad[0], &thess[0], len); + _WrapperEnd(); } SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames) { + _WrapperBegin(); utils::Check(length(dmats) == length(evnames), "dmats and evnams must have same length"); int len = length(dmats); std::vector vec_dmats; @@ -201,8 +245,10 @@ extern "C" { return mkString(XGBoosterEvalOneIter(R_ExternalPtrAddr(handle), asInteger(iter), &vec_dmats[0], &vec_sptr[0], len)); + _WrapperEnd(); } SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin) { + _WrapperBegin(); bst_ulong olen; const float *res = XGBoosterPredict(R_ExternalPtrAddr(handle), R_ExternalPtrAddr(dmat), @@ -213,15 +259,21 @@ extern "C" { REAL(ret)[i] = res[i]; } UNPROTECT(1); + _WrapperEnd(); return ret; } void XGBoosterLoadModel_R(SEXP handle, SEXP fname) { + _WrapperBegin(); XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))); + _WrapperEnd(); } void XGBoosterSaveModel_R(SEXP handle, SEXP fname) { + _WrapperBegin(); XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))); + _WrapperEnd(); } void XGBoosterDumpModel_R(SEXP handle, SEXP fname, SEXP fmap) { + _WrapperBegin(); bst_ulong olen; const char **res = XGBoosterDumpModel(R_ExternalPtrAddr(handle), CHAR(asChar(fmap)), @@ -232,5 +284,6 @@ extern "C" { fprintf(fo, "%s", res[i]); } fclose(fo); + _WrapperEnd(); } } diff --git a/R-package/src/xgboost_R.h b/R-package/src/xgboost_R.h index ecacdeced..8b4a3372c 100644 --- a/R-package/src/xgboost_R.h +++ b/R-package/src/xgboost_R.h @@ -7,6 +7,7 @@ */ extern "C" { #include +#include } extern "C" { diff --git a/src/utils/random.h b/src/utils/random.h index bf8b04d9d..bd2465589 100644 --- a/src/utils/random.h +++ b/src/utils/random.h @@ -16,30 +16,21 @@ /*! namespace of PRNG */ namespace xgboost { namespace random { - +#ifndef XGBOOST_CUSTOMIZE_PRNG_ /*! \brief seed the PRNG */ -inline void Seed(uint32_t seed) { +inline void Seed(unsigned seed) { srand(seed); } -/*! \brief return a real number uniform in [0,1) */ -inline double NextDouble(void) { +/*! \brief basic function, uniform */ +inline double Uniform(void) { return static_cast(rand()) / (static_cast(RAND_MAX)+1.0); } /*! \brief return a real numer uniform in (0,1) */ inline double NextDouble2(void) { return (static_cast(rand()) + 1.0) / (static_cast(RAND_MAX)+2.0); } - -/*! \brief return a random number */ -inline uint32_t NextUInt32(void) { - return (uint32_t)rand(); -} -/*! \brief return a random number in n */ -inline uint32_t NextUInt32(uint32_t n) { - return (uint32_t)floor(NextDouble() * n); -} /*! \brief return x~N(0,1) */ -inline double SampleNormal() { +inline double Normal(void) { double x, y, s; do { x = 2 * NextDouble2() - 1.0; @@ -49,22 +40,24 @@ inline double SampleNormal() { return x * sqrt(-2.0 * log(s) / s); } +#else +// include declarations, to be implemented +void Seed(unsigned seed); +double Uniform(void); +double Normal(void); +#endif -/*! \brief return iid x,y ~N(0,1) */ -inline void SampleNormal2D(double &xx, double &yy) { - double x, y, s; - do { - x = 2 * NextDouble2() - 1.0; - y = 2 * NextDouble2() - 1.0; - s = x*x + y*y; - } while (s >= 1.0 || s == 0.0); - double t = sqrt(-2.0 * log(s) / s); - xx = x * t; - yy = y * t; +/*! \brief return a real number uniform in [0,1) */ +inline double NextDouble(void) { + return Uniform(); +} +/*! \brief return a random number in n */ +inline uint32_t NextUInt32(uint32_t n) { + return (uint32_t)floor(NextDouble() * n); } /*! \brief return x~N(mu,sigma^2) */ inline double SampleNormal(double mu, double sigma) { - return SampleNormal() * sigma + mu; + return Normal() * sigma + mu; } /*! \brief return 1 with probability p, coin flip */ inline int SampleBinary(double p) { @@ -90,7 +83,7 @@ struct Random{ inline void Seed(unsigned sd) { this->rseed = sd; #if defined(_MSC_VER)||defined(_WIN32) - srand(rseed); + ::xgboost::utils::Seed(sd); #endif } /*! \brief return a real number uniform in [0,1) */ @@ -99,7 +92,7 @@ struct Random{ // For cygwin and mingw, this can slows down parallelism, but rand_r is only used in objective-inl.hpp, won't affect speed in general // todo, replace with another PRNG #if defined(_MSC_VER)||defined(_WIN32) - return static_cast(rand()) / (static_cast(RAND_MAX) + 1.0); + return Uniform(); #else return static_cast(rand_r(&rseed)) / (static_cast(RAND_MAX) + 1.0); #endif From 9e0cc778e8c8c8922786e6d7e0808e0992c1809c Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 31 Aug 2014 14:12:47 -0700 Subject: [PATCH 25/64] fix win --- R-package/src/Makevars.win | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/src/Makevars.win b/R-package/src/Makevars.win index 67de745c5..e4f65cbef 100644 --- a/R-package/src/Makevars.win +++ b/R-package/src/Makevars.win @@ -1,7 +1,7 @@ # package root PKGROOT=../../ # _*_ mode: Makefile; _*_ -PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -I$(PKGROOT) +PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -DXGBOOST_CUSTOMIZE_PRNG_ -I$(PKGROOT) PKG_CXXFLAGS= $(SHLIB_OPENMP_CFLAGS) PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) From d5f37d123828dbe20ae3d6d7f086f96eae17f51c Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Sun, 31 Aug 2014 14:13:44 -0700 Subject: [PATCH 26/64] add git ignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index a29d910e5..a0bd1b6f6 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,9 @@ *.la *.a *~ +*.Rcheck +*.rds +*.tar.gz *txt* *conf *buffer From 88da7839b7021cc6e01a85e770dbebf97277d7a0 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 31 Aug 2014 14:14:39 -0700 Subject: [PATCH 27/64] fix random --- src/utils/random.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/random.h b/src/utils/random.h index bd2465589..590baca2a 100644 --- a/src/utils/random.h +++ b/src/utils/random.h @@ -83,7 +83,7 @@ struct Random{ inline void Seed(unsigned sd) { this->rseed = sd; #if defined(_MSC_VER)||defined(_WIN32) - ::xgboost::utils::Seed(sd); + ::xgboost::random::Seed(sd); #endif } /*! \brief return a real number uniform in [0,1) */ From 79fa8b99d4d15b1b168f00a3713152ce1ebda90b Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 31 Aug 2014 14:26:35 -0700 Subject: [PATCH 28/64] pack script with cleanup --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index 552954453..775b42eba 100644 --- a/Makefile +++ b/Makefile @@ -43,6 +43,9 @@ install: Rpack: rm -rf xgboost xgboost*.tar.gz cp -r R-package xgboost + rm -rf xgboost/inst/examples/*.buffer + rm -rf xgboost/inst/examples/*.model + rm -rf xgboost/inst/examples/dump* rm -rf xgboost/src/*.o xgboost/src/*.so xgboost/src/*.dll rm -rf xgboost/demo/*.model xgboost/demo/*.buffer cp -r src xgboost/src/src From b49927e60235d30d19a88a8493d2ab80115395cb Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Sun, 31 Aug 2014 14:32:45 -0700 Subject: [PATCH 29/64] Update xgboost_R.cpp --- R-package/src/xgboost_R.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/src/xgboost_R.cpp b/R-package/src/xgboost_R.cpp index 70d2e71de..96d3871b1 100644 --- a/R-package/src/xgboost_R.cpp +++ b/R-package/src/xgboost_R.cpp @@ -1,8 +1,8 @@ -#include "xgboost_R.h" #include #include #include #include +#include "xgboost_R.h" #include "wrapper/xgboost_wrapper.h" #include "src/utils/utils.h" #include "src/utils/omp.h" From b2097b96c7f0a18111712297bf2ec4f324750247 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 31 Aug 2014 22:39:37 -0700 Subject: [PATCH 30/64] more clean makevar --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 775b42eba..d3140777d 100644 --- a/Makefile +++ b/Makefile @@ -60,4 +60,4 @@ Rpack: R CMD check --as-cran xgboost*.tar.gz clean: - $(RM) $(OBJ) $(BIN) $(SLIB) *.o *~ */*~ */*/*~ + $(RM) $(OBJ) $(BIN) $(SLIB) *.o */*.o */*/*.o *~ */*~ */*/*~ From 8b3465cde034151a95aeb1e79635d1cb3ca014bc Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 31 Aug 2014 22:42:15 -0700 Subject: [PATCH 31/64] cleaner makevar --- R-package/src/Makevars | 14 +------------- R-package/src/Makevars.win | 15 +-------------- 2 files changed, 2 insertions(+), 27 deletions(-) diff --git a/R-package/src/Makevars b/R-package/src/Makevars index d75c51d08..e03a15425 100644 --- a/R-package/src/Makevars +++ b/R-package/src/Makevars @@ -4,18 +4,6 @@ PKGROOT=../../ PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -DXGBOOST_CUSTOMIZE_PRNG_ -I$(PKGROOT) PKG_CXXFLAGS= $(SHLIB_OPENMP_CFLAGS) PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) +OBJECTS= xgboost_R.o $(PKGROOT)/wrapper/xgboost_wrapper.o $(PKGROOT)/src/io/io.o $(PKGROOT)/src/gbm/gbm.o $(PKGROOT)/src/tree/updater.o -CXXOBJ= $(PKGROOT)/wrapper/xgboost_wrapper.o\ - $(PKGROOT)/src/io/io.o\ - $(PKGROOT)/src/gbm/gbm.o\ - $(PKGROOT)/src/tree/updater.o - -OBJECTS= xgboost_R.o $(CXXOBJ) - -.PHONY: all clean -all: $(SHLIB) -$(SHLIB): $(OBJECTS) - -clean: - rm -rf *.so *.o *~ *.dll $(OBJECTS) diff --git a/R-package/src/Makevars.win b/R-package/src/Makevars.win index e4f65cbef..aec0d2c89 100644 --- a/R-package/src/Makevars.win +++ b/R-package/src/Makevars.win @@ -4,17 +4,4 @@ PKGROOT=../../ PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -DXGBOOST_CUSTOMIZE_PRNG_ -I$(PKGROOT) PKG_CXXFLAGS= $(SHLIB_OPENMP_CFLAGS) PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) - -CXXOBJ= $(PKGROOT)/wrapper/xgboost_wrapper.o\ - $(PKGROOT)/src/io/io.o\ - $(PKGROOT)/src/gbm/gbm.o\ - $(PKGROOT)/src/tree/updater.o - -OBJECTS= xgboost_R.o $(CXXOBJ) - -.PHONY: all clean -all: $(SHLIB) -$(SHLIB): $(OBJECTS) - -clean: - rm -rf *.so *.o *~ *.dll $(OBJECTS) +OBJECTS= xgboost_R.o $(PKGROOT)/wrapper/xgboost_wrapper.o $(PKGROOT)/src/io/io.o $(PKGROOT)/src/gbm/gbm.o $(PKGROOT)/src/tree/updater.o From 485e0f140ed6ce10b7a1ecf2483fabd7fc74b150 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 31 Aug 2014 22:53:35 -0700 Subject: [PATCH 32/64] add --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index d3140777d..28a289ac6 100644 --- a/Makefile +++ b/Makefile @@ -41,6 +41,7 @@ install: cp -f -r $(BIN) $(INSTALL_PATH) Rpack: + make clean rm -rf xgboost xgboost*.tar.gz cp -r R-package xgboost rm -rf xgboost/inst/examples/*.buffer From 6641fa546d1713dd8766b08abcb42e3c9ca58d97 Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 1 Sep 2014 08:50:45 -0700 Subject: [PATCH 33/64] change warning to pragma message --- R-package/src/Makevars | 6 +++--- src/utils/omp.h | 9 ++------- src/utils/utils.h | 2 +- 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/R-package/src/Makevars b/R-package/src/Makevars index e03a15425..e5f386f9b 100644 --- a/R-package/src/Makevars +++ b/R-package/src/Makevars @@ -1,9 +1,9 @@ # package root PKGROOT=../../ # _*_ mode: Makefile; _*_ -PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -DXGBOOST_CUSTOMIZE_PRNG_ -I$(PKGROOT) -PKG_CXXFLAGS= $(SHLIB_OPENMP_CFLAGS) -PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) +PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -DXGBOOST_CUSTOMIZE_PRNG_ -Wpedantic -I$(PKGROOT) +PKG_CXXFLAGS= +PKG_LIBS = OBJECTS= xgboost_R.o $(PKGROOT)/wrapper/xgboost_wrapper.o $(PKGROOT)/src/io/io.o $(PKGROOT)/src/gbm/gbm.o $(PKGROOT)/src/tree/updater.o diff --git a/src/utils/omp.h b/src/utils/omp.h index 0380ebd67..933ad97f6 100644 --- a/src/utils/omp.h +++ b/src/utils/omp.h @@ -9,13 +9,8 @@ #include #else #ifndef DISABLE_OPENMP -#ifndef _MSC_VER -#warning "OpenMP is not available, compile to single thread code."\ - "You may want to ungrade your compiler to enable OpenMP support,"\ - "to get benefit of multi-threading." -#else -// TODO add warning for msvc -#endif +// use pragma message instead of warning +#pragma message "Warning: OpenMP is not available, xgboost will be compiled into single thread code. You may want to ungrade your compiler to enable OpenMP support, to get benefit of multi-threading." #endif inline int omp_get_thread_num() { return 0; } inline int omp_get_num_threads() { return 1; } diff --git a/src/utils/utils.h b/src/utils/utils.h index 625bc2d48..9d235d82a 100644 --- a/src/utils/utils.h +++ b/src/utils/utils.h @@ -19,7 +19,7 @@ #else #ifdef _FILE_OFFSET_BITS #if _FILE_OFFSET_BITS == 32 -#warning "FILE OFFSET BITS defined to be 32 bit" +#pragma message "Warning: FILE OFFSET BITS defined to be 32 bit" #endif #endif From 427ab6434cd14de7f87588ac3c631b66e4e4a493 Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 1 Sep 2014 08:56:40 -0700 Subject: [PATCH 34/64] message --- R-package/src/Makevars | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R-package/src/Makevars b/R-package/src/Makevars index e5f386f9b..ae1e3def5 100644 --- a/R-package/src/Makevars +++ b/R-package/src/Makevars @@ -1,9 +1,9 @@ # package root PKGROOT=../../ # _*_ mode: Makefile; _*_ -PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -DXGBOOST_CUSTOMIZE_PRNG_ -Wpedantic -I$(PKGROOT) -PKG_CXXFLAGS= -PKG_LIBS = +PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -DXGBOOST_CUSTOMIZE_PRNG_ -I$(PKGROOT) +PKG_CXXFLAGS= $(SHLIB_OPENMP_CFLAGS) +PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) OBJECTS= xgboost_R.o $(PKGROOT)/wrapper/xgboost_wrapper.o $(PKGROOT)/src/io/io.o $(PKGROOT)/src/gbm/gbm.o $(PKGROOT)/src/tree/updater.o From 7393291f81e5e5cbb793b6d3284fa767a91d6d08 Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Mon, 1 Sep 2014 08:59:02 -0700 Subject: [PATCH 35/64] msvc --- src/utils/omp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/omp.h b/src/utils/omp.h index 933ad97f6..73dcf6449 100644 --- a/src/utils/omp.h +++ b/src/utils/omp.h @@ -10,7 +10,7 @@ #else #ifndef DISABLE_OPENMP // use pragma message instead of warning -#pragma message "Warning: OpenMP is not available, xgboost will be compiled into single thread code. You may want to ungrade your compiler to enable OpenMP support, to get benefit of multi-threading." +#pragma message ("Warning: OpenMP is not available, xgboost will be compiled into single thread code. You may want to ungrade your compiler to enable OpenMP support, to get benefit of multi-threading.") #endif inline int omp_get_thread_num() { return 0; } inline int omp_get_num_threads() { return 1; } From 4c451de90bed49e9429c05391982527f8bc1f437 Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 1 Sep 2014 09:00:45 -0700 Subject: [PATCH 36/64] change message --- src/utils/utils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/utils.h b/src/utils/utils.h index 9d235d82a..75d493cc2 100644 --- a/src/utils/utils.h +++ b/src/utils/utils.h @@ -19,7 +19,7 @@ #else #ifdef _FILE_OFFSET_BITS #if _FILE_OFFSET_BITS == 32 -#pragma message "Warning: FILE OFFSET BITS defined to be 32 bit" +#pragma message ("Warning: FILE OFFSET BITS defined to be 32 bit") #endif #endif From 24e87e1cf866ec175108413301773da3d03db5f0 Mon Sep 17 00:00:00 2001 From: hetong Date: Mon, 1 Sep 2014 15:07:17 -0700 Subject: [PATCH 37/64] fix doc with redirection to inst/examples --- R-package/DESCRIPTION | 16 +++++++++------- R-package/R/xgb.dump.R | 2 +- R-package/R/xgb.train.R | 2 +- R-package/R/xgboost.R | 2 +- R-package/man/xgb.dump.Rd | 2 +- R-package/man/xgb.train.Rd | 2 +- R-package/man/xgboost.Rd | 2 +- 7 files changed, 15 insertions(+), 13 deletions(-) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index eb154609c..9a1bfe8b6 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -5,13 +5,15 @@ Version: 0.3-0 Date: 2014-08-23 Author: Tianqi Chen , Tong He Maintainer: Tong He -Description: This package is a R wrapper of xgboost, which is short for eXtreme Gradient Boosting. - It is an efficient and scalable implementation of gradient boosting framework. - The package includes efficient linear model solver and tree learning algorithm. - The package can automatically do parallel computation with OpenMP, and it can be - more than 10 times faster than existing gradient boosting packages such as gbm. - It supports various objective functions, including regression, classification and ranking. - The package is made to be extensible, so that user are also allowed to define there own objectives easily. +Description: This package is a R wrapper of xgboost, which is short for eXtreme + Gradient Boosting. It is an efficient and scalable implementation of + gradient boosting framework. The package includes efficient linear model + solver and tree learning algorithm. The package can automatically do + parallel computation with OpenMP, and it can be more than 10 times faster + than existing gradient boosting packages such as gbm. It supports various + objective functions, including regression, classification and ranking. The + package is made to be extensible, so that user are also allowed to define + there own objectives easily. License: Apache License (== 2.0) | file LICENSE URL: https://github.com/tqchen/xgboost BugReports: https://github.com/tqchen/xgboost/issues diff --git a/R-package/R/xgb.dump.R b/R-package/R/xgb.dump.R index 2c7813712..dc3f431a2 100644 --- a/R-package/R/xgb.dump.R +++ b/R-package/R/xgb.dump.R @@ -5,7 +5,7 @@ #' @param model the model object. #' @param fname the name of the binary file. #' @param fmap feature map file representing the type of feature, to make it -#' look nice, run demo/demo.R for result and demo/featmap.txt for example +#' look nice, run inst/examples/demo.R for result and inst/examples/featmap.txt for example #' Format: https://github.com/tqchen/xgboost/wiki/Binary-Classification#dump-model #' #' @examples diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R index ceb87c1cb..99a6dcc2d 100644 --- a/R-package/R/xgb.train.R +++ b/R-package/R/xgb.train.R @@ -15,7 +15,7 @@ #' } #' #' See \url{https://github.com/tqchen/xgboost/wiki/Parameters} for -#' further details. See also demo/demo.R for walkthrough example in R. +#' further details. See also inst/examples/demo.R for walkthrough example in R. #' @param dtrain takes an \code{xgb.DMatrix} as the input. #' @param nrounds the max number of iterations #' @param watchlist what information should be printed when \code{verbose=1} or diff --git a/R-package/R/xgboost.R b/R-package/R/xgboost.R index c6fc99980..6f4633fb8 100644 --- a/R-package/R/xgboost.R +++ b/R-package/R/xgboost.R @@ -19,7 +19,7 @@ #' } #' #' See \url{https://github.com/tqchen/xgboost/wiki/Parameters} for -#' further details. See also demo/demo.R for walkthrough example in R. +#' further details. See also inst/examples/demo.R for walkthrough example in R. #' @param nrounds the max number of iterations #' @param verbose If 0, xgboost will stay silent. If 1, xgboost will print #' information of performance. If 2, xgboost will print information of both diff --git a/R-package/man/xgb.dump.Rd b/R-package/man/xgb.dump.Rd index 1e0360b31..69ec13300 100644 --- a/R-package/man/xgb.dump.Rd +++ b/R-package/man/xgb.dump.Rd @@ -11,7 +11,7 @@ xgb.dump(model, fname, fmap = "") \item{fname}{the name of the binary file.} \item{fmap}{feature map file representing the type of feature, to make it - look nice, run demo/demo.R for result and demo/featmap.txt for example + look nice, run inst/examples/demo.R for result and inst/examples/featmap.txt for example Format: https://github.com/tqchen/xgboost/wiki/Binary-Classification#dump-model} } \description{ diff --git a/R-package/man/xgb.train.Rd b/R-package/man/xgb.train.Rd index 1f29afa04..aa2778501 100644 --- a/R-package/man/xgb.train.Rd +++ b/R-package/man/xgb.train.Rd @@ -20,7 +20,7 @@ xgb.train(params = list(), dtrain, nrounds, watchlist = list(), } See \url{https://github.com/tqchen/xgboost/wiki/Parameters} for - further details. See also demo/demo.R for walkthrough example in R.} + further details. See also inst/examples/demo.R for walkthrough example in R.} \item{dtrain}{takes an \code{xgb.DMatrix} as the input.} diff --git a/R-package/man/xgboost.Rd b/R-package/man/xgboost.Rd index a76ce5b3d..2b6c1a124 100644 --- a/R-package/man/xgboost.Rd +++ b/R-package/man/xgboost.Rd @@ -25,7 +25,7 @@ xgboost(data = NULL, label = NULL, params = list(), nrounds, } See \url{https://github.com/tqchen/xgboost/wiki/Parameters} for - further details. See also demo/demo.R for walkthrough example in R.} + further details. See also inst/examples/demo.R for walkthrough example in R.} \item{nrounds}{the max number of iterations} From 4592e500cb68d113d39a3c3ebd8d37e4deb579d4 Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 1 Sep 2014 15:10:19 -0700 Subject: [PATCH 38/64] add ntree limit --- R-package/R/predict.xgb.Booster.R | 14 +++++++++++--- R-package/src/xgboost_R.cpp | 3 ++- R-package/src/xgboost_R.h | 3 ++- src/gbm/gblinear-inl.hpp | 5 ++++- src/gbm/gbm.h | 5 ++++- src/gbm/gbtree-inl.hpp | 15 ++++++++++----- src/learner/learner-inl.hpp | 14 ++++++++++---- wrapper/xgboost.py | 5 +++-- wrapper/xgboost_wrapper.cpp | 8 ++++---- wrapper/xgboost_wrapper.h | 4 +++- 10 files changed, 53 insertions(+), 23 deletions(-) diff --git a/R-package/R/predict.xgb.Booster.R b/R-package/R/predict.xgb.Booster.R index b51a1b19c..87666a55f 100644 --- a/R-package/R/predict.xgb.Booster.R +++ b/R-package/R/predict.xgb.Booster.R @@ -11,7 +11,8 @@ setClass("xgb.Booster") #' value of sum of functions, when outputmargin=TRUE, the prediction is #' untransformed margin value. In logistic regression, outputmargin=T will #' output value before logistic transformation. -#' +#' @param ntreelimit limit number of trees used in prediction, this parameter is only valid for gbtree, but not for gblinear. +#' set it to be value bigger than 0 #' @examples #' data(iris) #' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2) @@ -19,11 +20,18 @@ setClass("xgb.Booster") #' @export #' setMethod("predict", signature = "xgb.Booster", - definition = function(object, newdata, outputmargin = FALSE) { + definition = function(object, newdata, outputmargin = FALSE, ntreelimit = NULL) { if (class(newdata) != "xgb.DMatrix") { newdata <- xgb.DMatrix(newdata) } - ret <- .Call("XGBoosterPredict_R", object, newdata, as.integer(outputmargin), PACKAGE = "xgboost") + if (is.null(ntreelimit)) { + ntreelimit <- 0 + } else { + if (ntreelimit < 1){ + stop("predict: ntreelimit must be greater equal than 1") + } + } + ret <- .Call("XGBoosterPredict_R", object, newdata, as.integer(outputmargin), as.integer(ntreelimit), PACKAGE = "xgboost") return(ret) }) diff --git a/R-package/src/xgboost_R.cpp b/R-package/src/xgboost_R.cpp index 96d3871b1..b03410a4c 100644 --- a/R-package/src/xgboost_R.cpp +++ b/R-package/src/xgboost_R.cpp @@ -247,12 +247,13 @@ extern "C" { &vec_dmats[0], &vec_sptr[0], len)); _WrapperEnd(); } - SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin) { + SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin, SEXP ntree_limit) { _WrapperBegin(); bst_ulong olen; const float *res = XGBoosterPredict(R_ExternalPtrAddr(handle), R_ExternalPtrAddr(dmat), asInteger(output_margin), + asInteger(ntree_limit), &olen); SEXP ret = PROTECT(allocVector(REALSXP, olen)); for (size_t i = 0; i < olen; ++i) { diff --git a/R-package/src/xgboost_R.h b/R-package/src/xgboost_R.h index 8b4a3372c..c988ff1e5 100644 --- a/R-package/src/xgboost_R.h +++ b/R-package/src/xgboost_R.h @@ -107,8 +107,9 @@ extern "C" { * \param handle handle * \param dmat data matrix * \param output_margin whether only output raw margin value + * \param ntree_limit limit number of trees used in prediction */ - SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin); + SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin, SEXP ntree_limit); /*! * \brief load model from existing file * \param handle handle diff --git a/src/gbm/gblinear-inl.hpp b/src/gbm/gblinear-inl.hpp index e9566f87e..a9d4c8d62 100644 --- a/src/gbm/gblinear-inl.hpp +++ b/src/gbm/gblinear-inl.hpp @@ -105,7 +105,10 @@ class GBLinear : public IGradBooster { virtual void Predict(IFMatrix *p_fmat, int64_t buffer_offset, const BoosterInfo &info, - std::vector *out_preds) { + std::vector *out_preds, + unsigned ntree_limit = 0) { + utils::Check(ntree_limit == 0, + "GBLinear::Predict ntrees is only valid for gbtree predictor"); std::vector &preds = *out_preds; preds.resize(0); // start collecting the prediction diff --git a/src/gbm/gbm.h b/src/gbm/gbm.h index c548cab94..07dade4ac 100644 --- a/src/gbm/gbm.h +++ b/src/gbm/gbm.h @@ -57,11 +57,14 @@ class IGradBooster { * the size of buffer is set by convention using IGradBooster.SetParam("num_pbuffer","size") * \param info extra side information that may be needed for prediction * \param out_preds output vector to hold the predictions + * \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means + * we do not limit number of trees, this parameter is only valid for gbtree, but not for gblinear */ virtual void Predict(IFMatrix *p_fmat, int64_t buffer_offset, const BoosterInfo &info, - std::vector *out_preds) = 0; + std::vector *out_preds, + unsigned ntree_limit = 0) = 0; /*! * \brief dump the model in text format * \param fmap feature map that may help give interpretations of feature diff --git a/src/gbm/gbtree-inl.hpp b/src/gbm/gbtree-inl.hpp index f66b49d00..8fea28727 100644 --- a/src/gbm/gbtree-inl.hpp +++ b/src/gbm/gbtree-inl.hpp @@ -105,7 +105,8 @@ class GBTree : public IGradBooster { virtual void Predict(IFMatrix *p_fmat, int64_t buffer_offset, const BoosterInfo &info, - std::vector *out_preds) { + std::vector *out_preds, + unsigned ntree_limit = 0) { int nthread; #pragma omp parallel { @@ -137,7 +138,8 @@ class GBTree : public IGradBooster { this->Pred(batch[i], buffer_offset < 0 ? -1 : buffer_offset + ridx, gid, info.GetRoot(ridx), &feats, - &preds[ridx * mparam.num_output_group + gid], stride); + &preds[ridx * mparam.num_output_group + gid], stride, + ntree_limit); } } } @@ -212,14 +214,16 @@ class GBTree : public IGradBooster { int bst_group, unsigned root_index, tree::RegTree::FVec *p_feats, - float *out_pred, size_t stride) { + float *out_pred, size_t stride, unsigned ntree_limit) { size_t itop = 0; float psum = 0.0f; // sum of leaf vector std::vector vec_psum(mparam.size_leaf_vector, 0.0f); const int64_t bid = mparam.BufferOffset(buffer_index, bst_group); + // number of valid trees + unsigned treeleft = ntree_limit == 0 ? std::numeric_limits::max() : ntree_limit; // load buffered results if any - if (bid >= 0) { + if (bid >= 0 && ntree_limit == 0) { itop = pred_counter[bid]; psum = pred_buffer[bid]; for (int i = 0; i < mparam.size_leaf_vector; ++i) { @@ -235,12 +239,13 @@ class GBTree : public IGradBooster { for (int j = 0; j < mparam.size_leaf_vector; ++j) { vec_psum[j] += trees[i]->leafvec(tid)[j]; } + if(--treeleft == 0) break; } } p_feats->Drop(inst); } // updated the buffered results - if (bid >= 0) { + if (bid >= 0 && ntree_limit == 0) { pred_counter[bid] = static_cast(trees.size()); pred_buffer[bid] = psum; for (int i = 0; i < mparam.size_leaf_vector; ++i) { diff --git a/src/learner/learner-inl.hpp b/src/learner/learner-inl.hpp index f252abedf..60e1fccf1 100644 --- a/src/learner/learner-inl.hpp +++ b/src/learner/learner-inl.hpp @@ -212,11 +212,14 @@ class BoostLearner { * \param data input data * \param output_margin whether to only predict margin value instead of transformed prediction * \param out_preds output vector that stores the prediction + * \param ntree_limit limit number of trees used for boosted tree + * predictor, when it equals 0, this means we are using all the trees */ inline void Predict(const DMatrix &data, bool output_margin, - std::vector *out_preds) const { - this->PredictRaw(data, out_preds); + std::vector *out_preds, + unsigned ntree_limit = 0) const { + this->PredictRaw(data, out_preds, ntree_limit); if (!output_margin) { obj_->PredTransform(out_preds); } @@ -246,11 +249,14 @@ class BoostLearner { * \brief get un-transformed prediction * \param data training data matrix * \param out_preds output vector that stores the prediction + * \param ntree_limit limit number of trees used for boosted tree + * predictor, when it equals 0, this means we are using all the trees */ inline void PredictRaw(const DMatrix &data, - std::vector *out_preds) const { + std::vector *out_preds, + unsigned ntree_limit = 0) const { gbm_->Predict(data.fmat(), this->FindBufferOffset(data), - data.info.info, out_preds); + data.info.info, out_preds, ntree_limit); // add base margin std::vector &preds = *out_preds; const bst_omp_uint ndata = static_cast(preds.size()); diff --git a/wrapper/xgboost.py b/wrapper/xgboost.py index e2cbdba2e..a6999a39f 100644 --- a/wrapper/xgboost.py +++ b/wrapper/xgboost.py @@ -192,15 +192,16 @@ class Booster: return xglib.XGBoosterEvalOneIter(self.handle, it, dmats, evnames, len(evals)) def eval(self, mat, name = 'eval', it = 0): return self.eval_set( [(mat,name)], it) - def predict(self, data, output_margin=False): + def predict(self, data, output_margin=False, ntree_limit=0): """ predict with data data: the dmatrix storing the input output_margin: whether output raw margin value that is untransformed + ntree_limit: limit number of trees in prediction, default to 0, 0 means using all the trees """ length = ctypes.c_ulong() preds = xglib.XGBoosterPredict(self.handle, data.handle, - int(output_margin), ctypes.byref(length)) + int(output_margin), ntree_limit, ctypes.byref(length)) return ctypes2numpy(preds, length.value, 'float32') def save_model(self, fname): """ save model to file """ diff --git a/wrapper/xgboost_wrapper.cpp b/wrapper/xgboost_wrapper.cpp index 70c7e87b0..3f45c1438 100644 --- a/wrapper/xgboost_wrapper.cpp +++ b/wrapper/xgboost_wrapper.cpp @@ -25,9 +25,9 @@ class Booster: public learner::BoostLearner { this->init_model = false; this->SetCacheData(mats); } - const float *Pred(const DataMatrix &dmat, int output_margin, bst_ulong *len) { + inline const float *Pred(const DataMatrix &dmat, int output_margin, unsigned ntree_limit, bst_ulong *len) { this->CheckInitModel(); - this->Predict(dmat, output_margin != 0, &this->preds_); + this->Predict(dmat, output_margin != 0, &this->preds_, ntree_limit); *len = static_cast(this->preds_.size()); return &this->preds_[0]; } @@ -249,8 +249,8 @@ extern "C"{ bst->eval_str = bst->EvalOneIter(iter, mats, names); return bst->eval_str.c_str(); } - const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, bst_ulong *len) { - return static_cast(handle)->Pred(*static_cast(dmat), output_margin, len); + const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, unsigned ntree_limit, bst_ulong *len) { + return static_cast(handle)->Pred(*static_cast(dmat), output_margin, ntree_limit, len); } void XGBoosterLoadModel(void *handle, const char *fname) { static_cast(handle)->LoadModel(fname); diff --git a/wrapper/xgboost_wrapper.h b/wrapper/xgboost_wrapper.h index 65446aea6..9687ec0a3 100644 --- a/wrapper/xgboost_wrapper.h +++ b/wrapper/xgboost_wrapper.h @@ -165,9 +165,11 @@ extern "C" { * \param handle handle * \param dmat data matrix * \param output_margin whether only output raw margin value + * \param ntree_limit limit number of trees used for prediction, this is only valid for boosted trees + * when the parameter is set to 0, we will use all the trees * \param len used to store length of returning result */ - XGB_DLL const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, bst_ulong *len); + XGB_DLL const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, unsigned ntree_limit, bst_ulong *len); /*! * \brief load model from existing file * \param handle handle From 025ca170ec02c2796154beb0653e83dbcb183131 Mon Sep 17 00:00:00 2001 From: Tong He Date: Mon, 1 Sep 2014 15:25:16 -0700 Subject: [PATCH 39/64] Update predict.xgb.Booster.R --- R-package/R/predict.xgb.Booster.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/R/predict.xgb.Booster.R b/R-package/R/predict.xgb.Booster.R index 87666a55f..d42766f87 100644 --- a/R-package/R/predict.xgb.Booster.R +++ b/R-package/R/predict.xgb.Booster.R @@ -28,7 +28,7 @@ setMethod("predict", signature = "xgb.Booster", ntreelimit <- 0 } else { if (ntreelimit < 1){ - stop("predict: ntreelimit must be greater equal than 1") + stop("predict: ntreelimit must be equal to or greater than 1") } } ret <- .Call("XGBoosterPredict_R", object, newdata, as.integer(outputmargin), as.integer(ntreelimit), PACKAGE = "xgboost") From 8863c520e7f54dbff2675760b827a9b7c8b978c1 Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 1 Sep 2014 15:32:02 -0700 Subject: [PATCH 40/64] some quick fix --- Makefile | 2 +- R-package/src/xgboost_R.cpp | 1 + src/utils/omp.h | 2 +- src/utils/utils.h | 3 +-- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 28a289ac6..2fd881843 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ export CC = gcc export CXX = g++ export LDFLAGS= -pthread -lm -export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -pedantic +export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -pedantic -ansi ifeq ($(no_omp),1) CFLAGS += -DDISABLE_OPENMP diff --git a/R-package/src/xgboost_R.cpp b/R-package/src/xgboost_R.cpp index b03410a4c..38cce98b4 100644 --- a/R-package/src/xgboost_R.cpp +++ b/R-package/src/xgboost_R.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include "xgboost_R.h" #include "wrapper/xgboost_wrapper.h" #include "src/utils/utils.h" diff --git a/src/utils/omp.h b/src/utils/omp.h index 73dcf6449..43d01a54f 100644 --- a/src/utils/omp.h +++ b/src/utils/omp.h @@ -10,7 +10,7 @@ #else #ifndef DISABLE_OPENMP // use pragma message instead of warning -#pragma message ("Warning: OpenMP is not available, xgboost will be compiled into single thread code. You may want to ungrade your compiler to enable OpenMP support, to get benefit of multi-threading.") +#pragma message ("Warning: OpenMP is not available. XGBoost will be compiled with single thread mode. You may want to use compiler with OpenMP support to get benefit of multi-threading.") #endif inline int omp_get_thread_num() { return 0; } inline int omp_get_num_threads() { return 1; } diff --git a/src/utils/utils.h b/src/utils/utils.h index 75d493cc2..7fc966c97 100644 --- a/src/utils/utils.h +++ b/src/utils/utils.h @@ -23,12 +23,11 @@ #endif #endif -#ifdef __APPLE__ +#ifdef __APPLE__ #define off64_t off_t #define fopen64 fopen #endif -#define _FILE_OFFSET_BITS 64 extern "C" { #include } From b973a4dcaa9e8701fbe61f2f07cb85e2a548b5e3 Mon Sep 17 00:00:00 2001 From: hetong Date: Mon, 1 Sep 2014 15:38:29 -0700 Subject: [PATCH 41/64] improve doc in predict --- R-package/R/predict.xgb.Booster.R | 2 +- R-package/man/predict-xgb.Booster-method.Rd | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/R-package/R/predict.xgb.Booster.R b/R-package/R/predict.xgb.Booster.R index d42766f87..390ac689e 100644 --- a/R-package/R/predict.xgb.Booster.R +++ b/R-package/R/predict.xgb.Booster.R @@ -12,7 +12,7 @@ setClass("xgb.Booster") #' untransformed margin value. In logistic regression, outputmargin=T will #' output value before logistic transformation. #' @param ntreelimit limit number of trees used in prediction, this parameter is only valid for gbtree, but not for gblinear. -#' set it to be value bigger than 0 +#' set it to be value bigger than 0. It will use all trees by default. #' @examples #' data(iris) #' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2) diff --git a/R-package/man/predict-xgb.Booster-method.Rd b/R-package/man/predict-xgb.Booster-method.Rd index d43fd7362..d192997d2 100644 --- a/R-package/man/predict-xgb.Booster-method.Rd +++ b/R-package/man/predict-xgb.Booster-method.Rd @@ -4,7 +4,8 @@ \alias{predict,xgb.Booster-method} \title{Predict method for eXtreme Gradient Boosting model} \usage{ -\S4method{predict}{xgb.Booster}(object, newdata, outputmargin = FALSE) +\S4method{predict}{xgb.Booster}(object, newdata, outputmargin = FALSE, + ntreelimit = NULL) } \arguments{ \item{object}{Object of class "xgb.Boost"} @@ -13,9 +14,12 @@ \code{xgb.DMatrix}.} \item{outputmargin}{whether the prediction should be shown in the original - value of sum of functions, when outputmargin=TRUE, the prediction is - untransformed margin value. In logistic regression, outputmargin=T will - output value before logistic transformation.} +value of sum of functions, when outputmargin=TRUE, the prediction is +untransformed margin value. In logistic regression, outputmargin=T will +output value before logistic transformation.} + +\item{ntreelimit}{limit number of trees used in prediction, this parameter is only valid for gbtree, but not for gblinear. +set it to be value bigger than 0. It will use all trees by default.} } \description{ Predicted values based on xgboost model object. From ada9dd94ad2895c2394453164342abb249a74aff Mon Sep 17 00:00:00 2001 From: Tong He Date: Mon, 1 Sep 2014 15:51:48 -0700 Subject: [PATCH 42/64] Update omp.h --- src/utils/omp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/omp.h b/src/utils/omp.h index 73dcf6449..615dbdf51 100644 --- a/src/utils/omp.h +++ b/src/utils/omp.h @@ -10,7 +10,7 @@ #else #ifndef DISABLE_OPENMP // use pragma message instead of warning -#pragma message ("Warning: OpenMP is not available, xgboost will be compiled into single thread code. You may want to ungrade your compiler to enable OpenMP support, to get benefit of multi-threading.") +#pragma message ("Warning: OpenMP is not available, xgboost will be compiled into single-thread code. Downgrade your compiler to enable OpenMP support and get benefit of multi-threading.") #endif inline int omp_get_thread_num() { return 0; } inline int omp_get_num_threads() { return 1; } From d391becb4e2a714cce3ee626f9a37cf0d16212d3 Mon Sep 17 00:00:00 2001 From: Tong He Date: Mon, 1 Sep 2014 16:16:06 -0700 Subject: [PATCH 43/64] Update omp.h --- src/utils/omp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/omp.h b/src/utils/omp.h index 615dbdf51..5eb5612e0 100644 --- a/src/utils/omp.h +++ b/src/utils/omp.h @@ -10,7 +10,7 @@ #else #ifndef DISABLE_OPENMP // use pragma message instead of warning -#pragma message ("Warning: OpenMP is not available, xgboost will be compiled into single-thread code. Downgrade your compiler to enable OpenMP support and get benefit of multi-threading.") +#pragma message ("Warning: OpenMP is not available, xgboost will be compiled into single-thread code. Use OpenMP-enabled compiler to get benefit of multi-threading") #endif inline int omp_get_thread_num() { return 0; } inline int omp_get_num_threads() { return 1; } From a6ce55493d4df2d3fb4b337f6245fffff8d4fc25 Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 1 Sep 2014 17:02:42 -0700 Subject: [PATCH 44/64] make R package strict c99 --- R-package/src/Makevars | 4 ++-- R-package/src/Makevars.win | 4 ++-- R-package/src/xgboost_R.cpp | 21 +++++++++++---------- R-package/src/xgboost_assert.c | 25 +++++++++++++++++++++++++ src/utils/utils.h | 12 +++++++++++- 5 files changed, 51 insertions(+), 15 deletions(-) create mode 100644 R-package/src/xgboost_assert.c diff --git a/R-package/src/Makevars b/R-package/src/Makevars index ae1e3def5..44dce490e 100644 --- a/R-package/src/Makevars +++ b/R-package/src/Makevars @@ -1,9 +1,9 @@ # package root PKGROOT=../../ # _*_ mode: Makefile; _*_ -PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -DXGBOOST_CUSTOMIZE_PRNG_ -I$(PKGROOT) +PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -DXGBOOST_CUSTOMIZE_PRNG_ -DXGBOOST_STRICT_CXX98_ -I$(PKGROOT) PKG_CXXFLAGS= $(SHLIB_OPENMP_CFLAGS) PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) -OBJECTS= xgboost_R.o $(PKGROOT)/wrapper/xgboost_wrapper.o $(PKGROOT)/src/io/io.o $(PKGROOT)/src/gbm/gbm.o $(PKGROOT)/src/tree/updater.o +OBJECTS= xgboost_R.o xgboost_assert.o $(PKGROOT)/wrapper/xgboost_wrapper.o $(PKGROOT)/src/io/io.o $(PKGROOT)/src/gbm/gbm.o $(PKGROOT)/src/tree/updater.o diff --git a/R-package/src/Makevars.win b/R-package/src/Makevars.win index aec0d2c89..289f1a15a 100644 --- a/R-package/src/Makevars.win +++ b/R-package/src/Makevars.win @@ -1,7 +1,7 @@ # package root PKGROOT=../../ # _*_ mode: Makefile; _*_ -PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -DXGBOOST_CUSTOMIZE_PRNG_ -I$(PKGROOT) +PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -DXGBOOST_CUSTOMIZE_PRNG_ -DXGBOOST_STRICT_CXX98_ -I$(PKGROOT) PKG_CXXFLAGS= $(SHLIB_OPENMP_CFLAGS) PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) -OBJECTS= xgboost_R.o $(PKGROOT)/wrapper/xgboost_wrapper.o $(PKGROOT)/src/io/io.o $(PKGROOT)/src/gbm/gbm.o $(PKGROOT)/src/tree/updater.o +OBJECTS= xgboost_R.o xgboost_assert.o $(PKGROOT)/wrapper/xgboost_wrapper.o $(PKGROOT)/src/io/io.o $(PKGROOT)/src/gbm/gbm.o $(PKGROOT)/src/tree/updater.o diff --git a/R-package/src/xgboost_R.cpp b/R-package/src/xgboost_R.cpp index 38cce98b4..2f1a8b772 100644 --- a/R-package/src/xgboost_R.cpp +++ b/R-package/src/xgboost_R.cpp @@ -8,21 +8,22 @@ #include "src/utils/utils.h" #include "src/utils/omp.h" #include "src/utils/matrix_csr.h" - using namespace xgboost; + +extern "C" { + void XGBoostAssert_R(int exp, const char *fmt, ...); + void XGBoostCheck_R(int exp, const char *fmt, ...); +} + // implements error handling namespace xgboost { namespace utils { -void HandleAssertError(const char *msg) { - error("%s", msg); -} -void HandleCheckError(const char *msg) { - error("%s", msg); -} -void HandlePrint(const char *msg) { - Rprintf("%s", msg); -} +void (*Printf)(const char *fmt, ...) = Rprintf; +void (*Assert)(int exp, const char *fmt, ...) = XGBoostAssert_R; +void (*Check)(int exp, const char *fmt, ...) = XGBoostCheck_R; +void (*Error)(const char *fmt, ...) = error; } // namespace utils + namespace random { void Seed(unsigned seed) { warning("parameter seed is ignored, please set random seed using set.seed"); diff --git a/R-package/src/xgboost_assert.c b/R-package/src/xgboost_assert.c new file mode 100644 index 000000000..a2539c3b9 --- /dev/null +++ b/R-package/src/xgboost_assert.c @@ -0,0 +1,25 @@ +#include +#include +#include + +// implements error handling +void XGBoostAssert_R(int exp, const char *fmt, ...) { + char buf[1024]; + if (exp == 0) { + va_list args; + va_start(args, fmt); + vsprintf(buf, fmt, args); + va_end(args); + error("AssertError:%s\n", buf); + } +} +void XGBoostCheck_R(int exp, const char *fmt, ...) { + char buf[1024]; + if (exp == 0) { + va_list args; + va_start(args, fmt); + vsprintf(buf, fmt, args); + va_end(args); + error("%s\n", buf); + } +} diff --git a/src/utils/utils.h b/src/utils/utils.h index 7fc966c97..1214be861 100644 --- a/src/utils/utils.h +++ b/src/utils/utils.h @@ -70,14 +70,23 @@ inline void HandlePrint(const char *msg) { printf("%s", msg); } #else +#ifndef XGBOOST_STRICT_CXX98__ // include declarations, some one must implement this void HandleAssertError(const char *msg); void HandleCheckError(const char *msg); void HandlePrint(const char *msg); #endif +#endif +#ifdef XGBOOST_STRICT_CXX98_ +// these function pointers are to be assigned +extern void (*Printf)(const char *fmt, ...); +extern void (*Assert)(int exp, const char *fmt, ...); +extern void (*Check)(int exp, const char *fmt, ...); +extern void (*Error)(const char *fmt, ...); +#else /*! \brief printf, print message to the console */ -inline void Printf(const char *fmt, ...) { +inline void Printf(const char *fmt, ...) { std::string msg(kPrintBuffer, '\0'); va_list args; va_start(args, fmt); @@ -121,6 +130,7 @@ inline void Error(const char *fmt, ...) { HandleCheckError(msg.c_str()); } } +#endif /*! \brief replace fopen, report error when the file open fails */ inline FILE *FopenCheck(const char *fname, const char *flag) { From 0c5f2b9409cbb2f554aa28cc011ecc637ccf9902 Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 1 Sep 2014 17:15:04 -0700 Subject: [PATCH 45/64] gard GNU c --- Makefile | 2 +- src/utils/utils.h | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 2fd881843..c7524747c 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ export CC = gcc -export CXX = g++ +export CXX = clang++ export LDFLAGS= -pthread -lm export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -pedantic -ansi diff --git a/src/utils/utils.h b/src/utils/utils.h index 1214be861..49bd36819 100644 --- a/src/utils/utils.h +++ b/src/utils/utils.h @@ -10,8 +10,10 @@ #include #include #include -#ifdef _MSC_VER +#if !defined(__GNUC__) #define fopen64 fopen +#endif +#ifdef _MSC_VER // NOTE: sprintf_s is not equivalent to snprintf, // they are equivalent when success, which is sufficient for our case #define snprintf sprintf_s From 0d5debcc25f6c5fc43486c981b1b754c8f2e212f Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 1 Sep 2014 17:23:44 -0700 Subject: [PATCH 46/64] fine fix --- Makefile | 2 +- src/utils/utils.h | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index c7524747c..2fd881843 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ export CC = gcc -export CXX = clang++ +export CXX = g++ export LDFLAGS= -pthread -lm export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -pedantic -ansi diff --git a/src/utils/utils.h b/src/utils/utils.h index 49bd36819..56a957df9 100644 --- a/src/utils/utils.h +++ b/src/utils/utils.h @@ -7,9 +7,13 @@ */ #define _CRT_SECURE_NO_WARNINGS #include -#include #include #include + +#ifndef XGBOOST_STRICT_CXX98_ +#include +#endif + #if !defined(__GNUC__) #define fopen64 fopen #endif @@ -72,7 +76,7 @@ inline void HandlePrint(const char *msg) { printf("%s", msg); } #else -#ifndef XGBOOST_STRICT_CXX98__ +#ifndef XGBOOST_STRICT_CXX98_ // include declarations, some one must implement this void HandleAssertError(const char *msg); void HandleCheckError(const char *msg); From 9ee9d29f1347dc649a6a62dcc407e8f24869b467 Mon Sep 17 00:00:00 2001 From: hetong Date: Mon, 1 Sep 2014 17:24:13 -0700 Subject: [PATCH 47/64] refine readme.md --- R-package/README.md | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/R-package/README.md b/R-package/README.md index 07b124b8e..cdec7ac97 100644 --- a/R-package/README.md +++ b/R-package/README.md @@ -1,10 +1,20 @@ -This is subfolder for experimental version of R package. +# R package for xgboost. -Installation: +## Installation + +For up-to-date version(which is recommended), please install from github. ```r require(devtools) install_github('xgboost','tqchen',subdir='R-package') ``` +For stable version on CRAN, please run + +```r +install.packages('xgboost') +``` + +## Examples + Please visit [demo](https://github.com/tqchen/xgboost/blob/master/R-package/inst/examples/demo.R) for more details. From 76d5fc7e78550970beab7904fd1cc8874f4a33d8 Mon Sep 17 00:00:00 2001 From: hetong Date: Mon, 1 Sep 2014 17:43:28 -0700 Subject: [PATCH 48/64] attemp to fix line breaking issue of doc --- R-package/R/xgb.dump.R | 9 ++++++--- R-package/R/xgb.train.R | 9 +++++---- R-package/man/xgb.dump.Rd | 8 +++++--- R-package/man/xgb.train.Rd | 9 +++++---- 4 files changed, 21 insertions(+), 14 deletions(-) diff --git a/R-package/R/xgb.dump.R b/R-package/R/xgb.dump.R index dc3f431a2..09406dc99 100644 --- a/R-package/R/xgb.dump.R +++ b/R-package/R/xgb.dump.R @@ -4,9 +4,12 @@ #' #' @param model the model object. #' @param fname the name of the binary file. -#' @param fmap feature map file representing the type of feature, to make it -#' look nice, run inst/examples/demo.R for result and inst/examples/featmap.txt for example -#' Format: https://github.com/tqchen/xgboost/wiki/Binary-Classification#dump-model +#' @param fmap feature map file representing the type of feature. +#' Detailed description could be found at +#' \url{https://github.com/tqchen/xgboost/wiki/Binary-Classification#dump-model}. +#' Run inst/examples/demo.R for the result and inst/examples/featmap.txt +#' for example Format. +#' #' #' @examples #' data(iris) diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R index 99a6dcc2d..58a575d03 100644 --- a/R-package/R/xgb.train.R +++ b/R-package/R/xgb.train.R @@ -24,10 +24,11 @@ #' watchlist=list(validation1=mat1, validation2=mat2) to watch #' the performance of each round's model on mat1 and mat2 #' -#' @param obj customized objective function. Given prediction and dtrain, -#' return gradient and second order gradient. -#' @param feval custimized evaluation function. Given prediction and dtrain, -#' return a \code{list(metric='metric-name', value='metric-value')}. +#' @param obj customized objective function. Returns gradient and second order +#' gradient with given prediction and dtrain, +#' @param feval custimized evaluation function. Returns +#' \code{list(metric='metric-name', value='metric-value')} with given +#' prediction and dtrain, #' @param ... other parameters to pass to \code{params}. #' #' @details diff --git a/R-package/man/xgb.dump.Rd b/R-package/man/xgb.dump.Rd index 69ec13300..4d6933811 100644 --- a/R-package/man/xgb.dump.Rd +++ b/R-package/man/xgb.dump.Rd @@ -10,9 +10,11 @@ xgb.dump(model, fname, fmap = "") \item{fname}{the name of the binary file.} -\item{fmap}{feature map file representing the type of feature, to make it - look nice, run inst/examples/demo.R for result and inst/examples/featmap.txt for example - Format: https://github.com/tqchen/xgboost/wiki/Binary-Classification#dump-model} +\item{fmap}{feature map file representing the type of feature. + Detailed description could be found at + \url{https://github.com/tqchen/xgboost/wiki/Binary-Classification#dump-model}. + Run inst/examples/demo.R for the result and inst/examples/featmap.txt + for example Format.} } \description{ Save a xgboost model to text file. Could be parsed later. diff --git a/R-package/man/xgb.train.Rd b/R-package/man/xgb.train.Rd index aa2778501..4da3b0013 100644 --- a/R-package/man/xgb.train.Rd +++ b/R-package/man/xgb.train.Rd @@ -32,11 +32,12 @@ xgb.train(params = list(), dtrain, nrounds, watchlist = list(), watchlist=list(validation1=mat1, validation2=mat2) to watch the performance of each round's model on mat1 and mat2} -\item{obj}{customized objective function. Given prediction and dtrain, -return gradient and second order gradient.} +\item{obj}{customized objective function. Returns gradient and second order +gradient with given prediction and dtrain,} -\item{feval}{custimized evaluation function. Given prediction and dtrain, -return a \code{list(metric='metric-name', value='metric-value')}.} +\item{feval}{custimized evaluation function. Returns +\code{list(metric='metric-name', value='metric-value')} with given +prediction and dtrain,} \item{...}{other parameters to pass to \code{params}.} } From 51a9a36b5167cf145d05857b1e66280930045124 Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Mon, 1 Sep 2014 18:53:24 -0700 Subject: [PATCH 49/64] Update DESCRIPTION --- R-package/DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 9a1bfe8b6..40705e317 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -13,7 +13,7 @@ Description: This package is a R wrapper of xgboost, which is short for eXtreme than existing gradient boosting packages such as gbm. It supports various objective functions, including regression, classification and ranking. The package is made to be extensible, so that user are also allowed to define - there own objectives easily. + their own objectives easily. License: Apache License (== 2.0) | file LICENSE URL: https://github.com/tqchen/xgboost BugReports: https://github.com/tqchen/xgboost/issues From 85e3fbb06a89469d86f543cd5462a734371550e8 Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Mon, 1 Sep 2014 18:54:45 -0700 Subject: [PATCH 50/64] Update README.md --- R-package/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/README.md b/R-package/README.md index cdec7ac97..8d3068daf 100644 --- a/R-package/README.md +++ b/R-package/README.md @@ -2,7 +2,7 @@ ## Installation -For up-to-date version(which is recommended), please install from github. +For up-to-date version(which is recommended), please install from github, windows user will require installtion of [RTools](http://cran.r-project.org/bin/windows/Rtools/). ```r require(devtools) From 1841d730af82d6c6df9f7847cf0ba1313d04adee Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Mon, 1 Sep 2014 18:55:20 -0700 Subject: [PATCH 51/64] Update README.md --- R-package/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/README.md b/R-package/README.md index 8d3068daf..bb92870b6 100644 --- a/R-package/README.md +++ b/R-package/README.md @@ -2,7 +2,7 @@ ## Installation -For up-to-date version(which is recommended), please install from github, windows user will require installtion of [RTools](http://cran.r-project.org/bin/windows/Rtools/). +For up-to-date version(which is recommended), please install from github. Windows user will need to install [RTools](http://cran.r-project.org/bin/windows/Rtools/) first. ```r require(devtools) From 48411193aedfb97025441e3f72f9ed48e090adbc Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Mon, 1 Sep 2014 18:58:00 -0700 Subject: [PATCH 52/64] Update README.md --- R-package/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R-package/README.md b/R-package/README.md index bb92870b6..070f7c834 100644 --- a/R-package/README.md +++ b/R-package/README.md @@ -17,4 +17,5 @@ install.packages('xgboost') ## Examples -Please visit [demo](https://github.com/tqchen/xgboost/blob/master/R-package/inst/examples/demo.R) for more details. +* Please visit [demo](https://github.com/tqchen/xgboost/blob/master/R-package/inst/examples/demo.R) for walk throughe example. +* See also the example script for [Higgs Challenge](https://github.com/tqchen/xgboost/tree/master/demo/kaggle-higgs) including [speedtest](https://github.com/tqchen/xgboost/blob/master/demo/kaggle-higgs/speedtest.R) From b60b23ed1c2196d1755860ce219ecc23eda97118 Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Mon, 1 Sep 2014 18:58:56 -0700 Subject: [PATCH 53/64] Update README.md --- R-package/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/README.md b/R-package/README.md index 070f7c834..60df71154 100644 --- a/R-package/README.md +++ b/R-package/README.md @@ -18,4 +18,4 @@ install.packages('xgboost') ## Examples * Please visit [demo](https://github.com/tqchen/xgboost/blob/master/R-package/inst/examples/demo.R) for walk throughe example. -* See also the example script for [Higgs Challenge](https://github.com/tqchen/xgboost/tree/master/demo/kaggle-higgs) including [speedtest](https://github.com/tqchen/xgboost/blob/master/demo/kaggle-higgs/speedtest.R) +* See also the [example scripts](https://github.com/tqchen/xgboost/tree/master/demo/kaggle-higgs) for Kaggle Higgs Challenge, including [speedtest script](https://github.com/tqchen/xgboost/blob/master/demo/kaggle-higgs/speedtest.R) on this dataset. From 50f1b5d90385904c7649e34f10eb61c0e0d43ded Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Mon, 1 Sep 2014 19:00:37 -0700 Subject: [PATCH 54/64] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f7947b906..38291b09d 100644 --- a/README.md +++ b/README.md @@ -41,5 +41,5 @@ Version XGBoost in Graphlab Create ====== -* XGBoost is adopted as part of boosted tree toolkit in Graphlab Create (GLC). Graphlab Create is a powerful python toolkit that allows you to data manipulation, graph processing, hyper-parameter search, and visualization of big data in one framework. Try the Graphlab Create in http://graphlab.com/products/create/quick-start-guide.html +* XGBoost is adopted as part of boosted tree toolkit in Graphlab Create (GLC). Graphlab Create is a powerful python toolkit that allows you to data manipulation, graph processing, hyper-parameter search, and visualization of TeraBytes scale data in one framework. Try the Graphlab Create in http://graphlab.com/products/create/quick-start-guide.html * Nice blogpost by Jay Gu using GLC boosted tree to solve kaggle bike sharing challenge: http://blog.graphlab.com/using-gradient-boosted-trees-to-predict-bike-sharing-demand From 42fb7b4d9d218d00ad74f6b7ad79875a837e874f Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 1 Sep 2014 22:06:10 -0700 Subject: [PATCH 55/64] some fix to make it more c++ --- Makefile | 2 +- R-package/src/xgboost_R.cpp | 11 +++++++---- src/learner/evaluation-inl.hpp | 2 +- src/learner/helper_utils.h | 1 + src/learner/objective-inl.hpp | 16 ++++++++-------- src/utils/random.h | 2 +- src/utils/utils.h | 13 +++++++------ 7 files changed, 26 insertions(+), 21 deletions(-) diff --git a/Makefile b/Makefile index 2fd881843..3705bc4a5 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ export CC = gcc export CXX = g++ export LDFLAGS= -pthread -lm -export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -pedantic -ansi +export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -pedantic -std=c++98 -Wpedantic -pedantic-errors ifeq ($(no_omp),1) CFLAGS += -DDISABLE_OPENMP diff --git a/R-package/src/xgboost_R.cpp b/R-package/src/xgboost_R.cpp index 2f1a8b772..ac5934d17 100644 --- a/R-package/src/xgboost_R.cpp +++ b/R-package/src/xgboost_R.cpp @@ -8,6 +8,7 @@ #include "src/utils/utils.h" #include "src/utils/omp.h" #include "src/utils/matrix_csr.h" +using namespace std; using namespace xgboost; extern "C" { @@ -18,10 +19,12 @@ extern "C" { // implements error handling namespace xgboost { namespace utils { -void (*Printf)(const char *fmt, ...) = Rprintf; -void (*Assert)(int exp, const char *fmt, ...) = XGBoostAssert_R; -void (*Check)(int exp, const char *fmt, ...) = XGBoostCheck_R; -void (*Error)(const char *fmt, ...) = error; +extern "C" { + void (*Printf)(const char *fmt, ...) = Rprintf; + void (*Assert)(int exp, const char *fmt, ...) = XGBoostAssert_R; + void (*Check)(int exp, const char *fmt, ...) = XGBoostCheck_R; + void (*Error)(const char *fmt, ...) = error; +} } // namespace utils namespace random { diff --git a/src/learner/evaluation-inl.hpp b/src/learner/evaluation-inl.hpp index 4f34a7ac4..52877e17b 100644 --- a/src/learner/evaluation-inl.hpp +++ b/src/learner/evaluation-inl.hpp @@ -8,8 +8,8 @@ #include #include #include -#include #include +#include #include #include "./evaluation.h" #include "./helper_utils.h" diff --git a/src/learner/helper_utils.h b/src/learner/helper_utils.h index e2f8a3574..aa1e66bbc 100644 --- a/src/learner/helper_utils.h +++ b/src/learner/helper_utils.h @@ -7,6 +7,7 @@ */ #include #include +#include #include namespace xgboost { namespace learner { diff --git a/src/learner/objective-inl.hpp b/src/learner/objective-inl.hpp index 9e338a6b2..576549eac 100644 --- a/src/learner/objective-inl.hpp +++ b/src/learner/objective-inl.hpp @@ -6,9 +6,9 @@ * \author Tianqi Chen, Kailong Chen */ #include -#include #include #include +#include #include #include "../data.h" #include "./objective.h" @@ -37,7 +37,7 @@ struct LossType { case kLogisticRaw: case kLinearSquare: return x; case kLogisticClassify: - case kLogisticNeglik: return 1.0f / (1.0f + expf(-x)); + case kLogisticNeglik: return 1.0f / (1.0f + std::exp(-x)); default: utils::Error("unknown loss_type"); return 0.0f; } } @@ -50,7 +50,7 @@ struct LossType { inline float FirstOrderGradient(float predt, float label) const { switch (loss_type) { case kLinearSquare: return predt - label; - case kLogisticRaw: predt = 1.0f / (1.0f + expf(-predt)); + case kLogisticRaw: predt = 1.0f / (1.0f + std::exp(-predt)); case kLogisticClassify: case kLogisticNeglik: return predt - label; default: utils::Error("unknown loss_type"); return 0.0f; @@ -65,7 +65,7 @@ struct LossType { inline float SecondOrderGradient(float predt, float label) const { switch (loss_type) { case kLinearSquare: return 1.0f; - case kLogisticRaw: predt = 1.0f / (1.0f + expf(-predt)); + case kLogisticRaw: predt = 1.0f / (1.0f + std::exp(-predt)); case kLogisticClassify: case kLogisticNeglik: return predt * (1 - predt); default: utils::Error("unknown loss_type"); return 0.0f; @@ -80,7 +80,7 @@ struct LossType { loss_type == kLogisticNeglik ) { utils::Check(base_score > 0.0f && base_score < 1.0f, "base_score must be in (0,1) for logistic loss"); - base_score = -logf(1.0f / base_score - 1.0f); + base_score = -std::log(1.0f / base_score - 1.0f); } return base_score; } @@ -419,8 +419,8 @@ class LambdaRankObjNDCG : public LambdaRankObj { for (size_t i = 0; i < pairs.size(); ++i) { unsigned pos_idx = pairs[i].pos_index; unsigned neg_idx = pairs[i].neg_index; - float pos_loginv = 1.0f / logf(pos_idx + 2.0f); - float neg_loginv = 1.0f / logf(neg_idx + 2.0f); + float pos_loginv = 1.0f / std::log(pos_idx + 2.0f); + float neg_loginv = 1.0f / std::log(neg_idx + 2.0f); int pos_label = static_cast(sorted_list[pos_idx].label); int neg_label = static_cast(sorted_list[neg_idx].label); float original = @@ -438,7 +438,7 @@ class LambdaRankObjNDCG : public LambdaRankObj { for (size_t i = 0; i < labels.size(); ++i) { const unsigned rel = static_cast(labels[i]); if (rel != 0) { - sumdcg += ((1 << rel) - 1) / logf(static_cast(i + 2)); + sumdcg += ((1 << rel) - 1) / std::log(static_cast(i + 2)); } } return static_cast(sumdcg); diff --git a/src/utils/random.h b/src/utils/random.h index 590baca2a..57e1f243d 100644 --- a/src/utils/random.h +++ b/src/utils/random.h @@ -91,7 +91,7 @@ struct Random{ // use rand instead of rand_r in windows, for MSVC it is fine since rand is threadsafe // For cygwin and mingw, this can slows down parallelism, but rand_r is only used in objective-inl.hpp, won't affect speed in general // todo, replace with another PRNG -#if defined(_MSC_VER)||defined(_WIN32) +#if defined(_MSC_VER)||defined(_WIN32)||defined(XGBOOST_STRICT_CXX98_) return Uniform(); #else return static_cast(rand_r(&rseed)) / (static_cast(RAND_MAX) + 1.0); diff --git a/src/utils/utils.h b/src/utils/utils.h index 56a957df9..d2de82fbe 100644 --- a/src/utils/utils.h +++ b/src/utils/utils.h @@ -15,7 +15,7 @@ #endif #if !defined(__GNUC__) -#define fopen64 fopen +#define fopen64 std::fopen #endif #ifdef _MSC_VER // NOTE: sprintf_s is not equivalent to snprintf, @@ -31,7 +31,7 @@ #ifdef __APPLE__ #define off64_t off_t -#define fopen64 fopen +#define fopen64 std::fopen #endif extern "C" { @@ -52,6 +52,7 @@ typedef long int64_t; namespace xgboost { /*! \brief namespace for helper utils of the project */ namespace utils { + /*! \brief error message buffer length */ const int kPrintBuffer = 1 << 12; @@ -86,10 +87,10 @@ void HandlePrint(const char *msg); #ifdef XGBOOST_STRICT_CXX98_ // these function pointers are to be assigned -extern void (*Printf)(const char *fmt, ...); -extern void (*Assert)(int exp, const char *fmt, ...); -extern void (*Check)(int exp, const char *fmt, ...); -extern void (*Error)(const char *fmt, ...); +extern "C" void (*Printf)(const char *fmt, ...); +extern "C" void (*Assert)(int exp, const char *fmt, ...); +extern "C" void (*Check)(int exp, const char *fmt, ...); +extern "C" void (*Error)(const char *fmt, ...); #else /*! \brief printf, print message to the console */ inline void Printf(const char *fmt, ...) { From 9100ffc12a9c980e382f59120dd4142d1619386c Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 1 Sep 2014 22:32:03 -0700 Subject: [PATCH 56/64] chg version --- Makefile | 2 +- src/gbm/gbm.cpp | 1 + src/io/io.cpp | 1 + src/tree/updater.cpp | 1 + src/utils/utils.h | 1 - src/xgboost_main.cpp | 6 +++++- wrapper/xgboost_wrapper.cpp | 3 +++ 7 files changed, 12 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 3705bc4a5..28a289ac6 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ export CC = gcc export CXX = g++ export LDFLAGS= -pthread -lm -export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -pedantic -std=c++98 -Wpedantic -pedantic-errors +export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -pedantic ifeq ($(no_omp),1) CFLAGS += -DDISABLE_OPENMP diff --git a/src/gbm/gbm.cpp b/src/gbm/gbm.cpp index ae0e4af94..4713838e9 100644 --- a/src/gbm/gbm.cpp +++ b/src/gbm/gbm.cpp @@ -1,6 +1,7 @@ #define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_DEPRECATE #include +using namespace std; #include "./gbm.h" #include "./gbtree-inl.hpp" #include "./gblinear-inl.hpp" diff --git a/src/io/io.cpp b/src/io/io.cpp index d251d7a96..dead398f7 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -1,6 +1,7 @@ #define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_DEPRECATE #include +using namespace std; #include "./io.h" #include "../utils/io.h" #include "../utils/utils.h" diff --git a/src/tree/updater.cpp b/src/tree/updater.cpp index 25bee7922..09b63eb49 100644 --- a/src/tree/updater.cpp +++ b/src/tree/updater.cpp @@ -1,6 +1,7 @@ #define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_DEPRECATE #include +using namespace std; #include "./updater.h" #include "./updater_prune-inl.hpp" #include "./updater_refresh-inl.hpp" diff --git a/src/utils/utils.h b/src/utils/utils.h index d2de82fbe..66e29185a 100644 --- a/src/utils/utils.h +++ b/src/utils/utils.h @@ -84,7 +84,6 @@ void HandleCheckError(const char *msg); void HandlePrint(const char *msg); #endif #endif - #ifdef XGBOOST_STRICT_CXX98_ // these function pointers are to be assigned extern "C" void (*Printf)(const char *fmt, ...); diff --git a/src/xgboost_main.cpp b/src/xgboost_main.cpp index 13acacda2..75544dd0e 100644 --- a/src/xgboost_main.cpp +++ b/src/xgboost_main.cpp @@ -50,6 +50,7 @@ class BoostLearnTask{ if (!strcmp("use_buffer", name)) use_buffer = atoi(val); if (!strcmp("num_round", name)) num_round = atoi(val); if (!strcmp("pred_margin", name)) pred_margin = atoi(val); + if (!strcmp("ntree_limit", name)) ntree_limit = atoi(val); if (!strcmp("save_period", name)) save_period = atoi(val); if (!strcmp("eval_train", name)) eval_train = atoi(val); if (!strcmp("task", name)) task = val; @@ -79,6 +80,7 @@ class BoostLearnTask{ save_period = 0; eval_train = 0; pred_margin = 0; + ntree_limit = 0; dump_model_stats = 0; task = "train"; model_in = "NULL"; @@ -186,7 +188,7 @@ class BoostLearnTask{ inline void TaskPred(void) { std::vector preds; if (!silent) printf("start prediction...\n"); - learner.Predict(*data, pred_margin != 0, &preds); + learner.Predict(*data, pred_margin != 0, &preds, ntree_limit); if (!silent) printf("writing prediction to %s\n", name_pred.c_str()); FILE *fo = utils::FopenCheck(name_pred.c_str(), "w"); for (size_t i = 0; i < preds.size(); i++) { @@ -217,6 +219,8 @@ class BoostLearnTask{ std::string task; /*! \brief name of predict file */ std::string name_pred; + /*!\brief limit number of trees in prediction */ + int ntree_limit; /*!\brief whether to directly output margin value */ int pred_margin; /*! \brief whether dump statistics along with model */ diff --git a/wrapper/xgboost_wrapper.cpp b/wrapper/xgboost_wrapper.cpp index 3f45c1438..2c48bd8f1 100644 --- a/wrapper/xgboost_wrapper.cpp +++ b/wrapper/xgboost_wrapper.cpp @@ -6,6 +6,9 @@ #include #include #include +// include all std functions +using namespace std; + #include "./xgboost_wrapper.h" #include "../src/data.h" #include "../src/learner/learner-inl.hpp" From 29a7027dbab2f98665355643738262df1aa68758 Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 1 Sep 2014 22:50:48 -0700 Subject: [PATCH 57/64] fix the zero length vector --- R-package/R/utils.R | 8 +++++--- R-package/src/xgboost_R.cpp | 7 ++++++- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/R-package/R/utils.R b/R-package/R/utils.R index 015026fe6..da602478a 100644 --- a/R-package/R/utils.R +++ b/R-package/R/utils.R @@ -121,8 +121,8 @@ xgb.iter.eval <- function(booster, watchlist, iter) { stop("xgb.eval: watch list can only contain xgb.DMatrix") } } - evnames <- list() if (length(watchlist) != 0) { + evnames <- list() for (i in 1:length(watchlist)) { w <- watchlist[i] if (length(names(w)) == 0) { @@ -130,8 +130,10 @@ xgb.iter.eval <- function(booster, watchlist, iter) { } evnames <- append(evnames, names(w)) } + msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist, + evnames, PACKAGE = "xgboost") + } else { + msg <- "" } - msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist, - evnames, PACKAGE = "xgboost") return(msg) } diff --git a/R-package/src/xgboost_R.cpp b/R-package/src/xgboost_R.cpp index ac5934d17..f255d233d 100644 --- a/R-package/src/xgboost_R.cpp +++ b/R-package/src/xgboost_R.cpp @@ -197,7 +197,12 @@ extern "C" { for (int i = 0; i < len; ++i){ dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i))); } - void *handle = XGBoosterCreate(&dvec[0], dvec.size()); + void *handle; + if (dvec.size() == 0) { + handle = XGBoosterCreate(NULL, 0); + } else { + handle = XGBoosterCreate(&dvec[0], dvec.size()); + } SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE); UNPROTECT(1); From bb5c151f574c6e2a9281af5af5bfb44da7bd8825 Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 1 Sep 2014 23:12:50 -0700 Subject: [PATCH 58/64] move sprintf into std --- R-package/src/xgboost_R.cpp | 2 ++ R-package/src/xgboost_assert.c | 8 ++++++++ src/learner/evaluation.h | 2 +- src/learner/learner-inl.hpp | 6 +++--- src/utils/utils.h | 9 +++++++++ 5 files changed, 23 insertions(+), 4 deletions(-) diff --git a/R-package/src/xgboost_R.cpp b/R-package/src/xgboost_R.cpp index f255d233d..215328eda 100644 --- a/R-package/src/xgboost_R.cpp +++ b/R-package/src/xgboost_R.cpp @@ -14,6 +14,7 @@ using namespace xgboost; extern "C" { void XGBoostAssert_R(int exp, const char *fmt, ...); void XGBoostCheck_R(int exp, const char *fmt, ...); + int XGBoostSPrintf_R(char *buf, size_t size, const char *fmt, ...); } // implements error handling @@ -21,6 +22,7 @@ namespace xgboost { namespace utils { extern "C" { void (*Printf)(const char *fmt, ...) = Rprintf; + int (*SPrintf)(char *buf, size_t size, const char *fmt, ...) = XGBoostSPrintf_R; void (*Assert)(int exp, const char *fmt, ...) = XGBoostAssert_R; void (*Check)(int exp, const char *fmt, ...) = XGBoostCheck_R; void (*Error)(const char *fmt, ...) = error; diff --git a/R-package/src/xgboost_assert.c b/R-package/src/xgboost_assert.c index a2539c3b9..20b789492 100644 --- a/R-package/src/xgboost_assert.c +++ b/R-package/src/xgboost_assert.c @@ -23,3 +23,11 @@ void XGBoostCheck_R(int exp, const char *fmt, ...) { error("%s\n", buf); } } +int XGBoostSPrintf_R(char *buf, size_t size, const char *fmt, ...) { + int ret; + va_list args; + va_start(args, fmt); + ret = vsnprintf(buf, size, fmt, args); + va_end(args); + return ret; +} diff --git a/src/learner/evaluation.h b/src/learner/evaluation.h index 90f4a5839..ec37e1f4a 100644 --- a/src/learner/evaluation.h +++ b/src/learner/evaluation.h @@ -73,7 +73,7 @@ class EvalSet{ for (size_t i = 0; i < evals_.size(); ++i) { float res = evals_[i]->Eval(preds, info); char tmp[1024]; - snprintf(tmp, sizeof(tmp), "\t%s-%s:%f", evname, evals_[i]->Name(), res); + utils::SPrintf(tmp, sizeof(tmp), "\t%s-%s:%f", evname, evals_[i]->Name(), res); result += tmp; } return result; diff --git a/src/learner/learner-inl.hpp b/src/learner/learner-inl.hpp index 60e1fccf1..5d7c9d06a 100644 --- a/src/learner/learner-inl.hpp +++ b/src/learner/learner-inl.hpp @@ -63,10 +63,10 @@ class BoostLearner { } char str_temp[25]; if (num_feature > mparam.num_feature) { - snprintf(str_temp, sizeof(str_temp), "%u", num_feature); + utils::SPrintf(str_temp, sizeof(str_temp), "%u", num_feature); this->SetParam("bst:num_feature", str_temp); } - snprintf(str_temp, sizeof(str_temp), "%lu", + utils::SPrintf(str_temp, sizeof(str_temp), "%lu", static_cast(buffer_size)); this->SetParam("num_pbuffer", str_temp); if (!silent) { @@ -183,7 +183,7 @@ class BoostLearner { const std::vector &evname) { std::string res; char tmp[256]; - snprintf(tmp, sizeof(tmp), "[%d]", iter); + utils::SPrintf(tmp, sizeof(tmp), "[%d]", iter); res = tmp; for (size_t i = 0; i < evals.size(); ++i) { this->PredictRaw(*evals[i], &preds_); diff --git a/src/utils/utils.h b/src/utils/utils.h index 66e29185a..3798a579c 100644 --- a/src/utils/utils.h +++ b/src/utils/utils.h @@ -87,6 +87,7 @@ void HandlePrint(const char *msg); #ifdef XGBOOST_STRICT_CXX98_ // these function pointers are to be assigned extern "C" void (*Printf)(const char *fmt, ...); +extern "C" int (*SPrintf)(char *buf, size_t size, const char *fmt, ...); extern "C" void (*Assert)(int exp, const char *fmt, ...); extern "C" void (*Check)(int exp, const char *fmt, ...); extern "C" void (*Error)(const char *fmt, ...); @@ -100,6 +101,14 @@ inline void Printf(const char *fmt, ...) { va_end(args); HandlePrint(msg.c_str()); } +/*! \brief portable version of snprintf */ +inline int SPrintf(char *buf, size_t size, const char *fmt, ...) { + va_list args; + va_start(args, fmt); + int ret = vsnprintf(buf, size, fmt, args); + va_end(args); + return ret; +} /*! \brief assert an condition is true, use this to handle debug information */ inline void Assert(bool exp, const char *fmt, ...) { From c9f2f47acbf66713a2d50b0022d4c942b9819952 Mon Sep 17 00:00:00 2001 From: tqchen Date: Tue, 2 Sep 2014 00:12:15 -0700 Subject: [PATCH 59/64] fix som solaris --- src/io/simple_dmatrix-inl.hpp | 10 +++++----- src/learner/dmatrix.h | 4 ++-- src/tree/param.h | 18 +++++++++--------- src/tree/updater_refresh-inl.hpp | 1 - src/utils/io.h | 3 +-- 5 files changed, 17 insertions(+), 19 deletions(-) diff --git a/src/io/simple_dmatrix-inl.hpp b/src/io/simple_dmatrix-inl.hpp index 36c6c8fd7..ca2eeadac 100644 --- a/src/io/simple_dmatrix-inl.hpp +++ b/src/io/simple_dmatrix-inl.hpp @@ -147,9 +147,9 @@ class DMatrixSimple : public DataMatrix { * \param fname file name, used to print message */ inline void LoadBinary(utils::IStream &fs, bool silent = false, const char *fname = NULL) { - int magic; - utils::Check(fs.Read(&magic, sizeof(magic)) != 0, "invalid input file format"); - utils::Check(magic == kMagic, "invalid format,magic number mismatch"); + int tmagic; + utils::Check(fs.Read(&tmagic, sizeof(tmagic)) != 0, "invalid input file format"); + utils::Check(tmagic == kMagic, "invalid format,magic number mismatch"); info.LoadBinary(fs); FMatrixS::LoadBinary(fs, &row_ptr_, &row_data_); @@ -177,8 +177,8 @@ class DMatrixSimple : public DataMatrix { */ inline void SaveBinary(const char* fname, bool silent = false) const { utils::FileStream fs(utils::FopenCheck(fname, "wb")); - int magic = kMagic; - fs.Write(&magic, sizeof(magic)); + int tmagic = kMagic; + fs.Write(&tmagic, sizeof(tmagic)); info.SaveBinary(fs); FMatrixS::SaveBinary(fs, row_ptr_, row_data_); diff --git a/src/learner/dmatrix.h b/src/learner/dmatrix.h index 791b3467d..bef84900a 100644 --- a/src/learner/dmatrix.h +++ b/src/learner/dmatrix.h @@ -125,12 +125,12 @@ struct MetaInfo { } // try to load weight information from file, if exists inline bool TryLoadFloatInfo(const char *field, const char* fname, bool silent = false) { - std::vector &weights = this->GetFloatInfo(field); + std::vector &data = this->GetFloatInfo(field); FILE *fi = fopen64(fname, "r"); if (fi == NULL) return false; float wt; while (fscanf(fi, "%f", &wt) == 1) { - weights.push_back(wt); + data.push_back(wt); } if (!silent) { utils::Printf("loading %s from %s\n", field, fname); diff --git a/src/tree/param.h b/src/tree/param.h index 52c273749..602644e85 100644 --- a/src/tree/param.h +++ b/src/tree/param.h @@ -295,14 +295,14 @@ struct SplitEntry{ * \brief decides whether a we can replace current entry with the statistics given * This function gives better priority to lower index when loss_chg equals * not the best way, but helps to give consistent result during multi-thread execution - * \param loss_chg the loss reduction get through the split + * \param new_loss_chg the loss reduction get through the split * \param split_index the feature index where the split is on */ - inline bool NeedReplace(bst_float loss_chg, unsigned split_index) const { + inline bool NeedReplace(bst_float new_loss_chg, unsigned split_index) const { if (this->split_index() <= split_index) { - return loss_chg > this->loss_chg; + return new_loss_chg > this->loss_chg; } else { - return !(this->loss_chg > loss_chg); + return !(this->loss_chg > new_loss_chg); } } /*! @@ -322,19 +322,19 @@ struct SplitEntry{ } /*! * \brief update the split entry, replace it if e is better - * \param loss_chg loss reduction of new candidate + * \param new_loss_chg loss reduction of new candidate * \param split_index feature index to split on * \param split_value the split point * \param default_left whether the missing value goes to left * \return whether the proposed split is better and can replace current split */ - inline bool Update(bst_float loss_chg, unsigned split_index, - float split_value, bool default_left) { + inline bool Update(bst_float new_loss_chg, unsigned split_index, + float new_split_value, bool default_left) { if (this->NeedReplace(loss_chg, split_index)) { - this->loss_chg = loss_chg; + this->loss_chg = new_loss_chg; if (default_left) split_index |= (1U << 31); this->sindex = split_index; - this->split_value = split_value; + this->split_value = new_split_value; return true; } else { return false; diff --git a/src/tree/updater_refresh-inl.hpp b/src/tree/updater_refresh-inl.hpp index d184dcb39..2c7b19bea 100644 --- a/src/tree/updater_refresh-inl.hpp +++ b/src/tree/updater_refresh-inl.hpp @@ -29,7 +29,6 @@ class TreeRefresher: public IUpdater { const std::vector &trees) { if (trees.size() == 0) return; // number of threads - int nthread; // thread temporal space std::vector< std::vector > stemp; std::vector fvec_temp; diff --git a/src/utils/io.h b/src/utils/io.h index 4a80e9a58..a15e2f0ce 100644 --- a/src/utils/io.h +++ b/src/utils/io.h @@ -93,8 +93,7 @@ class FileStream : public IStream { private: FILE *fp; public: - explicit FileStream(FILE *fp) { - this->fp = fp; + explicit FileStream(FILE *fp) : fp(fp) { } virtual size_t Read(void *ptr, size_t size) { return fread(ptr, size, 1, fp); From 1d5db6877d456ccb6950e241ce0727e57c3b72d2 Mon Sep 17 00:00:00 2001 From: tqchen Date: Tue, 2 Sep 2014 08:55:26 -0700 Subject: [PATCH 60/64] fix param.h --- src/tree/param.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/tree/param.h b/src/tree/param.h index 602644e85..52c273749 100644 --- a/src/tree/param.h +++ b/src/tree/param.h @@ -295,14 +295,14 @@ struct SplitEntry{ * \brief decides whether a we can replace current entry with the statistics given * This function gives better priority to lower index when loss_chg equals * not the best way, but helps to give consistent result during multi-thread execution - * \param new_loss_chg the loss reduction get through the split + * \param loss_chg the loss reduction get through the split * \param split_index the feature index where the split is on */ - inline bool NeedReplace(bst_float new_loss_chg, unsigned split_index) const { + inline bool NeedReplace(bst_float loss_chg, unsigned split_index) const { if (this->split_index() <= split_index) { - return new_loss_chg > this->loss_chg; + return loss_chg > this->loss_chg; } else { - return !(this->loss_chg > new_loss_chg); + return !(this->loss_chg > loss_chg); } } /*! @@ -322,19 +322,19 @@ struct SplitEntry{ } /*! * \brief update the split entry, replace it if e is better - * \param new_loss_chg loss reduction of new candidate + * \param loss_chg loss reduction of new candidate * \param split_index feature index to split on * \param split_value the split point * \param default_left whether the missing value goes to left * \return whether the proposed split is better and can replace current split */ - inline bool Update(bst_float new_loss_chg, unsigned split_index, - float new_split_value, bool default_left) { + inline bool Update(bst_float loss_chg, unsigned split_index, + float split_value, bool default_left) { if (this->NeedReplace(loss_chg, split_index)) { - this->loss_chg = new_loss_chg; + this->loss_chg = loss_chg; if (default_left) split_index |= (1U << 31); this->sindex = split_index; - this->split_value = new_split_value; + this->split_value = split_value; return true; } else { return false; From 28128a1b6ee4571387bbd55bc943b6b7ebd78f51 Mon Sep 17 00:00:00 2001 From: tqchen Date: Tue, 2 Sep 2014 09:02:27 -0700 Subject: [PATCH 61/64] fix new warning --- src/tree/param.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/tree/param.h b/src/tree/param.h index 52c273749..79bc162c3 100644 --- a/src/tree/param.h +++ b/src/tree/param.h @@ -298,11 +298,11 @@ struct SplitEntry{ * \param loss_chg the loss reduction get through the split * \param split_index the feature index where the split is on */ - inline bool NeedReplace(bst_float loss_chg, unsigned split_index) const { + inline bool NeedReplace(bst_float new_loss_chg, unsigned split_index) const { if (this->split_index() <= split_index) { - return loss_chg > this->loss_chg; + return new_loss_chg > this->loss_chg; } else { - return !(this->loss_chg > loss_chg); + return !(this->loss_chg > new_loss_chg); } } /*! @@ -328,13 +328,13 @@ struct SplitEntry{ * \param default_left whether the missing value goes to left * \return whether the proposed split is better and can replace current split */ - inline bool Update(bst_float loss_chg, unsigned split_index, - float split_value, bool default_left) { - if (this->NeedReplace(loss_chg, split_index)) { - this->loss_chg = loss_chg; + inline bool Update(bst_float new_loss_chg, unsigned split_index, + float new_split_value, bool default_left) { + if (this->NeedReplace(new_loss_chg, split_index)) { + this->loss_chg = new_loss_chg; if (default_left) split_index |= (1U << 31); this->sindex = split_index; - this->split_value = split_value; + this->split_value = new_split_value; return true; } else { return false; From 70219ee1aea2115ccfeca957c9a68ab99a60e559 Mon Sep 17 00:00:00 2001 From: tqchen Date: Tue, 2 Sep 2014 09:06:24 -0700 Subject: [PATCH 62/64] move nthread to local var --- src/tree/updater_refresh-inl.hpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/tree/updater_refresh-inl.hpp b/src/tree/updater_refresh-inl.hpp index 2c7b19bea..a37630333 100644 --- a/src/tree/updater_refresh-inl.hpp +++ b/src/tree/updater_refresh-inl.hpp @@ -26,13 +26,14 @@ class TreeRefresher: public IUpdater { virtual void Update(const std::vector &gpair, IFMatrix *p_fmat, const BoosterInfo &info, - const std::vector &trees) { + const std::vector &trees) { if (trees.size() == 0) return; // number of threads // thread temporal space std::vector< std::vector > stemp; std::vector fvec_temp; // setup temp space for each thread + int nthread; #pragma omp parallel { nthread = omp_get_num_threads(); @@ -126,8 +127,6 @@ class TreeRefresher: public IUpdater { this->Refresh(gstats, tree[nid].cright(), p_tree); } } - // number of thread in the data - int nthread; // training parameter TrainParam param; }; From 27cabd131efa0af5ee782bd10c410c6b65cfa08f Mon Sep 17 00:00:00 2001 From: tqchen Date: Tue, 2 Sep 2014 11:01:38 -0700 Subject: [PATCH 63/64] add beginPtr, to make vector address taking safe --- R-package/src/xgboost_R.cpp | 23 ++++++++++------------- src/utils/utils.h | 11 ++++++++++- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/R-package/src/xgboost_R.cpp b/R-package/src/xgboost_R.cpp index 215328eda..cb2db8ddb 100644 --- a/R-package/src/xgboost_R.cpp +++ b/R-package/src/xgboost_R.cpp @@ -10,6 +10,7 @@ #include "src/utils/matrix_csr.h" using namespace std; using namespace xgboost; +using namespace xgboost::utils; extern "C" { void XGBoostAssert_R(int exp, const char *fmt, ...); @@ -80,7 +81,7 @@ extern "C" { data[i * ncol +j] = din[i + nrow * j]; } } - void *handle = XGDMatrixCreateFromMat(&data[0], nrow, ncol, asReal(missing)); + void *handle = XGDMatrixCreateFromMat(BeginPtr(data), nrow, ncol, asReal(missing)); SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); UNPROTECT(1); @@ -120,7 +121,8 @@ extern "C" { col_index[i] = csr_data[i].first; row_data[i] = csr_data[i].second; } - void *handle = XGDMatrixCreateFromCSR(&row_ptr[0], &col_index[0], &row_data[0], row_ptr.size(), ndata ); + void *handle = XGDMatrixCreateFromCSR(BeginPtr(row_ptr), BeginPtr(col_index), + BeginPtr(row_data), row_ptr.size(), ndata ); SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); UNPROTECT(1); @@ -134,7 +136,7 @@ extern "C" { for (int i = 0; i < len; ++i) { idxvec[i] = INTEGER(idxset)[i] - 1; } - void *res = XGDMatrixSliceDMatrix(R_ExternalPtrAddr(handle), &idxvec[0], len); + void *res = XGDMatrixSliceDMatrix(R_ExternalPtrAddr(handle), BeginPtr(idxvec), len); SEXP ret = PROTECT(R_MakeExternalPtr(res, R_NilValue, R_NilValue)); R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); UNPROTECT(1); @@ -157,7 +159,7 @@ extern "C" { for (int i = 0; i < len; ++i) { vec[i] = static_cast(INTEGER(array)[i]); } - XGDMatrixSetGroup(R_ExternalPtrAddr(handle), &vec[0], len); + XGDMatrixSetGroup(R_ExternalPtrAddr(handle), BeginPtr(vec), len); _WrapperEnd(); return; } @@ -169,7 +171,7 @@ extern "C" { } XGDMatrixSetFloatInfo(R_ExternalPtrAddr(handle), CHAR(asChar(field)), - &vec[0], len); + BeginPtr(vec), len); } _WrapperEnd(); } @@ -199,12 +201,7 @@ extern "C" { for (int i = 0; i < len; ++i){ dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i))); } - void *handle; - if (dvec.size() == 0) { - handle = XGBoosterCreate(NULL, 0); - } else { - handle = XGBoosterCreate(&dvec[0], dvec.size()); - } + void *handle = XGBoosterCreate(BeginPtr(dvec), dvec.size()); SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE); UNPROTECT(1); @@ -237,7 +234,7 @@ extern "C" { } XGBoosterBoostOneIter(R_ExternalPtrAddr(handle), R_ExternalPtrAddr(dtrain), - &tgrad[0], &thess[0], len); + BeginPtr(tgrad), BeginPtr(thess), len); _WrapperEnd(); } SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames) { @@ -256,7 +253,7 @@ extern "C" { } return mkString(XGBoosterEvalOneIter(R_ExternalPtrAddr(handle), asInteger(iter), - &vec_dmats[0], &vec_sptr[0], len)); + BeginPtr(vec_dmats), BeginPtr(vec_sptr), len)); _WrapperEnd(); } SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin, SEXP ntree_limit) { diff --git a/src/utils/utils.h b/src/utils/utils.h index 3798a579c..e16d03778 100644 --- a/src/utils/utils.h +++ b/src/utils/utils.h @@ -9,6 +9,7 @@ #include #include #include +#include #ifndef XGBOOST_STRICT_CXX98_ #include @@ -153,7 +154,15 @@ inline FILE *FopenCheck(const char *fname, const char *flag) { Check(fp != NULL, "can not open file \"%s\"\n", fname); return fp; } - +/*! \brief get the beginning address of a vector */ +template +inline T *BeginPtr(std::vector &vec) { + if (vec.size() == 0) { + return NULL; + } else { + return &vec[0]; + } +} } // namespace utils } // namespace xgboost #endif // XGBOOST_UTILS_UTILS_H_ From c75275a861654b60bdcda038003cb6a0fc802ca3 Mon Sep 17 00:00:00 2001 From: tqchen Date: Tue, 2 Sep 2014 11:14:57 -0700 Subject: [PATCH 64/64] more movement to beginptr --- R-package/src/xgboost_R.cpp | 1 - src/io/simple_dmatrix-inl.hpp | 10 ++++++---- src/io/simple_fmatrix-inl.hpp | 12 ++++++------ src/utils/utils.h | 12 +++++++++++- wrapper/xgboost_wrapper.cpp | 13 +++++++------ 5 files changed, 30 insertions(+), 18 deletions(-) diff --git a/R-package/src/xgboost_R.cpp b/R-package/src/xgboost_R.cpp index cb2db8ddb..a7753dfa5 100644 --- a/R-package/src/xgboost_R.cpp +++ b/R-package/src/xgboost_R.cpp @@ -10,7 +10,6 @@ #include "src/utils/matrix_csr.h" using namespace std; using namespace xgboost; -using namespace xgboost::utils; extern "C" { void XGBoostAssert_R(int exp, const char *fmt, ...); diff --git a/src/io/simple_dmatrix-inl.hpp b/src/io/simple_dmatrix-inl.hpp index ca2eeadac..0883955fe 100644 --- a/src/io/simple_dmatrix-inl.hpp +++ b/src/io/simple_dmatrix-inl.hpp @@ -54,8 +54,10 @@ class DMatrixSimple : public DataMatrix { for (size_t i = 0; i < batch.size; ++i) { RowBatch::Inst inst = batch[i]; row_data_.resize(row_data_.size() + inst.length); - memcpy(&row_data_[row_ptr_.back()], inst.data, - sizeof(RowBatch::Entry) * inst.length); + if (inst.length != 0) { + memcpy(&row_data_[row_ptr_.back()], inst.data, + sizeof(RowBatch::Entry) * inst.length); + } row_ptr_.push_back(row_ptr_.back() + inst.length); } } @@ -244,8 +246,8 @@ class DMatrixSimple : public DataMatrix { at_first_ = false; batch_.size = parent_->row_ptr_.size() - 1; batch_.base_rowid = 0; - batch_.ind_ptr = &parent_->row_ptr_[0]; - batch_.data_ptr = &parent_->row_data_[0]; + batch_.ind_ptr = BeginPtr(parent_->row_ptr_); + batch_.data_ptr = BeginPtr(parent_->row_data_); return true; } virtual const RowBatch &Value(void) const { diff --git a/src/io/simple_fmatrix-inl.hpp b/src/io/simple_fmatrix-inl.hpp index 86763a105..7c8631a29 100644 --- a/src/io/simple_fmatrix-inl.hpp +++ b/src/io/simple_fmatrix-inl.hpp @@ -110,9 +110,9 @@ class FMatrixS : public IFMatrix{ const std::vector &data) { size_t nrow = ptr.size() - 1; fo.Write(&nrow, sizeof(size_t)); - fo.Write(&ptr[0], ptr.size() * sizeof(size_t)); + fo.Write(BeginPtr(ptr), ptr.size() * sizeof(size_t)); if (data.size() != 0) { - fo.Write(&data[0], data.size() * sizeof(RowBatch::Entry)); + fo.Write(BeginPtr(data), data.size() * sizeof(RowBatch::Entry)); } } /*! @@ -127,11 +127,11 @@ class FMatrixS : public IFMatrix{ size_t nrow; utils::Check(fi.Read(&nrow, sizeof(size_t)) != 0, "invalid input file format"); out_ptr->resize(nrow + 1); - utils::Check(fi.Read(&(*out_ptr)[0], out_ptr->size() * sizeof(size_t)) != 0, + utils::Check(fi.Read(BeginPtr(*out_ptr), out_ptr->size() * sizeof(size_t)) != 0, "invalid input file format"); out_data->resize(out_ptr->back()); if (out_data->size() != 0) { - utils::Assert(fi.Read(&(*out_data)[0], out_data->size() * sizeof(RowBatch::Entry)) != 0, + utils::Assert(fi.Read(BeginPtr(*out_data), out_data->size() * sizeof(RowBatch::Entry)) != 0, "invalid input file format"); } } @@ -213,8 +213,8 @@ class FMatrixS : public IFMatrix{ col_data_[i] = SparseBatch::Inst(&data[0] + ptr[ridx], static_cast(ptr[ridx+1] - ptr[ridx])); } - batch_.col_index = &col_index_[0]; - batch_.col_data = &col_data_[0]; + batch_.col_index = BeginPtr(col_index_); + batch_.col_data = BeginPtr(col_data_); this->BeforeFirst(); } // data content diff --git a/src/utils/utils.h b/src/utils/utils.h index e16d03778..5c3342d8e 100644 --- a/src/utils/utils.h +++ b/src/utils/utils.h @@ -154,6 +154,8 @@ inline FILE *FopenCheck(const char *fname, const char *flag) { Check(fp != NULL, "can not open file \"%s\"\n", fname); return fp; } +} // namespace utils +// easy utils that can be directly acessed in xgboost /*! \brief get the beginning address of a vector */ template inline T *BeginPtr(std::vector &vec) { @@ -163,6 +165,14 @@ inline T *BeginPtr(std::vector &vec) { return &vec[0]; } } -} // namespace utils +/*! \brief get the beginning address of a vector */ +template +inline const T *BeginPtr(const std::vector &vec) { + if (vec.size() == 0) { + return NULL; + } else { + return &vec[0]; + } +} } // namespace xgboost #endif // XGBOOST_UTILS_UTILS_H_ diff --git a/wrapper/xgboost_wrapper.cpp b/wrapper/xgboost_wrapper.cpp index 2c48bd8f1..abb844bce 100644 --- a/wrapper/xgboost_wrapper.cpp +++ b/wrapper/xgboost_wrapper.cpp @@ -13,6 +13,7 @@ using namespace std; #include "../src/data.h" #include "../src/learner/learner-inl.hpp" #include "../src/io/io.h" +#include "../src/utils/utils.h" #include "../src/io/simple_dmatrix-inl.hpp" using namespace xgboost; @@ -32,7 +33,7 @@ class Booster: public learner::BoostLearner { this->CheckInitModel(); this->Predict(dmat, output_margin != 0, &this->preds_, ntree_limit); *len = static_cast(this->preds_.size()); - return &this->preds_[0]; + return BeginPtr(this->preds_); } inline void BoostOneIter(const DataMatrix &train, float *grad, float *hess, bst_ulong len) { @@ -60,7 +61,7 @@ class Booster: public learner::BoostLearner { model_dump_cptr[i] = model_dump[i].c_str(); } *len = static_cast(model_dump.size()); - return &model_dump_cptr[0]; + return BeginPtr(model_dump_cptr); } // temporal fields // temporal data to save evaluation dump @@ -177,13 +178,13 @@ extern "C"{ std::vector &vec = static_cast(handle)->info.GetFloatInfo(field); vec.resize(len); - memcpy(&vec[0], info, sizeof(float) * len); + memcpy(BeginPtr(vec), info, sizeof(float) * len); } void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *info, bst_ulong len) { std::vector &vec = static_cast(handle)->info.GetUIntInfo(field); vec.resize(len); - memcpy(&vec[0], info, sizeof(unsigned) * len); + memcpy(BeginPtr(vec), info, sizeof(unsigned) * len); } void XGDMatrixSetGroup(void *handle, const unsigned *group, bst_ulong len) { DataMatrix *pmat = static_cast(handle); @@ -197,13 +198,13 @@ extern "C"{ const std::vector &vec = static_cast(handle)->info.GetFloatInfo(field); *len = static_cast(vec.size()); - return &vec[0]; + return BeginPtr(vec); } const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, bst_ulong* len) { const std::vector &vec = static_cast(handle)->info.GetUIntInfo(field); *len = static_cast(vec.size()); - return &vec[0]; + return BeginPtr(vec); } bst_ulong XGDMatrixNumRow(const void *handle) { return static_cast(static_cast(handle)->info.num_row());