Merge branch 'master' into unity

tqchen 2014-09-02 11:22:57 -07:00
commit 76c513b191
58 changed files with 481 additions and 289 deletions

.gitignore

@@ -6,12 +6,15 @@
 # Compiled Dynamic libraries
 *.so
 *.dylib
+*.page
 # Compiled Static libraries
 *.lai
 *.la
 *.a
 *~
+*.Rcheck
+*.rds
+*.tar.gz
 *txt*
 *conf
 *buffer

Makefile

@@ -15,7 +15,7 @@ BIN = xgboost
 OBJ = updater.o gbm.o io.o
 SLIB = wrapper/libxgboostwrapper.so
-.PHONY: clean all python
+.PHONY: clean all python Rpack
 all: $(BIN) $(OBJ) $(SLIB)
@@ -40,19 +40,25 @@ $(OBJ) :
 install:
 	cp -f -r $(BIN) $(INSTALL_PATH)
-R-package.tar.gz:
-	rm -rf xgboost-R
-	cp -r R-package xgboost-R
-	rm -rf xgboost-R/src/*.o xgboost-R/src/*.so xgboost-R/src/*.dll
-	cp -r src xgboost-R/src/src
-	mkdir xgboost-R/src/wrapper
-	cp wrapper/xgboost_wrapper.h xgboost-R/src/wrapper
-	cp wrapper/xgboost_wrapper.cpp xgboost-R/src/wrapper
-	cp ./LICENSE xgboost-R
-	cat R-package/src/Makevars|sed '2s/.*/PKGROOT=./' > xgboost-R/src/Makevars
-	cat R-package/src/Makevars.win|sed '2s/.*/PKGROOT=./' > xgboost-R/src/Makevars.win
-	tar czf $@ xgboost-R
-	rm -rf xgboost-R
+Rpack:
+	make clean
+	rm -rf xgboost xgboost*.tar.gz
+	cp -r R-package xgboost
+	rm -rf xgboost/inst/examples/*.buffer
+	rm -rf xgboost/inst/examples/*.model
+	rm -rf xgboost/inst/examples/dump*
+	rm -rf xgboost/src/*.o xgboost/src/*.so xgboost/src/*.dll
+	rm -rf xgboost/demo/*.model xgboost/demo/*.buffer
+	cp -r src xgboost/src/src
+	mkdir xgboost/src/wrapper
+	cp wrapper/xgboost_wrapper.h xgboost/src/wrapper
+	cp wrapper/xgboost_wrapper.cpp xgboost/src/wrapper
+	cp ./LICENSE xgboost
+	cat R-package/src/Makevars|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars
+	cat R-package/src/Makevars.win|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars.win
+	R CMD build xgboost
+	rm -rf xgboost
+	R CMD check --as-cran xgboost*.tar.gz
 clean:
-	$(RM) $(OBJ) $(BIN) $(SLIB) *.o *~ */*~ */*/*~
+	$(RM) $(OBJ) $(BIN) $(SLIB) *.o */*.o */*/*.o *~ */*~ */*/*~

R-package/DESCRIPTION

@@ -1,12 +1,20 @@
 Package: xgboost
 Type: Package
-Title: R wrapper of xgboost
+Title: eXtreme Gradient Boosting
 Version: 0.3-0
 Date: 2014-08-23
-Author: Tianqi Chen, Tong He
-Maintainer: Tianqi Chen <tianqi.tchen@gmail.com>, Tong He <hetong007@gmail.com>
-Description: xgboost
-License: file LICENSE
+Author: Tianqi Chen <tianqi.tchen@gmail.com>, Tong He <hetong007@gmail.com>
+Maintainer: Tong He <hetong007@gmail.com>
+Description: This package is an R wrapper of xgboost, which is short for eXtreme
+    Gradient Boosting. It is an efficient and scalable implementation of the
+    gradient boosting framework. The package includes an efficient linear model
+    solver and a tree learning algorithm. The package can automatically do
+    parallel computation with OpenMP, and it can be more than 10 times faster
+    than existing gradient boosting packages such as gbm. It supports various
+    objective functions, including regression, classification and ranking. The
+    package is made to be extensible, so that users are also allowed to define
+    their own objectives easily.
+License: Apache License (== 2.0) | file LICENSE
 URL: https://github.com/tqchen/xgboost
 BugReports: https://github.com/tqchen/xgboost/issues
 Depends:

R-package/NAMESPACE

@@ -10,5 +10,6 @@ export(xgb.save)
 export(xgb.train)
 export(xgboost)
 exportMethods(predict)
+import(methods)
 importClassesFrom(Matrix,dgCMatrix)
 importClassesFrom(Matrix,dgeMatrix)


@@ -4,20 +4,23 @@ setClass('xgb.DMatrix')
 #'
 #' Get information of an xgb.DMatrix object
 #'
-#' @param object Object of class "xgb.DMatrix"
-#' @param name the name of the field to get
-#'
 #' @examples
 #' data(iris)
 #' iris[,5] <- as.numeric(iris[,5])
 #' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5])
 #' labels <- getinfo(dtrain, "label")
+#' @rdname getinfo
 #' @export
 #'
 getinfo <- function(object, ...){
   UseMethod("getinfo")
 }
+#' @param object Object of class "xgb.DMatrix"
+#' @param name the name of the field to get
+#' @param ... other parameters
+#' @rdname getinfo
+#' @method getinfo xgb.DMatrix
 setMethod("getinfo", signature = "xgb.DMatrix",
           definition = function(object, name) {
             if (typeof(name) != "character") {


@@ -11,7 +11,8 @@ setClass("xgb.Booster")
 #' value of sum of functions, when outputmargin=TRUE, the prediction is
 #' untransformed margin value. In logistic regression, outputmargin=T will
 #' output value before logistic transformation.
-#'
+#' @param ntreelimit limit the number of trees used in prediction; this parameter is only valid for gbtree, but not for gblinear.
+#' Set it to a value bigger than 0. It will use all trees by default.
 #' @examples
 #' data(iris)
 #' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2)
@@ -19,11 +20,18 @@ setClass("xgb.Booster")
 #' @export
 #'
 setMethod("predict", signature = "xgb.Booster",
-          definition = function(object, newdata, outputmargin = FALSE) {
+          definition = function(object, newdata, outputmargin = FALSE, ntreelimit = NULL) {
             if (class(newdata) != "xgb.DMatrix") {
               newdata <- xgb.DMatrix(newdata)
             }
+            if (is.null(ntreelimit)) {
+              ntreelimit <- 0
+            } else {
+              if (ntreelimit < 1){
+                stop("predict: ntreelimit must be equal to or greater than 1")
+              }
+            }
-            ret <- .Call("XGBoosterPredict_R", object, newdata, as.integer(outputmargin), PACKAGE = "xgboost")
+            ret <- .Call("XGBoosterPredict_R", object, newdata, as.integer(outputmargin), as.integer(ntreelimit), PACKAGE = "xgboost")
             return(ret)
           })
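As a usage sketch of the new argument (assuming the same iris-style toy data as the roxygen examples above, and a gbtree booster, which is the default):

```r
library(xgboost)

data(iris)
x <- as.matrix(iris[, 1:4])
y <- as.numeric(iris[, 5])

# train a small boosted-tree model
bst <- xgboost(x, y, nrounds = 5)

p_all   <- predict(bst, x)                  # ntreelimit = NULL: use all trees
p_first <- predict(bst, x, ntreelimit = 1)  # use only the first tree
# ntreelimit values below 1 are rejected with an error, and gblinear
# models do not accept the parameter at all
```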


@@ -6,22 +6,25 @@ setClass('xgb.DMatrix')
 #' Get a new DMatrix containing the specified rows of
 #' original xgb.DMatrix object
 #'
-#' @param object Object of class "xgb.DMatrix"
-#' @param idxset a integer vector of indices of rows needed
-#'
 #' @examples
 #' data(iris)
 #' iris[,5] <- as.numeric(iris[,5])
 #' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5])
 #' dsub <- slice(dtrain, 1:3)
+#' @rdname slice
 #' @export
 #'
 slice <- function(object, ...){
   UseMethod("slice")
 }
+#' @param object Object of class "xgb.DMatrix"
+#' @param idxset an integer vector of indices of rows needed
+#' @param ... other parameters
+#' @rdname slice
+#' @method slice xgb.DMatrix
 setMethod("slice", signature = "xgb.DMatrix",
-          definition = function(object, idxset) {
+          definition = function(object, idxset, ...) {
            if (class(object) != "xgb.DMatrix") {
              stop("slice: first argument dtrain must be xgb.DMatrix")
            }
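Since slice returns a new xgb.DMatrix, hold-out splits can be built by hand, which is what makes it convenient for cross-validation. A minimal sketch, with the fold assignment being purely illustrative:

```r
library(xgboost)

data(iris)
dall <- xgb.DMatrix(as.matrix(iris[, 1:4]), label = as.numeric(iris[, 5]))

n    <- nrow(iris)
fold <- sample(rep(1:5, length.out = n))  # assign each row to one of 5 folds

# hold out fold 1, train on the rest
dtrain <- slice(dall, which(fold != 1))
dvalid <- slice(dall, which(fold == 1))
```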


@@ -1,4 +1,5 @@
 #' @importClassesFrom Matrix dgCMatrix dgeMatrix
+#' @import methods
 # depends on matrix
 .onLoad <- function(libname, pkgname) {
@@ -48,7 +49,6 @@ xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
     }
   }
   handle <- .Call("XGBoosterCreate_R", cachelist, PACKAGE = "xgboost")
-  .Call("XGBoosterSetParam_R", handle, "seed", "0", PACKAGE = "xgboost")
   if (length(params) != 0) {
     for (i in 1:length(params)) {
       p <- params[i]
@@ -121,8 +121,8 @@ xgb.iter.eval <- function(booster, watchlist, iter) {
       stop("xgb.eval: watch list can only contain xgb.DMatrix")
     }
   }
-  evnames <- list()
   if (length(watchlist) != 0) {
+    evnames <- list()
     for (i in 1:length(watchlist)) {
       w <- watchlist[i]
       if (length(names(w)) == 0) {
@@ -130,8 +130,10 @@ xgb.iter.eval <- function(booster, watchlist, iter) {
       }
       evnames <- append(evnames, names(w))
     }
+    msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist,
+                 evnames, PACKAGE = "xgboost")
+  } else {
+    msg <- ""
   }
-  msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist,
-               evnames, PACKAGE = "xgboost")
   return(msg)
 }
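With the seed no longer forced to 0 here, and the C++ side now drawing from R's RNG (see the xgboost_R.cpp changes below), reproducibility of randomized training is controlled with set.seed on the R side. A sketch, with the subsample parameter chosen only to make training stochastic:

```r
library(xgboost)

data(iris)
dtrain <- xgb.DMatrix(as.matrix(iris[, 1:4]), label = as.numeric(iris[, 5]))

# the "seed" parameter is now ignored with a warning;
# use R's own RNG state instead
set.seed(42)
bst <- xgb.train(params = list(subsample = 0.5), dtrain, nrounds = 2,
                 watchlist = list(train = dtrain))
```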


@@ -2,7 +2,7 @@
 #'
 #' Save xgb.DMatrix object to binary file
 #'
-#' @param model the model object.
+#' @param DMatrix the xgb.DMatrix object to be saved.
 #' @param fname the name of the binary file.
 #'
 #' @examples


@@ -4,9 +4,12 @@
 #'
 #' @param model the model object.
 #' @param fname the name of the binary file.
-#' @param fmap feature map file representing the type of feature, to make it
-#' look nice, run demo/demo.R for result and demo/featmap.txt for example
-#' Format: https://github.com/tqchen/xgboost/wiki/Binary-Classification#dump-model
+#' @param fmap feature map file representing the type of each feature.
+#' A detailed description can be found at
+#' \url{https://github.com/tqchen/xgboost/wiki/Binary-Classification#dump-model}.
+#' Run inst/examples/demo.R for the result and see inst/examples/featmap.txt
+#' for an example of the format.
+#'
 #'
 #' @examples
 #' data(iris)
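For reference, a feature map is a plain-text file with one line per feature, `<feature id> <feature name> <type>`; the type codes (q for quantitative, i for indicator) follow the wiki page's convention, so treat the file contents below as an illustrative assumption rather than package output:

```r
library(xgboost)

# write a hypothetical feature map for the four iris columns
writeLines(c("0 sepal_length q",
             "1 sepal_width q",
             "2 petal_length q",
             "3 petal_width q"), "featmap.txt")

data(iris)
bst <- xgboost(as.matrix(iris[, 1:4]), as.numeric(iris[, 5]), nrounds = 2)
# dump the trees with readable feature names instead of f0, f1, ...
xgb.dump(bst, "dump.nice.txt", fmap = "featmap.txt")
```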


@@ -15,7 +15,7 @@
 #' }
 #'
 #' See \url{https://github.com/tqchen/xgboost/wiki/Parameters} for
-#' further details. See also demo/demo.R for walkthrough example in R.
+#' further details. See also inst/examples/demo.R for a walkthrough example in R.
 #' @param dtrain takes an \code{xgb.DMatrix} as the input.
 #' @param nrounds the max number of iterations
 #' @param watchlist what information should be printed when \code{verbose=1} or
@@ -24,10 +24,11 @@
 #' watchlist=list(validation1=mat1, validation2=mat2) to watch
 #' the performance of each round's model on mat1 and mat2
 #'
-#' @param obj customized objective function. Given prediction and dtrain,
-#' return gradient and second order gradient.
+#' @param obj customized objective function. Returns gradient and second order
+#' gradient with given prediction and dtrain.
-#' @param feval custimized evaluation function. Given prediction and dtrain,
-#' return a \code{list(metric='metric-name', value='metric-value')}.
+#' @param feval customized evaluation function. Returns
+#' \code{list(metric='metric-name', value='metric-value')} with given
+#' prediction and dtrain.
 #' @param ... other parameters to pass to \code{params}.
 #'
 #' @details
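A sketch of the calling convention these two parameters document, in the style of the package demos (the log-loss gradient pair and the error metric below are illustrative):

```r
library(xgboost)

# custom objective: given predictions and the training DMatrix,
# return the gradient and second order gradient (hessian)
logregobj <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  preds  <- 1 / (1 + exp(-preds))   # sigmoid of the raw margin
  grad   <- preds - labels
  hess   <- preds * (1 - preds)
  return(list(grad = grad, hess = hess))
}

# custom evaluation: return list(metric=..., value=...)
evalerror <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  err <- mean(as.numeric(preds > 0) != labels)
  return(list(metric = "error", value = err))
}

data(iris)
dtrain <- xgb.DMatrix(as.matrix(iris[, 1:4]),
                      label = as.numeric(iris[, 5] == "setosa"))
bst <- xgb.train(list(), dtrain, nrounds = 2, obj = logregobj, feval = evalerror)
```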


@@ -19,7 +19,7 @@
 #' }
 #'
 #' See \url{https://github.com/tqchen/xgboost/wiki/Parameters} for
-#' further details. See also demo/demo.R for walkthrough example in R.
+#' further details. See also inst/examples/demo.R for a walkthrough example in R.
 #' @param nrounds the max number of iterations
 #' @param verbose If 0, xgboost will stay silent. If 1, xgboost will print
 #' information of performance. If 2, xgboost will print information of both

R-package/README.md

@@ -1,10 +1,21 @@
-This is subfolder for experimental version of R package.
+# R package for xgboost
-Installation:
+## Installation
+For the up-to-date version (which is recommended), please install from github. Windows users will need to install [RTools](http://cran.r-project.org/bin/windows/Rtools/) first.
 ```r
 require(devtools)
 install_github('xgboost','tqchen',subdir='R-package')
 ```
-Please visit [demo](https://github.com/tqchen/xgboost/blob/master/R-package/demo/demo.R) for more details.
+For the stable version on CRAN, please run
+```r
+install.packages('xgboost')
+```
+## Examples
+* Please visit [demo](https://github.com/tqchen/xgboost/blob/master/R-package/inst/examples/demo.R) for a walkthrough example.
+* See also the [example scripts](https://github.com/tqchen/xgboost/tree/master/demo/kaggle-higgs) for the Kaggle Higgs Challenge, including a [speedtest script](https://github.com/tqchen/xgboost/blob/master/demo/kaggle-higgs/speedtest.R) on this dataset.


@@ -1 +0,0 @@
-demo R code for xgboost usages on agaricus data

R-package/man/getinfo.Rd

@@ -1,14 +1,20 @@
 % Generated by roxygen2 (4.0.1): do not edit by hand
+\docType{methods}
 \name{getinfo}
 \alias{getinfo}
+\alias{getinfo,xgb.DMatrix-method}
 \title{Get information of an xgb.DMatrix object}
 \usage{
 getinfo(object, ...)
+
+\S4method{getinfo}{xgb.DMatrix}(object, name)
 }
 \arguments{
 \item{object}{Object of class "xgb.DMatrix"}

 \item{name}{the name of the field to get}
+
+\item{...}{other parameters}
 }
 \description{
 Get information of an xgb.DMatrix object


@@ -4,7 +4,8 @@
 \alias{predict,xgb.Booster-method}
 \title{Predict method for eXtreme Gradient Boosting model}
 \usage{
-\S4method{predict}{xgb.Booster}(object, newdata, outputmargin = FALSE)
+\S4method{predict}{xgb.Booster}(object, newdata, outputmargin = FALSE,
+  ntreelimit = NULL)
 }
@@ -13,9 +14,12 @@
 \code{xgb.DMatrix}.}

 \item{outputmargin}{whether the prediction should be shown in the original
 value of sum of functions, when outputmargin=TRUE, the prediction is
 untransformed margin value. In logistic regression, outputmargin=T will
 output value before logistic transformation.}
+
+\item{ntreelimit}{limit the number of trees used in prediction; this parameter is only valid for gbtree, but not for gblinear.
+Set it to a value bigger than 0. It will use all trees by default.}
 }
 \description{
 Predicted values based on xgboost model object.

R-package/man/slice.Rd

@@ -1,15 +1,21 @@
 % Generated by roxygen2 (4.0.1): do not edit by hand
+\docType{methods}
 \name{slice}
 \alias{slice}
+\alias{slice,xgb.DMatrix-method}
 \title{Get a new DMatrix containing the specified rows of
 original xgb.DMatrix object}
 \usage{
 slice(object, ...)
+
+\S4method{slice}{xgb.DMatrix}(object, idxset, ...)
 }
 \arguments{
 \item{object}{Object of class "xgb.DMatrix"}

 \item{idxset}{an integer vector of indices of rows needed}
+
+\item{...}{other parameters}
 }
 \description{
 Get a new DMatrix containing the specified rows of

R-package/man/xgb.DMatrix.save.Rd

@@ -6,7 +6,7 @@
 xgb.DMatrix.save(DMatrix, fname)
 }
 \arguments{
-\item{model}{the model object.}
+\item{DMatrix}{the xgb.DMatrix object to be saved.}

 \item{fname}{the name of the binary file.}
 }

R-package/man/xgb.dump.Rd

@@ -10,9 +10,11 @@ xgb.dump(model, fname, fmap = "")
 \item{fname}{the name of the binary file.}

-\item{fmap}{feature map file representing the type of feature, to make it
-look nice, run demo/demo.R for result and demo/featmap.txt for example
-Format: https://github.com/tqchen/xgboost/wiki/Binary-Classification#dump-model}
+\item{fmap}{feature map file representing the type of each feature.
+A detailed description can be found at
+\url{https://github.com/tqchen/xgboost/wiki/Binary-Classification#dump-model}.
+Run inst/examples/demo.R for the result and see inst/examples/featmap.txt
+for an example of the format.}
 }
 \description{
 Save an xgboost model to a text file. Could be parsed later.

R-package/man/xgb.train.Rd

@@ -20,7 +20,7 @@ xgb.train(params = list(), dtrain, nrounds, watchlist = list(),
 }

 See \url{https://github.com/tqchen/xgboost/wiki/Parameters} for
-further details. See also demo/demo.R for walkthrough example in R.}
+further details. See also inst/examples/demo.R for a walkthrough example in R.}

 \item{dtrain}{takes an \code{xgb.DMatrix} as the input.}
@@ -32,11 +32,12 @@ xgb.train(params = list(), dtrain, nrounds, watchlist = list(),
 watchlist=list(validation1=mat1, validation2=mat2) to watch
 the performance of each round's model on mat1 and mat2}

-\item{obj}{customized objective function. Given prediction and dtrain,
-return gradient and second order gradient.}
+\item{obj}{customized objective function. Returns gradient and second order
+gradient with given prediction and dtrain.}

-\item{feval}{custimized evaluation function. Given prediction and dtrain,
-return a \code{list(metric='metric-name', value='metric-value')}.}
+\item{feval}{customized evaluation function. Returns
+\code{list(metric='metric-name', value='metric-value')} with given
+prediction and dtrain.}

 \item{...}{other parameters to pass to \code{params}.}
 }

R-package/man/xgboost.Rd

@@ -25,7 +25,7 @@ xgboost(data = NULL, label = NULL, params = list(), nrounds,
 }

 See \url{https://github.com/tqchen/xgboost/wiki/Parameters} for
-further details. See also demo/demo.R for walkthrough example in R.}
+further details. See also inst/examples/demo.R for a walkthrough example in R.}

 \item{nrounds}{the max number of iterations}

R-package/src/Makevars

@@ -1,32 +1,7 @@
 # package root
 PKGROOT=../../
 # _*_ mode: Makefile; _*_
-CXX=`R CMD config CXX`
-TCFLAGS=`R CMD config CFLAGS`
-# expose these flags to R CMD SHLIB
-PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_ERROR_ -I$(PKGROOT) $(SHLIB_OPENMP_CFLAGS)
-PKG_CPPFLAGS+= $(SHLIB_PTHREAD_FLAGS)
-XGBFLAG= $(TCFLAGS) -DXGBOOST_CUSTOMIZE_ERROR_ -fPIC $(SHLIB_OPENMP_CFLAGS) $(SHLIB_PTHREAD_FLAGS)
-ifeq ($(no_omp),1)
-	PKG_CPPFLAGS += -DDISABLE_OPENMP
-endif
-CXXOBJ= xgboost_wrapper.o xgboost_io.o xgboost_gbm.o xgboost_updater.o
-OBJECTS= xgboost_R.o $(CXXOBJ)
-.PHONY: all clean
-all: $(SHLIB)
-$(SHLIB): $(OBJECTS)
-xgboost_wrapper.o: $(PKGROOT)/wrapper/xgboost_wrapper.cpp
-xgboost_io.o: $(PKGROOT)/src/io/io.cpp
-xgboost_gbm.o: $(PKGROOT)/src/gbm/gbm.cpp
-xgboost_updater.o: $(PKGROOT)/src/tree/updater.cpp
-$(CXXOBJ) :
-	$(CXX) -c $(XGBFLAG) -o $@ $(firstword $(filter %.cpp %.c, $^) )
-clean:
-	rm -rf *.so *.o *~ *.dll
+PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -DXGBOOST_CUSTOMIZE_PRNG_ -DXGBOOST_STRICT_CXX98_ -I$(PKGROOT)
+PKG_CXXFLAGS= $(SHLIB_OPENMP_CFLAGS)
+PKG_LIBS = $(SHLIB_OPENMP_CFLAGS)
+OBJECTS= xgboost_R.o xgboost_assert.o $(PKGROOT)/wrapper/xgboost_wrapper.o $(PKGROOT)/src/io/io.o $(PKGROOT)/src/gbm/gbm.o $(PKGROOT)/src/tree/updater.o

R-package/src/Makevars.win

@@ -1,33 +1,7 @@
 # package root
 PKGROOT=../../
 # _*_ mode: Makefile; _*_
-CXX=`Rcmd config CXX`
-TCFLAGS=`Rcmd config CFLAGS`
-# expose these flags to R CMD SHLIB
-PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_ERROR_ -I$(PKGROOT) $(SHLIB_OPENMP_CFLAGS)
-PKG_CPPFLAGS+= $(SHLIB_PTHREAD_FLAGS)
-XGBFLAG= -O3 -DXGBOOST_CUSTOMIZE_ERROR_ -fPIC $(SHLIB_OPENMP_CFLAGS) $(SHLIB_PTHREAD_FLAGS)
-PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) $(SHLIB_PTHREAD_FLAGS)
-ifeq ($(no_omp),1)
-	PKG_CPPFLAGS += -DDISABLE_OPENMP
-endif
-CXXOBJ= xgboost_wrapper.o xgboost_io.o xgboost_gbm.o xgboost_updater.o
-OBJECTS= xgboost_R.o $(CXXOBJ)
-.PHONY: all clean
-all: $(SHLIB)
-$(SHLIB): $(OBJECTS)
-xgboost_wrapper.o: $(PKGROOT)/wrapper/xgboost_wrapper.cpp
-xgboost_io.o: $(PKGROOT)/src/io/io.cpp
-xgboost_gbm.o: $(PKGROOT)/src/gbm/gbm.cpp
-xgboost_updater.o: $(PKGROOT)/src/tree/updater.cpp
-$(CXXOBJ) :
-	$(CXX) -c $(XGBFLAG) -o $@ $(firstword $(filter %.cpp %.c, $^) )
-clean:
-	rm -rf *.so *.o *~ *.dll
+PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -DXGBOOST_CUSTOMIZE_PRNG_ -DXGBOOST_STRICT_CXX98_ -I$(PKGROOT)
+PKG_CXXFLAGS= $(SHLIB_OPENMP_CFLAGS)
+PKG_LIBS = $(SHLIB_OPENMP_CFLAGS)
+OBJECTS= xgboost_R.o xgboost_assert.o $(PKGROOT)/wrapper/xgboost_wrapper.o $(PKGROOT)/src/io/io.o $(PKGROOT)/src/gbm/gbm.o $(PKGROOT)/src/tree/updater.o

R-package/src/xgboost_R.cpp

@@ -2,25 +2,55 @@
 #include <string>
 #include <utility>
 #include <cstring>
+#include <cstdio>
 #include "xgboost_R.h"
 #include "wrapper/xgboost_wrapper.h"
 #include "src/utils/utils.h"
 #include "src/utils/omp.h"
 #include "src/utils/matrix_csr.h"
+using namespace std;
 using namespace xgboost;
+extern "C" {
+  void XGBoostAssert_R(int exp, const char *fmt, ...);
+  void XGBoostCheck_R(int exp, const char *fmt, ...);
+  int XGBoostSPrintf_R(char *buf, size_t size, const char *fmt, ...);
+}
 // implements error handling
 namespace xgboost {
 namespace utils {
-void HandleAssertError(const char *msg) {
-  error("%s", msg);
-}
-void HandleCheckError(const char *msg) {
-  error("%s", msg);
-}
+extern "C" {
+  void (*Printf)(const char *fmt, ...) = Rprintf;
+  int (*SPrintf)(char *buf, size_t size, const char *fmt, ...) = XGBoostSPrintf_R;
+  void (*Assert)(int exp, const char *fmt, ...) = XGBoostAssert_R;
+  void (*Check)(int exp, const char *fmt, ...) = XGBoostCheck_R;
+  void (*Error)(const char *fmt, ...) = error;
+}
 } // namespace utils
+namespace random {
+void Seed(unsigned seed) {
+  warning("parameter seed is ignored, please set random seed using set.seed");
+}
+double Uniform(void) {
+  return unif_rand();
+}
+double Normal(void) {
+  return norm_rand();
+}
+} // namespace random
 } // namespace xgboost
+// call before wrapper starts
+inline void _WrapperBegin(void) {
+  GetRNGstate();
+}
+// call after wrapper finishes
+inline void _WrapperEnd(void) {
+  PutRNGstate();
+}
 extern "C" {
   void _DMatrixFinalizer(SEXP ext) {
     if (R_ExternalPtrAddr(ext) == NULL) return;
@@ -28,14 +58,17 @@ extern "C" {
     R_ClearExternalPtr(ext);
   }
   SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) {
+    _WrapperBegin();
    void *handle = XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent));
    SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
    R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
    UNPROTECT(1);
+    _WrapperEnd();
    return ret;
  }
  SEXP XGDMatrixCreateFromMat_R(SEXP mat,
                                SEXP missing) {
+    _WrapperBegin();
    SEXP dim = getAttrib(mat, R_DimSymbol);
    int nrow = INTEGER(dim)[0];
    int ncol = INTEGER(dim)[1];
@@ -47,15 +80,17 @@ extern "C" {
        data[i * ncol +j] = din[i + nrow * j];
      }
    }
-    void *handle = XGDMatrixCreateFromMat(&data[0], nrow, ncol, asReal(missing));
+    void *handle = XGDMatrixCreateFromMat(BeginPtr(data), nrow, ncol, asReal(missing));
    SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
    R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
    UNPROTECT(1);
+    _WrapperEnd();
    return ret;
  }
  SEXP XGDMatrixCreateFromCSC_R(SEXP indptr,
                                SEXP indices,
                                SEXP data) {
+    _WrapperBegin();
    const int *col_ptr = INTEGER(indptr);
    const int *row_index = INTEGER(indices);
    const double *col_data = REAL(data);
@@ -85,29 +120,36 @@ extern "C" {
      col_index[i] = csr_data[i].first;
      row_data[i] = csr_data[i].second;
    }
-    void *handle = XGDMatrixCreateFromCSR(&row_ptr[0], &col_index[0], &row_data[0], row_ptr.size(), ndata );
+    void *handle = XGDMatrixCreateFromCSR(BeginPtr(row_ptr), BeginPtr(col_index),
+                                          BeginPtr(row_data), row_ptr.size(), ndata );
    SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
    R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
    UNPROTECT(1);
+    _WrapperEnd();
    return ret;
  }
  SEXP XGDMatrixSliceDMatrix_R(SEXP handle, SEXP idxset) {
+    _WrapperBegin();
    int len = length(idxset);
    std::vector<int> idxvec(len);
    for (int i = 0; i < len; ++i) {
      idxvec[i] = INTEGER(idxset)[i] - 1;
    }
-    void *res = XGDMatrixSliceDMatrix(R_ExternalPtrAddr(handle), &idxvec[0], len);
+    void *res = XGDMatrixSliceDMatrix(R_ExternalPtrAddr(handle), BeginPtr(idxvec), len);
    SEXP ret = PROTECT(R_MakeExternalPtr(res, R_NilValue, R_NilValue));
    R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
    UNPROTECT(1);
+    _WrapperEnd();
    return ret;
  }
  void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent) {
+    _WrapperBegin();
    XGDMatrixSaveBinary(R_ExternalPtrAddr(handle),
                        CHAR(asChar(fname)), asInteger(silent));
+    _WrapperEnd();
  }
  void XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array) {
+    _WrapperBegin();
    int len = length(array);
    const char *name = CHAR(asChar(field));
    if (!strcmp("group", name)) {
@@ -116,7 +158,8 @@ extern "C" {
      for (int i = 0; i < len; ++i) {
        vec[i] = static_cast<unsigned>(INTEGER(array)[i]);
      }
-      XGDMatrixSetGroup(R_ExternalPtrAddr(handle), &vec[0], len);
+      XGDMatrixSetGroup(R_ExternalPtrAddr(handle), BeginPtr(vec), len);
+      _WrapperEnd();
      return;
    }
    {
@@ -127,10 +170,12 @@ extern "C" {
      }
      XGDMatrixSetFloatInfo(R_ExternalPtrAddr(handle),
                            CHAR(asChar(field)),
-                            &vec[0], len);
+                            BeginPtr(vec), len);
    }
+    _WrapperEnd();
  }
  SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) {
+    _WrapperBegin();
    bst_ulong olen;
    const float *res = XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle),
                                             CHAR(asChar(field)), &olen);
@@ -139,6 +184,7 @@ extern "C" {
      REAL(ret)[i] = res[i];
    }
    UNPROTECT(1);
+    _WrapperEnd();
    return ret;
  }
  // functions related to booster
@@ -148,28 +194,35 @@ extern "C" {
    R_ClearExternalPtr(ext);
  }
  SEXP XGBoosterCreate_R(SEXP dmats) {
+    _WrapperBegin();
    int len = length(dmats);
    std::vector<void*> dvec;
    for (int i = 0; i < len; ++i){
      dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i)));
    }
-    void *handle = XGBoosterCreate(&dvec[0], dvec.size());
+    void *handle = XGBoosterCreate(BeginPtr(dvec), dvec.size());
    SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
    R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
    UNPROTECT(1);
+    _WrapperEnd();
    return ret;
  }
  void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val) {
+    _WrapperBegin();
    XGBoosterSetParam(R_ExternalPtrAddr(handle),
                      CHAR(asChar(name)),
                      CHAR(asChar(val)));
+    _WrapperEnd();
  }
  void XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain) {
+    _WrapperBegin();
    XGBoosterUpdateOneIter(R_ExternalPtrAddr(handle),
                           asInteger(iter),
                           R_ExternalPtrAddr(dtrain));
+    _WrapperEnd();
  }
  void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess) {
+    _WrapperBegin();
    utils::Check(length(grad) == length(hess), "gradient and hess must have same length");
    int len = length(grad);
    std::vector<float> tgrad(len), thess(len);
@@ -180,9 +233,11 @@ extern "C" {
    }
    XGBoosterBoostOneIter(R_ExternalPtrAddr(handle),
                          R_ExternalPtrAddr(dtrain),
-                          &tgrad[0], &thess[0], len);
+                          BeginPtr(tgrad), BeginPtr(thess), len);
+    _WrapperEnd();
  }
  SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames) {
+    _WrapperBegin();
    utils::Check(length(dmats) == length(evnames), "dmats and evnams must have same length");
    int len = length(dmats);
    std::vector<void*> vec_dmats;
@@ -197,28 +252,37 @@ extern "C" {
    }
    return mkString(XGBoosterEvalOneIter(R_ExternalPtrAddr(handle),
                                         asInteger(iter),
-                                         &vec_dmats[0], &vec_sptr[0], len));
+                                         BeginPtr(vec_dmats), BeginPtr(vec_sptr), len));
+    _WrapperEnd();
  }
-  SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin) {
+  SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin, SEXP ntree_limit) {
+    _WrapperBegin();
    bst_ulong olen;
    const float *res = XGBoosterPredict(R_ExternalPtrAddr(handle),
                                        R_ExternalPtrAddr(dmat),
                                        asInteger(output_margin),
+                                        asInteger(ntree_limit),
                                        &olen);
    SEXP ret = PROTECT(allocVector(REALSXP, olen));
    for (size_t i = 0; i < olen; ++i) {
      REAL(ret)[i] = res[i];
    }
    UNPROTECT(1);
+    _WrapperEnd();
    return ret;
  }
  void XGBoosterLoadModel_R(SEXP handle, SEXP fname) {
+    _WrapperBegin();
    XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)));
+    _WrapperEnd();
  }
  void XGBoosterSaveModel_R(SEXP handle, SEXP fname) {
+    _WrapperBegin();
    XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)));
+    _WrapperEnd();
  }
  void XGBoosterDumpModel_R(SEXP handle, SEXP fname, SEXP fmap) {
+    _WrapperBegin();
    bst_ulong olen;
    const char **res = XGBoosterDumpModel(R_ExternalPtrAddr(handle),
                                          CHAR(asChar(fmap)),
@@ -229,5 +293,6 @@ extern "C" {
      fprintf(fo, "%s", res[i]);
    }
    fclose(fo);
+    _WrapperEnd();
  }
 }

R-package/src/xgboost_R.h

@@ -7,6 +7,7 @@
  */
 extern "C" {
 #include <Rinternals.h>
+#include <R_ext/Random.h>
 }

 extern "C" {
@@ -106,8 +107,9 @@ extern "C" {
  * \param handle handle
  * \param dmat data matrix
  * \param output_margin whether only output raw margin value
+ * \param ntree_limit limit number of trees used in prediction
  */
-SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin);
+SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin, SEXP ntree_limit);
 /*!
  * \brief load model from existing file
  * \param handle handle

R-package/src/xgboost_assert.c

@@ -0,0 +1,33 @@
+#include <stdio.h>
+#include <stdarg.h>
+#include <Rinternals.h>
+// implements error handling
+void XGBoostAssert_R(int exp, const char *fmt, ...) {
+  char buf[1024];
+  if (exp == 0) {
+    va_list args;
+    va_start(args, fmt);
+    vsprintf(buf, fmt, args);
+    va_end(args);
+    error("AssertError:%s\n", buf);
+  }
+}
+void XGBoostCheck_R(int exp, const char *fmt, ...) {
+  char buf[1024];
+  if (exp == 0) {
+    va_list args;
+    va_start(args, fmt);
+    vsprintf(buf, fmt, args);
+    va_end(args);
+    error("%s\n", buf);
+  }
+}
+int XGBoostSPrintf_R(char *buf, size_t size, const char *fmt, ...) {
+  int ret;
+  va_list args;
+  va_start(args, fmt);
+  ret = vsnprintf(buf, size, fmt, args);
+  va_end(args);
+  return ret;
+}


@@ -173,7 +173,7 @@ objective function.
 We also have \verb@slice@ for row extraction. It is useful in
 cross-validation.

-For a walkthrough demo, please see \verb@R-package/demo/demo.R@ for further
+For a walkthrough demo, please see \verb@R-package/inst/examples/demo.R@ for further
 details.

 \section{The Higgs Boson competition}

README.md

@@ -35,11 +35,11 @@ Version
 ======
 * This version is xgboost-0.3; the code has been refactored from 0.2x to be cleaner and more flexible
 * This version of xgboost is not compatible with 0.2x, due to the huge amount of changes in code structure
-  - This means the model and buffer file of previous version can not be loaded in xgboost-unity
+  - This means the model and buffer files of the previous version can not be loaded in xgboost-0.3
 * For legacy 0.2x code, refer to [Here](https://github.com/tqchen/xgboost/releases/tag/v0.22)
 * Change log in [CHANGES.md](CHANGES.md)

 XGBoost in Graphlab Create
 ======
-* XGBoost is adopted as part of boosted tree toolkit in Graphlab Create (GLC). Graphlab Create is a powerful python toolkit that allows you to data manipulation, graph processing, hyper-parameter search, and visualization of big data in one framework. Try the Graphlab Create in http://graphlab.com/products/create/quick-start-guide.html
+* XGBoost is adopted as part of the boosted tree toolkit in Graphlab Create (GLC). Graphlab Create is a powerful python toolkit that allows you to do data manipulation, graph processing, hyper-parameter search, and visualization of TeraBytes scale data in one framework. Try Graphlab Create at http://graphlab.com/products/create/quick-start-guide.html
 * Nice blogpost by Jay Gu using GLC boosted tree to solve kaggle bike sharing challenge: http://blog.graphlab.com/using-gradient-boosted-trees-to-predict-bike-sharing-demand

src/gbm/gblinear-inl.hpp

@@ -105,7 +105,10 @@ class GBLinear : public IGradBooster {
   virtual void Predict(IFMatrix *p_fmat,
                        int64_t buffer_offset,
                        const BoosterInfo &info,
-                       std::vector<float> *out_preds) {
+                       std::vector<float> *out_preds,
+                       unsigned ntree_limit = 0) {
+    utils::Check(ntree_limit == 0,
+                 "GBLinear::Predict ntrees is only valid for gbtree predictor");
     std::vector<float> &preds = *out_preds;
     preds.resize(0);
     // start collecting the prediction

src/gbm/gbm.cpp

@@ -1,6 +1,7 @@
 #define _CRT_SECURE_NO_WARNINGS
 #define _CRT_SECURE_NO_DEPRECATE
 #include <cstring>
+using namespace std;
 #include "./gbm.h"
 #include "./gbtree-inl.hpp"
 #include "./gblinear-inl.hpp"

src/gbm/gbm.h

@@ -57,11 +57,14 @@ class IGradBooster {
    * the size of buffer is set by convention using IGradBooster.SetParam("num_pbuffer","size")
    * \param info extra side information that may be needed for prediction
    * \param out_preds output vector to hold the predictions
+   * \param ntree_limit limit the number of trees used in prediction; when it equals 0,
+   *        we do not limit the number of trees; this parameter is only valid for gbtree, but not for gblinear
    */
   virtual void Predict(IFMatrix *p_fmat,
                        int64_t buffer_offset,
                        const BoosterInfo &info,
-                       std::vector<float> *out_preds) = 0;
+                       std::vector<float> *out_preds,
+                       unsigned ntree_limit = 0) = 0;
   /*!
    * \brief dump the model in text format
    * \param fmap feature map that may help give interpretations of feature

src/gbm/gbtree-inl.hpp

@@ -105,7 +105,8 @@ class GBTree : public IGradBooster {
   virtual void Predict(IFMatrix *p_fmat,
                        int64_t buffer_offset,
                        const BoosterInfo &info,
-                       std::vector<float> *out_preds) {
+                       std::vector<float> *out_preds,
+                       unsigned ntree_limit = 0) {
     int nthread;
     #pragma omp parallel
     {
@@ -137,7 +138,8 @@ class GBTree : public IGradBooster {
         this->Pred(batch[i],
                    buffer_offset < 0 ? -1 : buffer_offset + ridx,
                    gid, info.GetRoot(ridx), &feats,
-                   &preds[ridx * mparam.num_output_group + gid], stride);
+                   &preds[ridx * mparam.num_output_group + gid], stride,
+                   ntree_limit);
       }
     }
   }
@@ -212,14 +214,16 @@ class GBTree : public IGradBooster {
                    int bst_group,
                    unsigned root_index,
                    tree::RegTree::FVec *p_feats,
-                   float *out_pred, size_t stride) {
+                   float *out_pred, size_t stride, unsigned ntree_limit) {
     size_t itop = 0;
     float psum = 0.0f;
     // sum of leaf vector
     std::vector<float> vec_psum(mparam.size_leaf_vector, 0.0f);
     const int64_t bid = mparam.BufferOffset(buffer_index, bst_group);
+    // number of valid trees
+    unsigned treeleft = ntree_limit == 0 ? std::numeric_limits<unsigned>::max() : ntree_limit;
     // load buffered results if any
-    if (bid >= 0) {
+    if (bid >= 0 && ntree_limit == 0) {
       itop = pred_counter[bid];
       psum = pred_buffer[bid];
       for (int i = 0; i < mparam.size_leaf_vector; ++i) {
@@ -235,12 +239,13 @@ class GBTree : public IGradBooster {
         for (int j = 0; j < mparam.size_leaf_vector; ++j) {
           vec_psum[j] += trees[i]->leafvec(tid)[j];
         }
+        if (--treeleft == 0) break;
       }
     }
     p_feats->Drop(inst);
   }
   // updated the buffered results
-  if (bid >= 0) {
+  if (bid >= 0 && ntree_limit == 0) {
     pred_counter[bid] = static_cast<unsigned>(trees.size());
     pred_buffer[bid] = psum;
     for (int i = 0; i < mparam.size_leaf_vector; ++i) {
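Seen from R, the effect is: with ntreelimit at its default the prediction buffer is reused and all trees contribute, while any positive limit bypasses the buffer and sums only the first ntreelimit trees, so the limit at nrounds should reproduce the full prediction. A rough sketch, under the same assumptions as the earlier predict() example:

```r
library(xgboost)

data(iris)
x   <- as.matrix(iris[, 1:4])
bst <- xgboost(x, as.numeric(iris[, 5]), nrounds = 4)

# first-row prediction truncated after k trees, for k = 1..4
p_k    <- sapply(1:4, function(k) predict(bst, x, ntreelimit = k)[1])
p_full <- predict(bst, x)[1]
all.equal(p_k[4], p_full)  # expected TRUE for a single-group gbtree model
```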

src/io/io.cpp

@@ -1,6 +1,7 @@
 #define _CRT_SECURE_NO_WARNINGS
 #define _CRT_SECURE_NO_DEPRECATE
 #include <string>
+using namespace std;
 #include "./io.h"
 #include "../utils/io.h"
 #include "../utils/utils.h"

src/io/simple_dmatrix-inl.hpp

@@ -54,8 +54,10 @@ class DMatrixSimple : public DataMatrix {
     for (size_t i = 0; i < batch.size; ++i) {
       RowBatch::Inst inst = batch[i];
       row_data_.resize(row_data_.size() + inst.length);
-      memcpy(&row_data_[row_ptr_.back()], inst.data,
-             sizeof(RowBatch::Entry) * inst.length);
+      if (inst.length != 0) {
+        memcpy(&row_data_[row_ptr_.back()], inst.data,
+               sizeof(RowBatch::Entry) * inst.length);
+      }
       row_ptr_.push_back(row_ptr_.back() + inst.length);
     }
   }
@@ -104,10 +106,10 @@ class DMatrixSimple : public DataMatrix {
     this->AddRow(feats);
     if (!silent) {
-      printf("%lux%lu matrix with %lu entries is loaded from %s\n",
+      utils::Printf("%lux%lu matrix with %lu entries is loaded from %s\n",
             static_cast<unsigned long>(info.num_row()),
             static_cast<unsigned long>(info.num_col()),
            static_cast<unsigned long>(row_data_.size()), fname);
     }
     fclose(file);
     // try to load in additional file
@@ -147,26 +149,26 @@ class DMatrixSimple : public DataMatrix {
    * \param fname file name, used to print message
    */
   inline void LoadBinary(utils::IStream &fs, bool silent = false, const char *fname = NULL) {
-    int magic;
-    utils::Check(fs.Read(&magic, sizeof(magic)) != 0, "invalid input file format");
-    utils::Check(magic == kMagic, "invalid format,magic number mismatch");
+    int tmagic;
+    utils::Check(fs.Read(&tmagic, sizeof(tmagic)) != 0, "invalid input file format");
+    utils::Check(tmagic == kMagic, "invalid format,magic number mismatch");
     info.LoadBinary(fs);
     FMatrixS::LoadBinary(fs, &row_ptr_, &row_data_);
     fmat_->LoadColAccess(fs);
     if (!silent) {
-      printf("%lux%lu matrix with %lu entries is loaded",
+      utils::Printf("%lux%lu matrix with %lu entries is loaded",
             static_cast<unsigned long>(info.num_row()),
             static_cast<unsigned long>(info.num_col()),
             static_cast<unsigned long>(row_data_.size()));
       if (fname != NULL) {
-        printf(" from %s\n", fname);
+        utils::Printf(" from %s\n", fname);
       } else {
-        printf("\n");
+        utils::Printf("\n");
       }
       if (info.group_ptr.size() != 0) {
-        printf("data contains %u groups\n", (unsigned)info.group_ptr.size()-1);
+        utils::Printf("data contains %u groups\n", (unsigned)info.group_ptr.size()-1);
       }
     }
   }
@@ -177,8 +179,8 @@ class DMatrixSimple : public DataMatrix {
    */
   inline void SaveBinary(const char* fname, bool silent = false) const {
     utils::FileStream fs(utils::FopenCheck(fname, "wb"));
-    int magic = kMagic;
-    fs.Write(&magic, sizeof(magic));
+    int tmagic = kMagic;
+    fs.Write(&tmagic, sizeof(tmagic));
     info.SaveBinary(fs);
     FMatrixS::SaveBinary(fs, row_ptr_, row_data_);
@@ -186,13 +188,13 @@ class DMatrixSimple : public DataMatrix {
     fs.Close();
     if (!silent) {
-      printf("%lux%lu matrix with %lu entries is saved to %s\n",
+      utils::Printf("%lux%lu matrix with %lu entries is saved to %s\n",
             static_cast<unsigned long>(info.num_row()),
             static_cast<unsigned long>(info.num_col()),
             static_cast<unsigned long>(row_data_.size()), fname);
       if (info.group_ptr.size() != 0) {
-        printf("data contains %u groups\n",
+        utils::Printf("data contains %u groups\n",
               static_cast<unsigned>(info.group_ptr.size()-1));
       }
     }
   }
@@ -244,8 +246,8 @@ class DMatrixSimple : public DataMatrix {
       at_first_ = false;
       batch_.size = parent_->row_ptr_.size() - 1;
       batch_.base_rowid = 0;
-      batch_.ind_ptr = &parent_->row_ptr_[0];
-      batch_.data_ptr = &parent_->row_data_[0];
+      batch_.ind_ptr = BeginPtr(parent_->row_ptr_);
+      batch_.data_ptr = BeginPtr(parent_->row_data_);
       return true;
     }
     virtual const RowBatch &Value(void) const {

src/io/simple_fmatrix-inl.hpp

@@ -110,9 +110,9 @@ class FMatrixS : public IFMatrix{
                          const std::vector<RowBatch::Entry> &data) {
     size_t nrow = ptr.size() - 1;
     fo.Write(&nrow, sizeof(size_t));
-    fo.Write(&ptr[0], ptr.size() * sizeof(size_t));
+    fo.Write(BeginPtr(ptr), ptr.size() * sizeof(size_t));
     if (data.size() != 0) {
-      fo.Write(&data[0], data.size() * sizeof(RowBatch::Entry));
+      fo.Write(BeginPtr(data), data.size() * sizeof(RowBatch::Entry));
     }
   }
   /*!
@@ -127,11 +127,11 @@ class FMatrixS : public IFMatrix{
     size_t nrow;
     utils::Check(fi.Read(&nrow, sizeof(size_t)) != 0, "invalid input file format");
     out_ptr->resize(nrow + 1);
-    utils::Check(fi.Read(&(*out_ptr)[0], out_ptr->size() * sizeof(size_t)) != 0,
+    utils::Check(fi.Read(BeginPtr(*out_ptr), out_ptr->size() * sizeof(size_t)) != 0,
                  "invalid input file format");
     out_data->resize(out_ptr->back());
     if (out_data->size() != 0) {
-      utils::Assert(fi.Read(&(*out_data)[0], out_data->size() * sizeof(RowBatch::Entry)) != 0,
+      utils::Assert(fi.Read(BeginPtr(*out_data), out_data->size() * sizeof(RowBatch::Entry)) != 0,
                     "invalid input file format");
     }
   }
@@ -213,8 +213,8 @@ class FMatrixS : public IFMatrix{
       col_data_[i] = SparseBatch::Inst(&data[0] + ptr[ridx],
                                        static_cast<bst_uint>(ptr[ridx+1] - ptr[ridx]));
     }
-    batch_.col_index = &col_index_[0];
-    batch_.col_data = &col_data_[0];
+    batch_.col_index = BeginPtr(col_index_);
+    batch_.col_data = BeginPtr(col_data_);
     this->BeforeFirst();
   }
   // data content

src/learner/dmatrix.h

@@ -98,8 +98,8 @@ struct MetaInfo {
       group_ptr.push_back(group_ptr.back()+nline);
     }
     if (!silent) {
-      printf("%u groups are loaded from %s\n",
-             static_cast<unsigned>(group_ptr.size()-1), fname);
+      utils::Printf("%u groups are loaded from %s\n",
+                    static_cast<unsigned>(group_ptr.size()-1), fname);
     }
     fclose(fi);
     return true;
@@ -125,15 +125,15 @@ struct MetaInfo {
   }
   // try to load weight information from file, if exists
   inline bool TryLoadFloatInfo(const char *field, const char* fname, bool silent = false) {
-    std::vector<float> &weights = this->GetFloatInfo(field);
+    std::vector<float> &data = this->GetFloatInfo(field);
     FILE *fi = fopen64(fname, "r");
     if (fi == NULL) return false;
     float wt;
     while (fscanf(fi, "%f", &wt) == 1) {
-      weights.push_back(wt);
+      data.push_back(wt);
     }
     if (!silent) {
-      printf("loading %s from %s\n", field, fname);
+      utils::Printf("loading %s from %s\n", field, fname);
     }
     fclose(fi);
     return true;

src/learner/evaluation-inl.hpp

@@ -8,8 +8,8 @@
 #include <vector>
 #include <utility>
 #include <string>
-#include <climits>
 #include <cmath>
+#include <climits>
 #include <algorithm>
 #include "./evaluation.h"
 #include "./helper_utils.h"
@@ -183,7 +183,7 @@ struct EvalAMS : public IEvaluator {
       }
     }
     if (ntop == ndata) {
-      fprintf(stderr, "\tams-ratio=%g", static_cast<float>(thresindex) / ndata);
+      utils::Printf("\tams-ratio=%g", static_cast<float>(thresindex) / ndata);
       return static_cast<float>(tams);
     } else {
       return static_cast<float>(sqrt(2*((s_tp+b_fp+br) * log(1.0 + s_tp/(b_fp+br)) - s_tp)));

src/learner/evaluation.h

@@ -73,7 +73,7 @@ class EvalSet{
     for (size_t i = 0; i < evals_.size(); ++i) {
       float res = evals_[i]->Eval(preds, info);
       char tmp[1024];
-      snprintf(tmp, sizeof(tmp), "\t%s-%s:%f", evname, evals_[i]->Name(), res);
+      utils::SPrintf(tmp, sizeof(tmp), "\t%s-%s:%f", evname, evals_[i]->Name(), res);
       result += tmp;
     }
     return result;


@@ -7,6 +7,7 @@
  */
 #include <utility>
 #include <vector>
+#include <cmath>
 #include <algorithm>

 namespace xgboost {
 namespace learner {

src/learner/learner-inl.hpp

@@ -63,14 +63,14 @@ class BoostLearner {
   }
   char str_temp[25];
   if (num_feature > mparam.num_feature) {
-    snprintf(str_temp, sizeof(str_temp), "%u", num_feature);
+    utils::SPrintf(str_temp, sizeof(str_temp), "%u", num_feature);
     this->SetParam("bst:num_feature", str_temp);
   }
-  snprintf(str_temp, sizeof(str_temp), "%lu",
+  utils::SPrintf(str_temp, sizeof(str_temp), "%lu",
            static_cast<unsigned long>(buffer_size));
   this->SetParam("num_pbuffer", str_temp);
   if (!silent) {
-    printf("buffer_size=%ld\n", static_cast<long>(buffer_size));
+    utils::Printf("buffer_size=%ld\n", static_cast<long>(buffer_size));
   }
 }
 /*!
@@ -183,7 +183,7 @@ class BoostLearner {
                       const std::vector<std::string> &evname) {
   std::string res;
   char tmp[256];
-  snprintf(tmp, sizeof(tmp), "[%d]", iter);
+  utils::SPrintf(tmp, sizeof(tmp), "[%d]", iter);
   res = tmp;
   for (size_t i = 0; i < evals.size(); ++i) {
     this->PredictRaw(*evals[i], &preds_);
@@ -212,11 +212,14 @@ class BoostLearner {
    * \param data input data
    * \param output_margin whether to only predict margin value instead of transformed prediction
    * \param out_preds output vector that stores the prediction
+   * \param ntree_limit limit number of trees used for boosted tree
+   *        predictor, when it equals 0, this means we are using all the trees
    */
   inline void Predict(const DMatrix &data,
                       bool output_margin,
-                      std::vector<float> *out_preds) const {
-    this->PredictRaw(data, out_preds);
+                      std::vector<float> *out_preds,
+                      unsigned ntree_limit = 0) const {
+    this->PredictRaw(data, out_preds, ntree_limit);
     if (!output_margin) {
       obj_->PredTransform(out_preds);
     }
@@ -246,11 +249,14 @@ class BoostLearner {
    * \brief get un-transformed prediction
    * \param data training data matrix
    * \param out_preds output vector that stores the prediction
+   * \param ntree_limit limit number of trees used for boosted tree
+   *        predictor, when it equals 0, this means we are using all the trees
    */
   inline void PredictRaw(const DMatrix &data,
-                         std::vector<float> *out_preds) const {
+                         std::vector<float> *out_preds,
+                         unsigned ntree_limit = 0) const {
     gbm_->Predict(data.fmat(), this->FindBufferOffset(data),
-                  data.info.info, out_preds);
+                  data.info.info, out_preds, ntree_limit);
     // add base margin
     std::vector<float> &preds = *out_preds;
     const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size());
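
The new ntree_limit argument caps how many trees of the ensemble contribute to a prediction, which is handy for evaluating an early-stopped model without retraining. A minimal sketch, assuming a trained BoostLearner named `learner` and a cached DMatrix named `dmat` (both names hypothetical):

  std::vector<float> preds_all, preds_30;
  learner.Predict(dmat, false, &preds_all);      // ntree_limit defaults to 0: use every tree
  learner.Predict(dmat, false, &preds_30, 30);   // use only the first 30 trees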

View File

@@ -6,9 +6,9 @@
  * \author Tianqi Chen, Kailong Chen
  */
 #include <vector>
-#include <cmath>
 #include <algorithm>
 #include <utility>
+#include <cmath>
 #include <functional>
 #include "../data.h"
 #include "./objective.h"
@@ -37,7 +37,7 @@ struct LossType {
       case kLogisticRaw:
       case kLinearSquare: return x;
       case kLogisticClassify:
-      case kLogisticNeglik: return 1.0f / (1.0f + expf(-x));
+      case kLogisticNeglik: return 1.0f / (1.0f + std::exp(-x));
       default: utils::Error("unknown loss_type"); return 0.0f;
     }
   }
@@ -50,7 +50,7 @@ struct LossType {
   inline float FirstOrderGradient(float predt, float label) const {
     switch (loss_type) {
       case kLinearSquare: return predt - label;
-      case kLogisticRaw: predt = 1.0f / (1.0f + expf(-predt));
+      case kLogisticRaw: predt = 1.0f / (1.0f + std::exp(-predt));
       case kLogisticClassify:
       case kLogisticNeglik: return predt - label;
       default: utils::Error("unknown loss_type"); return 0.0f;
@@ -65,7 +65,7 @@ struct LossType {
   inline float SecondOrderGradient(float predt, float label) const {
     switch (loss_type) {
       case kLinearSquare: return 1.0f;
-      case kLogisticRaw: predt = 1.0f / (1.0f + expf(-predt));
+      case kLogisticRaw: predt = 1.0f / (1.0f + std::exp(-predt));
       case kLogisticClassify:
       case kLogisticNeglik: return predt * (1 - predt);
       default: utils::Error("unknown loss_type"); return 0.0f;
@@ -80,7 +80,7 @@ struct LossType {
         loss_type == kLogisticNeglik ) {
       utils::Check(base_score > 0.0f && base_score < 1.0f,
                    "base_score must be in (0,1) for logistic loss");
-      base_score = -logf(1.0f / base_score - 1.0f);
+      base_score = -std::log(1.0f / base_score - 1.0f);
     }
     return base_score;
   }
@@ -419,8 +419,8 @@ class LambdaRankObjNDCG : public LambdaRankObj {
     for (size_t i = 0; i < pairs.size(); ++i) {
       unsigned pos_idx = pairs[i].pos_index;
       unsigned neg_idx = pairs[i].neg_index;
-      float pos_loginv = 1.0f / logf(pos_idx + 2.0f);
-      float neg_loginv = 1.0f / logf(neg_idx + 2.0f);
+      float pos_loginv = 1.0f / std::log(pos_idx + 2.0f);
+      float neg_loginv = 1.0f / std::log(neg_idx + 2.0f);
       int pos_label = static_cast<int>(sorted_list[pos_idx].label);
       int neg_label = static_cast<int>(sorted_list[neg_idx].label);
       float original =
@@ -438,7 +438,7 @@ class LambdaRankObjNDCG : public LambdaRankObj {
     for (size_t i = 0; i < labels.size(); ++i) {
       const unsigned rel = static_cast<unsigned>(labels[i]);
       if (rel != 0) {
-        sumdcg += ((1 << rel) - 1) / logf(static_cast<float>(i + 2));
+        sumdcg += ((1 << rel) - 1) / std::log(static_cast<float>(i + 2));
      }
    }
    return static_cast<float>(sumdcg);
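
For reference, the normalizer in the loop above is the standard DCG with exponential gain: sum over positions i of (2^rel_i - 1) / ln(i + 2), computed on the list sorted by rank. A self-contained sketch of the same computation (function name hypothetical, illustration only):

  #include <cmath>
  #include <vector>
  // DCG with exponential gain and natural-log position discount
  inline float CalcDCGExample(const std::vector<unsigned> &rels) {
    double sumdcg = 0.0;
    for (size_t i = 0; i < rels.size(); ++i) {
      if (rels[i] != 0) {
        sumdcg += ((1 << rels[i]) - 1) / std::log(static_cast<float>(i + 2));
      }
    }
    return static_cast<float>(sumdcg);
  }
  // e.g. relevances {2, 1, 0} give 3/ln(2) + 1/ln(3), roughly 4.33 + 0.91 = 5.24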

View File

@@ -302,11 +302,11 @@ struct SplitEntry{
    * \param loss_chg the loss reduction get through the split
    * \param split_index the feature index where the split is on
    */
-  inline bool NeedReplace(bst_float loss_chg, unsigned split_index) const {
+  inline bool NeedReplace(bst_float new_loss_chg, unsigned split_index) const {
     if (this->split_index() <= split_index) {
-      return loss_chg > this->loss_chg;
+      return new_loss_chg > this->loss_chg;
     } else {
-      return !(this->loss_chg > loss_chg);
+      return !(this->loss_chg > new_loss_chg);
     }
   }
   /*!
@@ -332,13 +332,13 @@ struct SplitEntry{
    * \param default_left whether the missing value goes to left
    * \return whether the proposed split is better and can replace current split
    */
-  inline bool Update(bst_float loss_chg, unsigned split_index,
-                     float split_value, bool default_left) {
-    if (this->NeedReplace(loss_chg, split_index)) {
-      this->loss_chg = loss_chg;
+  inline bool Update(bst_float new_loss_chg, unsigned split_index,
+                     float new_split_value, bool default_left) {
+    if (this->NeedReplace(new_loss_chg, split_index)) {
+      this->loss_chg = new_loss_chg;
       if (default_left) split_index |= (1U << 31);
       this->sindex = split_index;
-      this->split_value = split_value;
+      this->split_value = new_split_value;
       return true;
     } else {
       return false;
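
The renames in this file change no behavior: the old parameters shadowed the members `loss_chg` and `split_value`, which obscured comparisons like `loss_chg > this->loss_chg` and trips -Wshadow. A hypothetical reduction of the problem:

  struct Shadowed {
    float loss_chg;
    // -Wshadow warns here: the parameter hides the member of the same name,
    // so every use of the member must be disambiguated with this->
    bool NeedReplace(float loss_chg) const { return loss_chg > this->loss_chg; }
  };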

View File

@@ -1,6 +1,7 @@
 #define _CRT_SECURE_NO_WARNINGS
 #define _CRT_SECURE_NO_DEPRECATE
 #include <cstring>
+using namespace std;
 #include "./updater.h"
 #include "./updater_prune-inl.hpp"
 #include "./updater_refresh-inl.hpp"

View File

@@ -63,8 +63,8 @@ class TreePruner: public IUpdater {
     }
   }
   if (silent == 0) {
-    printf("tree prunning end, %d roots, %d extra nodes, %d pruned nodes ,max_depth=%d\n",
-           tree.param.num_roots, tree.num_extra_nodes(), npruned, tree.MaxDepth());
+    utils::Printf("tree prunning end, %d roots, %d extra nodes, %d pruned nodes ,max_depth=%d\n",
+                  tree.param.num_roots, tree.num_extra_nodes(), npruned, tree.MaxDepth());
   }
 }

View File

@@ -26,14 +26,14 @@ class TreeRefresher: public IUpdater {
   virtual void Update(const std::vector<bst_gpair> &gpair,
                       IFMatrix *p_fmat,
                       const BoosterInfo &info,
                       const std::vector<RegTree*> &trees) {
     if (trees.size() == 0) return;
     // number of threads
-    int nthread;
     // thread temporal space
     std::vector< std::vector<TStats> > stemp;
     std::vector<RegTree::FVec> fvec_temp;
     // setup temp space for each thread
+    int nthread;
     #pragma omp parallel
     {
       nthread = omp_get_num_threads();
@@ -127,8 +127,6 @@ class TreeRefresher: public IUpdater {
       this->Refresh(gstats, tree[nid].cright(), p_tree);
     }
   }
-  // number of thread in the data
-  int nthread;
   // training parameter
   TrainParam param;
 };
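
Dropping the duplicate `nthread` member in favor of the local also highlights the idiom the updater relies on: discover the thread count once inside a parallel region, then size per-thread scratch space from it. A sketch of that pattern, assuming OpenMP is enabled:

  #include <omp.h>
  #include <vector>

  int nthread;
  #pragma omp parallel
  {
    nthread = omp_get_num_threads();  // every thread stores the same value
  }
  std::vector< std::vector<double> > stemp(nthread);  // one scratch buffer per thread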

View File

@@ -100,12 +100,10 @@ class ISeekStream: public IStream {
 /*! \brief implementation of file i/o stream */
 class FileStream : public ISeekStream {
  public:
+  explicit FileStream(FILE *fp) : fp(fp) {}
   explicit FileStream(void) {
     this->fp = NULL;
   }
-  explicit FileStream(FILE *fp) {
-    this->fp = fp;
-  }
   virtual size_t Read(void *ptr, size_t size) {
     return fread(ptr, size, 1, fp);
   }

View File

@@ -163,7 +163,7 @@ struct SparseCSRFileBuilder {
     fo->Write(rptr);
     // setup buffer space
     buffer_rptr.resize(rptr.size());
-    buffer.reserve(buffer_size);
+    buffer_temp.reserve(buffer_size);
     buffer_data.resize(buffer_size);
     saved_offset.clear();
     saved_offset.resize(rptr.size() - 1, 0);

View File

@@ -9,13 +9,8 @@
 #include <omp.h>
 #else
 #ifndef DISABLE_OPENMP
-#ifndef _MSC_VER
-#warning "OpenMP is not available, compile to single thread code."\
-"You may want to ungrade your compiler to enable OpenMP support,"\
-"to get benefit of multi-threading."
-#else
-// TODO add warning for msvc
-#endif
+// use pragma message instead of warning
+#pragma message ("Warning: OpenMP is not available, xgboost will be compiled into single-thread code. Use OpenMP-enabled compiler to get benefit of multi-threading")
 #endif
 inline int omp_get_thread_num() { return 0; }
 inline int omp_get_num_threads() { return 1; }
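
The switch away from `#warning` is a portability fix: `#warning` is a GNU extension that MSVC rejects outright, whereas an unrecognized `#pragma` is ignored by conforming compilers, so `#pragma message` degrades gracefully everywhere. A one-line illustration:

  #pragma message ("note: building single-threaded")  // no-op on compilers that lack it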

View File

@@ -16,30 +16,21 @@
 /*! namespace of PRNG */
 namespace xgboost {
 namespace random {
+#ifndef XGBOOST_CUSTOMIZE_PRNG_
 /*! \brief seed the PRNG */
-inline void Seed(uint32_t seed) {
+inline void Seed(unsigned seed) {
   srand(seed);
 }
-/*! \brief return a real number uniform in [0,1) */
-inline double NextDouble(void) {
+/*! \brief basic function, uniform */
+inline double Uniform(void) {
   return static_cast<double>(rand()) / (static_cast<double>(RAND_MAX)+1.0);
 }
 /*! \brief return a real numer uniform in (0,1) */
 inline double NextDouble2(void) {
   return (static_cast<double>(rand()) + 1.0) / (static_cast<double>(RAND_MAX)+2.0);
 }
-/*! \brief return a random number */
-inline uint32_t NextUInt32(void) {
-  return (uint32_t)rand();
-}
-/*! \brief return a random number in n */
-inline uint32_t NextUInt32(uint32_t n) {
-  return (uint32_t)floor(NextDouble() * n);
-}
 /*! \brief return x~N(0,1) */
-inline double SampleNormal() {
+inline double Normal(void) {
   double x, y, s;
   do {
     x = 2 * NextDouble2() - 1.0;
@@ -49,22 +40,24 @@ inline double SampleNormal() {
   return x * sqrt(-2.0 * log(s) / s);
 }
+#else
+// include declarations, to be implemented
+void Seed(unsigned seed);
+double Uniform(void);
+double Normal(void);
+#endif
-/*! \brief return iid x,y ~N(0,1) */
-inline void SampleNormal2D(double &xx, double &yy) {
-  double x, y, s;
-  do {
-    x = 2 * NextDouble2() - 1.0;
-    y = 2 * NextDouble2() - 1.0;
-    s = x*x + y*y;
-  } while (s >= 1.0 || s == 0.0);
-  double t = sqrt(-2.0 * log(s) / s);
-  xx = x * t;
-  yy = y * t;
+/*! \brief return a real number uniform in [0,1) */
+inline double NextDouble(void) {
+  return Uniform();
+}
+/*! \brief return a random number in n */
+inline uint32_t NextUInt32(uint32_t n) {
+  return (uint32_t)floor(NextDouble() * n);
 }
 /*! \brief return x~N(mu,sigma^2) */
 inline double SampleNormal(double mu, double sigma) {
-  return SampleNormal() * sigma + mu;
+  return Normal() * sigma + mu;
 }
 /*! \brief return 1 with probability p, coin flip */
 inline int SampleBinary(double p) {
@@ -90,7 +83,7 @@ struct Random{
   inline void Seed(unsigned sd) {
     this->rseed = sd;
 #if defined(_MSC_VER)||defined(_WIN32)
-    srand(rseed);
+    ::xgboost::random::Seed(sd);
 #endif
   }
   /*! \brief return a real number uniform in [0,1) */
@@ -98,8 +91,8 @@ struct Random{
     // use rand instead of rand_r in windows, for MSVC it is fine since rand is threadsafe
     // For cygwin and mingw, this can slows down parallelism, but rand_r is only used in objective-inl.hpp, won't affect speed in general
     // todo, replace with another PRNG
-#if defined(_MSC_VER)||defined(_WIN32)
-    return static_cast<double>(rand()) / (static_cast<double>(RAND_MAX) + 1.0);
+#if defined(_MSC_VER)||defined(_WIN32)||defined(XGBOOST_STRICT_CXX98_)
+    return Uniform();
 #else
     return static_cast<double>(rand_r(&rseed)) / (static_cast<double>(RAND_MAX) + 1.0);
 #endif
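
The new `XGBOOST_CUSTOMIZE_PRNG_` guard turns `Seed`, `Uniform`, and `Normal` into an override point: compile with `-DXGBOOST_CUSTOMIZE_PRNG_` and link your own definitions. A minimal sketch of such an override, using only the C++ standard library (illustration only, not part of this commit):

  #include <cmath>
  #include <cstdlib>
  namespace xgboost {
  namespace random {
  void Seed(unsigned seed) { srand(seed); }
  double Uniform(void) {
    return static_cast<double>(rand()) / (static_cast<double>(RAND_MAX) + 1.0);
  }
  double Normal(void) {  // Box-Muller transform over two uniform draws
    double u1 = 1.0 - Uniform(), u2 = Uniform();
    return std::sqrt(-2.0 * std::log(u1)) * std::cos(6.283185307179586 * u2);
  }
  }  // namespace random
  }  // namespace xgboost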

View File

@@ -7,11 +7,18 @@
  */
 #define _CRT_SECURE_NO_WARNINGS
 #include <cstdio>
-#include <cstdarg>
 #include <string>
 #include <cstdlib>
+#include <vector>
+#ifndef XGBOOST_STRICT_CXX98_
+#include <cstdarg>
+#endif
+#if !defined(__GNUC__)
+#define fopen64 std::fopen
+#endif
 #ifdef _MSC_VER
-#define fopen64 fopen
 // NOTE: sprintf_s is not equivalent to snprintf,
 // they are equivalent when success, which is sufficient for our case
 #define snprintf sprintf_s
@@ -19,16 +26,15 @@
 #else
 #ifdef _FILE_OFFSET_BITS
 #if _FILE_OFFSET_BITS == 32
-#warning "FILE OFFSET BITS defined to be 32 bit"
+#pragma message ("Warning: FILE OFFSET BITS defined to be 32 bit")
 #endif
 #endif
 #ifdef __APPLE__
 #define off64_t off_t
-#define fopen64 fopen
+#define fopen64 std::fopen
 #endif
-#define _FILE_OFFSET_BITS 64
 extern "C" {
 #include <sys/types.h>
 }
@@ -47,10 +53,11 @@ typedef long int64_t;
 namespace xgboost {
 /*! \brief namespace for helper utils of the project */
 namespace utils {
 /*! \brief error message buffer length */
-const int kErrorBuffer = 1 << 12;
-#ifndef XGBOOST_CUSTOMIZE_ERROR_
+const int kPrintBuffer = 1 << 12;
+#ifndef XGBOOST_CUSTOMIZE_MSG_
 /*!
  * \brief handling of Assert error, caused by in-apropriate input
  * \param msg error message
@@ -67,19 +74,50 @@ inline void HandleCheckError(const char *msg) {
   fprintf(stderr, "%s\n", msg);
   exit(-1);
 }
+inline void HandlePrint(const char *msg) {
+  printf("%s", msg);
+}
 #else
+#ifndef XGBOOST_STRICT_CXX98_
 // include declarations, some one must implement this
 void HandleAssertError(const char *msg);
 void HandleCheckError(const char *msg);
+void HandlePrint(const char *msg);
 #endif
+#endif
+#ifdef XGBOOST_STRICT_CXX98_
+// these function pointers are to be assigned
+extern "C" void (*Printf)(const char *fmt, ...);
+extern "C" int (*SPrintf)(char *buf, size_t size, const char *fmt, ...);
+extern "C" void (*Assert)(int exp, const char *fmt, ...);
+extern "C" void (*Check)(int exp, const char *fmt, ...);
+extern "C" void (*Error)(const char *fmt, ...);
+#else
+/*! \brief printf, print message to the console */
+inline void Printf(const char *fmt, ...) {
+  std::string msg(kPrintBuffer, '\0');
+  va_list args;
+  va_start(args, fmt);
+  vsnprintf(&msg[0], kPrintBuffer, fmt, args);
+  va_end(args);
+  HandlePrint(msg.c_str());
+}
+/*! \brief portable version of snprintf */
+inline int SPrintf(char *buf, size_t size, const char *fmt, ...) {
+  va_list args;
+  va_start(args, fmt);
+  int ret = vsnprintf(buf, size, fmt, args);
+  va_end(args);
+  return ret;
+}
 /*! \brief assert an condition is true, use this to handle debug information */
 inline void Assert(bool exp, const char *fmt, ...) {
   if (!exp) {
-    std::string msg(kErrorBuffer, '\0');
+    std::string msg(kPrintBuffer, '\0');
     va_list args;
     va_start(args, fmt);
-    vsnprintf(&msg[0], kErrorBuffer, fmt, args);
+    vsnprintf(&msg[0], kPrintBuffer, fmt, args);
     va_end(args);
     HandleAssertError(msg.c_str());
   }
@@ -88,10 +126,10 @@ inline void Assert(bool exp, const char *fmt, ...) {
 /*!\brief same as assert, but this is intended to be used as message for user*/
 inline void Check(bool exp, const char *fmt, ...) {
   if (!exp) {
-    std::string msg(kErrorBuffer, '\0');
+    std::string msg(kPrintBuffer, '\0');
     va_list args;
     va_start(args, fmt);
-    vsnprintf(&msg[0], kErrorBuffer, fmt, args);
+    vsnprintf(&msg[0], kPrintBuffer, fmt, args);
     va_end(args);
     HandleCheckError(msg.c_str());
   }
@@ -100,14 +138,15 @@ inline void Check(bool exp, const char *fmt, ...) {
 /*! \brief report error message, same as check */
 inline void Error(const char *fmt, ...) {
   {
-    std::string msg(kErrorBuffer, '\0');
+    std::string msg(kPrintBuffer, '\0');
     va_list args;
     va_start(args, fmt);
-    vsnprintf(&msg[0], kErrorBuffer, fmt, args);
+    vsnprintf(&msg[0], kPrintBuffer, fmt, args);
     va_end(args);
     HandleCheckError(msg.c_str());
   }
 }
+#endif
 /*! \brief replace fopen, report error when the file open fails */
 inline FILE *FopenCheck(const char *fname, const char *flag) {
@@ -115,7 +154,25 @@ inline FILE *FopenCheck(const char *fname, const char *flag) {
   Check(fp != NULL, "can not open file \"%s\"\n", fname);
   return fp;
 }
 }  // namespace utils
+// easy utils that can be directly acessed in xgboost
+/*! \brief get the beginning address of a vector */
+template<typename T>
+inline T *BeginPtr(std::vector<T> &vec) {
+  if (vec.size() == 0) {
+    return NULL;
+  } else {
+    return &vec[0];
+  }
+}
+/*! \brief get the beginning address of a vector */
+template<typename T>
+inline const T *BeginPtr(const std::vector<T> &vec) {
+  if (vec.size() == 0) {
+    return NULL;
+  } else {
+    return &vec[0];
+  }
+}
 }  // namespace xgboost
 #endif  // XGBOOST_UTILS_UTILS_H_
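
`BeginPtr` exists because taking `&vec[0]` on an empty vector is undefined behaviour; the helper degrades to NULL instead, which is why the wrapper code later in this commit switches over to it. For instance:

  std::vector<float> v;             // empty
  // float *p = &v[0];              // undefined behaviour when v.size() == 0
  float *p = xgboost::BeginPtr(v);  // NULL when empty, &v[0] otherwise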

View File

@@ -50,6 +50,7 @@ class BoostLearnTask{
     if (!strcmp("use_buffer", name)) use_buffer = atoi(val);
     if (!strcmp("num_round", name)) num_round = atoi(val);
     if (!strcmp("pred_margin", name)) pred_margin = atoi(val);
+    if (!strcmp("ntree_limit", name)) ntree_limit = atoi(val);
     if (!strcmp("save_period", name)) save_period = atoi(val);
     if (!strcmp("eval_train", name)) eval_train = atoi(val);
     if (!strcmp("task", name)) task = val;
@@ -79,6 +80,7 @@ class BoostLearnTask{
     save_period = 0;
     eval_train = 0;
     pred_margin = 0;
+    ntree_limit = 0;
     dump_model_stats = 0;
     task = "train";
     model_in = "NULL";
@@ -186,7 +188,7 @@ class BoostLearnTask{
   inline void TaskPred(void) {
     std::vector<float> preds;
     if (!silent) printf("start prediction...\n");
-    learner.Predict(*data, pred_margin != 0, &preds);
+    learner.Predict(*data, pred_margin != 0, &preds, ntree_limit);
     if (!silent) printf("writing prediction to %s\n", name_pred.c_str());
     FILE *fo = utils::FopenCheck(name_pred.c_str(), "w");
     for (size_t i = 0; i < preds.size(); i++) {
@@ -217,6 +219,8 @@ class BoostLearnTask{
   std::string task;
   /*! \brief name of predict file */
   std::string name_pred;
+  /*!\brief limit number of trees in prediction */
+  int ntree_limit;
   /*!\brief whether to directly output margin value */
   int pred_margin;
   /*! \brief whether dump statistics along with model */
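
With the parameter wired through SetParam, a prediction run can cap the ensemble straight from the command line, e.g. (file names hypothetical):

  ./xgboost mushroom.conf task=pred model_in=0030.model ntree_limit=20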

View File

@@ -365,7 +365,7 @@ class Booster:
         return xglib.XGBoosterEvalOneIter(self.handle, it, dmats, evnames, len(evals))
     def eval(self, mat, name = 'eval', it = 0):
         return self.eval_set( [(mat,name)], it)
-    def predict(self, data, output_margin=False):
+    def predict(self, data, output_margin=False, ntree_limit=0):
         """
         predict with data
         Args:
@@ -373,12 +373,14 @@ class Booster:
             the dmatrix storing the input
         output_margin: bool
             whether output raw margin value that is untransformed
+        ntree_limit: limit number of trees in prediction, default to 0, 0 means using all the trees
         Returns:
             numpy array of prediction
         """
         length = ctypes.c_ulong()
         preds = xglib.XGBoosterPredict(self.handle, data.handle,
-                                       int(output_margin), ctypes.byref(length))
+                                       int(output_margin), ntree_limit, ctypes.byref(length))
         return ctypes2numpy(preds, length.value, 'float32')
     def save_model(self, fname):
         """ save model to file

View File

@@ -6,10 +6,14 @@
 #include <string>
 #include <cstring>
 #include <algorithm>
+// include all std functions
+using namespace std;
 #include "./xgboost_wrapper.h"
 #include "../src/data.h"
 #include "../src/learner/learner-inl.hpp"
 #include "../src/io/io.h"
+#include "../src/utils/utils.h"
 #include "../src/io/simple_dmatrix-inl.hpp"
 using namespace xgboost;
@@ -25,11 +29,11 @@ class Booster: public learner::BoostLearner {
     this->init_model = false;
     this->SetCacheData(mats);
   }
-  const float *Pred(const DataMatrix &dmat, int output_margin, bst_ulong *len) {
+  inline const float *Pred(const DataMatrix &dmat, int output_margin, unsigned ntree_limit, bst_ulong *len) {
     this->CheckInitModel();
-    this->Predict(dmat, output_margin != 0, &this->preds_);
+    this->Predict(dmat, output_margin != 0, &this->preds_, ntree_limit);
     *len = static_cast<bst_ulong>(this->preds_.size());
-    return &this->preds_[0];
+    return BeginPtr(this->preds_);
   }
   inline void BoostOneIter(const DataMatrix &train,
                            float *grad, float *hess, bst_ulong len) {
@@ -57,7 +61,7 @@ class Booster: public learner::BoostLearner {
       model_dump_cptr[i] = model_dump[i].c_str();
     }
     *len = static_cast<bst_ulong>(model_dump.size());
-    return &model_dump_cptr[0];
+    return BeginPtr(model_dump_cptr);
   }
   // temporal fields
   // temporal data to save evaluation dump
@@ -174,13 +178,13 @@ extern "C"{
     std::vector<float> &vec =
         static_cast<DataMatrix*>(handle)->info.GetFloatInfo(field);
     vec.resize(len);
-    memcpy(&vec[0], info, sizeof(float) * len);
+    memcpy(BeginPtr(vec), info, sizeof(float) * len);
   }
   void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *info, bst_ulong len) {
     std::vector<unsigned> &vec =
         static_cast<DataMatrix*>(handle)->info.GetUIntInfo(field);
     vec.resize(len);
-    memcpy(&vec[0], info, sizeof(unsigned) * len);
+    memcpy(BeginPtr(vec), info, sizeof(unsigned) * len);
   }
   void XGDMatrixSetGroup(void *handle, const unsigned *group, bst_ulong len) {
     DataMatrix *pmat = static_cast<DataMatrix*>(handle);
@@ -194,13 +198,13 @@ extern "C"{
     const std::vector<float> &vec =
         static_cast<const DataMatrix*>(handle)->info.GetFloatInfo(field);
     *len = static_cast<bst_ulong>(vec.size());
-    return &vec[0];
+    return BeginPtr(vec);
   }
   const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, bst_ulong* len) {
     const std::vector<unsigned> &vec =
         static_cast<const DataMatrix*>(handle)->info.GetUIntInfo(field);
     *len = static_cast<bst_ulong>(vec.size());
-    return &vec[0];
+    return BeginPtr(vec);
   }
   bst_ulong XGDMatrixNumRow(const void *handle) {
     return static_cast<bst_ulong>(static_cast<const DataMatrix*>(handle)->info.num_row());
@@ -249,8 +253,8 @@ extern "C"{
     bst->eval_str = bst->EvalOneIter(iter, mats, names);
     return bst->eval_str.c_str();
   }
-  const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, bst_ulong *len) {
-    return static_cast<Booster*>(handle)->Pred(*static_cast<DataMatrix*>(dmat), output_margin, len);
+  const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, unsigned ntree_limit, bst_ulong *len) {
+    return static_cast<Booster*>(handle)->Pred(*static_cast<DataMatrix*>(dmat), output_margin, ntree_limit, len);
   }
   void XGBoosterLoadModel(void *handle, const char *fname) {
     static_cast<Booster*>(handle)->LoadModel(fname);
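
Callers of the C API pick up the extra argument in the same position; a minimal sketch, assuming `hbooster` and `hdmat` were created and trained elsewhere:

  bst_ulong len;
  const float *preds = XGBoosterPredict(hbooster, hdmat,
                                        0  /* output_margin */,
                                        20 /* ntree_limit: first 20 trees */,
                                        &len);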

View File

@@ -165,9 +165,11 @@ extern "C" {
  * \param handle handle
  * \param dmat data matrix
  * \param output_margin whether only output raw margin value
+ * \param ntree_limit limit number of trees used for prediction, this is only valid for boosted trees
+ *        when the parameter is set to 0, we will use all the trees
  * \param len used to store length of returning result
  */
-XGB_DLL const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, bst_ulong *len);
+XGB_DLL const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, unsigned ntree_limit, bst_ulong *len);
 /*!
  * \brief load model from existing file
  * \param handle handle