t push origin unityMerge branch 'master' into unity
This commit is contained in:
commit
76c513b191
5
.gitignore
vendored
5
.gitignore
vendored
@ -6,12 +6,15 @@
|
||||
# Compiled Dynamic libraries
|
||||
*.so
|
||||
*.dylib
|
||||
|
||||
*.page
|
||||
# Compiled Static libraries
|
||||
*.lai
|
||||
*.la
|
||||
*.a
|
||||
*~
|
||||
*.Rcheck
|
||||
*.rds
|
||||
*.tar.gz
|
||||
*txt*
|
||||
*conf
|
||||
*buffer
|
||||
|
||||
36
Makefile
36
Makefile
@ -15,7 +15,7 @@ BIN = xgboost
|
||||
OBJ = updater.o gbm.o io.o
|
||||
SLIB = wrapper/libxgboostwrapper.so
|
||||
|
||||
.PHONY: clean all python
|
||||
.PHONY: clean all python Rpack
|
||||
|
||||
all: $(BIN) $(OBJ) $(SLIB)
|
||||
|
||||
@ -40,19 +40,25 @@ $(OBJ) :
|
||||
install:
|
||||
cp -f -r $(BIN) $(INSTALL_PATH)
|
||||
|
||||
R-package.tar.gz:
|
||||
rm -rf xgboost-R
|
||||
cp -r R-package xgboost-R
|
||||
rm -rf xgboost-R/src/*.o xgboost-R/src/*.so xgboost-R/src/*.dll
|
||||
cp -r src xgboost-R/src/src
|
||||
mkdir xgboost-R/src/wrapper
|
||||
cp wrapper/xgboost_wrapper.h xgboost-R/src/wrapper
|
||||
cp wrapper/xgboost_wrapper.cpp xgboost-R/src/wrapper
|
||||
cp ./LICENSE xgboost-R
|
||||
cat R-package/src/Makevars|sed '2s/.*/PKGROOT=./' > xgboost-R/src/Makevars
|
||||
cat R-package/src/Makevars.win|sed '2s/.*/PKGROOT=./' > xgboost-R/src/Makevars.win
|
||||
tar czf $@ xgboost-R
|
||||
rm -rf xgboost-R
|
||||
Rpack:
|
||||
make clean
|
||||
rm -rf xgboost xgboost*.tar.gz
|
||||
cp -r R-package xgboost
|
||||
rm -rf xgboost/inst/examples/*.buffer
|
||||
rm -rf xgboost/inst/examples/*.model
|
||||
rm -rf xgboost/inst/examples/dump*
|
||||
rm -rf xgboost/src/*.o xgboost/src/*.so xgboost/src/*.dll
|
||||
rm -rf xgboost/demo/*.model xgboost/demo/*.buffer
|
||||
cp -r src xgboost/src/src
|
||||
mkdir xgboost/src/wrapper
|
||||
cp wrapper/xgboost_wrapper.h xgboost/src/wrapper
|
||||
cp wrapper/xgboost_wrapper.cpp xgboost/src/wrapper
|
||||
cp ./LICENSE xgboost
|
||||
cat R-package/src/Makevars|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars
|
||||
cat R-package/src/Makevars.win|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars.win
|
||||
R CMD build xgboost
|
||||
rm -rf xgboost
|
||||
R CMD check --as-cran xgboost*.tar.gz
|
||||
|
||||
clean:
|
||||
$(RM) $(OBJ) $(BIN) $(SLIB) *.o *~ */*~ */*/*~
|
||||
$(RM) $(OBJ) $(BIN) $(SLIB) *.o */*.o */*/*.o *~ */*~ */*/*~
|
||||
|
||||
@ -1,12 +1,20 @@
|
||||
Package: xgboost
|
||||
Type: Package
|
||||
Title: R wrapper of xgboost
|
||||
Title: eXtreme Gradient Boosting
|
||||
Version: 0.3-0
|
||||
Date: 2014-08-23
|
||||
Author: Tianqi Chen, Tong He
|
||||
Maintainer: Tianqi Chen <tianqi.tchen@gmail.com>, Tong He <hetong007@gmail.com>
|
||||
Description: xgboost
|
||||
License: file LICENSE
|
||||
Author: Tianqi Chen <tianqi.tchen@gmail.com>, Tong He <hetong007@gmail.com>
|
||||
Maintainer: Tong He <hetong007@gmail.com>
|
||||
Description: This package is an R wrapper of xgboost, which is short for eXtreme
|
||||
Gradient Boosting. It is an efficient and scalable implementation of
|
||||
gradient boosting framework. The package includes efficient linear model
|
||||
solver and tree learning algorithm. The package can automatically do
|
||||
parallel computation with OpenMP, and it can be more than 10 times faster
|
||||
than existing gradient boosting packages such as gbm. It supports various
|
||||
objective functions, including regression, classification and ranking. The
|
||||
package is made to be extensible, so that users are also allowed to define
|
||||
their own objectives easily.
|
||||
License: Apache License (== 2.0) | file LICENSE
|
||||
URL: https://github.com/tqchen/xgboost
|
||||
BugReports: https://github.com/tqchen/xgboost/issues
|
||||
Depends:
|
||||
|
||||
@ -10,5 +10,6 @@ export(xgb.save)
|
||||
export(xgb.train)
|
||||
export(xgboost)
|
||||
exportMethods(predict)
|
||||
import(methods)
|
||||
importClassesFrom(Matrix,dgCMatrix)
|
||||
importClassesFrom(Matrix,dgeMatrix)
|
||||
|
||||
@ -4,20 +4,23 @@ setClass('xgb.DMatrix')
|
||||
#'
|
||||
#' Get information of an xgb.DMatrix object
|
||||
#'
|
||||
#' @param object Object of class "xgb.DMatrix"
|
||||
#' @param name the name of the field to get
|
||||
#'
|
||||
#' @examples
|
||||
#' data(iris)
|
||||
#' iris[,5] <- as.numeric(iris[,5])
|
||||
#' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5])
|
||||
#' labels <- getinfo(dtrain, "label")
|
||||
#' @rdname getinfo
|
||||
#' @export
|
||||
#'
|
||||
getinfo <- function(object, ...){
|
||||
UseMethod("getinfo")
|
||||
}
|
||||
|
||||
#' @param object Object of class "xgb.DMatrix"
|
||||
#' @param name the name of the field to get
|
||||
#' @param ... other parameters
|
||||
#' @rdname getinfo
|
||||
#' @method getinfo xgb.DMatrix
|
||||
setMethod("getinfo", signature = "xgb.DMatrix",
|
||||
definition = function(object, name) {
|
||||
if (typeof(name) != "character") {
|
||||
|
||||
@ -11,7 +11,8 @@ setClass("xgb.Booster")
|
||||
#' value of sum of functions, when outputmargin=TRUE, the prediction is
|
||||
#' untransformed margin value. In logistic regression, outputmargin=T will
|
||||
#' output value before logistic transformation.
|
||||
#'
|
||||
#' @param ntreelimit limit number of trees used in prediction, this parameter is only valid for gbtree, but not for gblinear.
|
||||
#' set it to be value bigger than 0. It will use all trees by default.
|
||||
#' @examples
|
||||
#' data(iris)
|
||||
#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2)
|
||||
@ -19,11 +20,18 @@ setClass("xgb.Booster")
|
||||
#' @export
|
||||
#'
|
||||
setMethod("predict", signature = "xgb.Booster",
|
||||
definition = function(object, newdata, outputmargin = FALSE) {
|
||||
definition = function(object, newdata, outputmargin = FALSE, ntreelimit = NULL) {
|
||||
if (class(newdata) != "xgb.DMatrix") {
|
||||
newdata <- xgb.DMatrix(newdata)
|
||||
}
|
||||
ret <- .Call("XGBoosterPredict_R", object, newdata, as.integer(outputmargin), PACKAGE = "xgboost")
|
||||
if (is.null(ntreelimit)) {
|
||||
ntreelimit <- 0
|
||||
} else {
|
||||
if (ntreelimit < 1){
|
||||
stop("predict: ntreelimit must be equal to or greater than 1")
|
||||
}
|
||||
}
|
||||
ret <- .Call("XGBoosterPredict_R", object, newdata, as.integer(outputmargin), as.integer(ntreelimit), PACKAGE = "xgboost")
|
||||
return(ret)
|
||||
})
|
||||
|
||||
|
||||
@ -6,22 +6,25 @@ setClass('xgb.DMatrix')
|
||||
#' Get a new DMatrix containing the specified rows of
|
||||
#' orginal xgb.DMatrix object
|
||||
#'
|
||||
#' @param object Object of class "xgb.DMatrix"
|
||||
#' @param idxset a integer vector of indices of rows needed
|
||||
#'
|
||||
#' @examples
|
||||
#' data(iris)
|
||||
#' iris[,5] <- as.numeric(iris[,5])
|
||||
#' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5])
|
||||
#' dsub <- slice(dtrain, 1:3)
|
||||
#' @rdname slice
|
||||
#' @export
|
||||
#'
|
||||
slice <- function(object, ...){
|
||||
UseMethod("slice")
|
||||
}
|
||||
|
||||
#' @param object Object of class "xgb.DMatrix"
|
||||
#' @param idxset an integer vector of indices of rows needed
|
||||
#' @param ... other parameters
|
||||
#' @rdname slice
|
||||
#' @method slice xgb.DMatrix
|
||||
setMethod("slice", signature = "xgb.DMatrix",
|
||||
definition = function(object, idxset) {
|
||||
definition = function(object, idxset, ...) {
|
||||
if (class(object) != "xgb.DMatrix") {
|
||||
stop("slice: first argument dtrain must be xgb.DMatrix")
|
||||
}
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
#' @importClassesFrom Matrix dgCMatrix dgeMatrix
|
||||
#' @import methods
|
||||
|
||||
# depends on matrix
|
||||
.onLoad <- function(libname, pkgname) {
|
||||
@ -48,7 +49,6 @@ xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
|
||||
}
|
||||
}
|
||||
handle <- .Call("XGBoosterCreate_R", cachelist, PACKAGE = "xgboost")
|
||||
.Call("XGBoosterSetParam_R", handle, "seed", "0", PACKAGE = "xgboost")
|
||||
if (length(params) != 0) {
|
||||
for (i in 1:length(params)) {
|
||||
p <- params[i]
|
||||
@ -121,8 +121,8 @@ xgb.iter.eval <- function(booster, watchlist, iter) {
|
||||
stop("xgb.eval: watch list can only contain xgb.DMatrix")
|
||||
}
|
||||
}
|
||||
evnames <- list()
|
||||
if (length(watchlist) != 0) {
|
||||
evnames <- list()
|
||||
for (i in 1:length(watchlist)) {
|
||||
w <- watchlist[i]
|
||||
if (length(names(w)) == 0) {
|
||||
@ -130,8 +130,10 @@ xgb.iter.eval <- function(booster, watchlist, iter) {
|
||||
}
|
||||
evnames <- append(evnames, names(w))
|
||||
}
|
||||
msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist,
|
||||
evnames, PACKAGE = "xgboost")
|
||||
} else {
|
||||
msg <- ""
|
||||
}
|
||||
msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist,
|
||||
evnames, PACKAGE = "xgboost")
|
||||
return(msg)
|
||||
}
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
#'
|
||||
#' Save xgb.DMatrix object to binary file
|
||||
#'
|
||||
#' @param model the model object.
|
||||
#' @param DMatrix the xgb.DMatrix object to save.
|
||||
#' @param fname the name of the binary file.
|
||||
#'
|
||||
#' @examples
|
||||
|
||||
@ -4,9 +4,12 @@
|
||||
#'
|
||||
#' @param model the model object.
|
||||
#' @param fname the name of the binary file.
|
||||
#' @param fmap feature map file representing the type of feature, to make it
|
||||
#' look nice, run demo/demo.R for result and demo/featmap.txt for example
|
||||
#' Format: https://github.com/tqchen/xgboost/wiki/Binary-Classification#dump-model
|
||||
#' @param fmap feature map file representing the type of feature.
|
||||
#' Detailed description could be found at
|
||||
#' \url{https://github.com/tqchen/xgboost/wiki/Binary-Classification#dump-model}.
|
||||
#' Run inst/examples/demo.R for the result and inst/examples/featmap.txt
|
||||
#' for example Format.
|
||||
#'
|
||||
#'
|
||||
#' @examples
|
||||
#' data(iris)
|
||||
|
||||
@ -15,7 +15,7 @@
|
||||
#' }
|
||||
#'
|
||||
#' See \url{https://github.com/tqchen/xgboost/wiki/Parameters} for
|
||||
#' further details. See also demo/demo.R for walkthrough example in R.
|
||||
#' further details. See also inst/examples/demo.R for walkthrough example in R.
|
||||
#' @param dtrain takes an \code{xgb.DMatrix} as the input.
|
||||
#' @param nrounds the max number of iterations
|
||||
#' @param watchlist what information should be printed when \code{verbose=1} or
|
||||
@ -24,10 +24,11 @@
|
||||
#' watchlist=list(validation1=mat1, validation2=mat2) to watch
|
||||
#' the performance of each round's model on mat1 and mat2
|
||||
#'
|
||||
#' @param obj customized objective function. Given prediction and dtrain,
|
||||
#' return gradient and second order gradient.
|
||||
#' @param feval custimized evaluation function. Given prediction and dtrain,
|
||||
#' return a \code{list(metric='metric-name', value='metric-value')}.
|
||||
#' @param obj customized objective function. Returns gradient and second order
|
||||
#' gradient with given prediction and dtrain.
|
||||
#' @param feval customized evaluation function. Returns
|
||||
#' \code{list(metric='metric-name', value='metric-value')} with given
|
||||
#' prediction and dtrain.
|
||||
#' @param ... other parameters to pass to \code{params}.
|
||||
#'
|
||||
#' @details
|
||||
|
||||
@ -19,7 +19,7 @@
|
||||
#' }
|
||||
#'
|
||||
#' See \url{https://github.com/tqchen/xgboost/wiki/Parameters} for
|
||||
#' further details. See also demo/demo.R for walkthrough example in R.
|
||||
#' further details. See also inst/examples/demo.R for walkthrough example in R.
|
||||
#' @param nrounds the max number of iterations
|
||||
#' @param verbose If 0, xgboost will stay silent. If 1, xgboost will print
|
||||
#' information of performance. If 2, xgboost will print information of both
|
||||
|
||||
@ -1,10 +1,21 @@
|
||||
This is subfolder for experimental version of R package.
|
||||
# R package for xgboost.
|
||||
|
||||
Installation:
|
||||
## Installation
|
||||
|
||||
For the up-to-date version (which is recommended), please install from GitHub. Windows users will need to install [RTools](http://cran.r-project.org/bin/windows/Rtools/) first.
|
||||
|
||||
```r
|
||||
require(devtools)
|
||||
install_github('xgboost','tqchen',subdir='R-package')
|
||||
```
|
||||
|
||||
Please visit [demo](https://github.com/tqchen/xgboost/blob/master/R-package/demo/demo.R) for more details.
|
||||
For stable version on CRAN, please run
|
||||
|
||||
```r
|
||||
install.packages('xgboost')
|
||||
```
|
||||
|
||||
## Examples
|
||||
|
||||
* Please visit [demo](https://github.com/tqchen/xgboost/blob/master/R-package/inst/examples/demo.R) for a walkthrough example.
|
||||
* See also the [example scripts](https://github.com/tqchen/xgboost/tree/master/demo/kaggle-higgs) for Kaggle Higgs Challenge, including [speedtest script](https://github.com/tqchen/xgboost/blob/master/demo/kaggle-higgs/speedtest.R) on this dataset.
|
||||
|
||||
@ -1 +0,0 @@
|
||||
demo R code for xgboost usages on agaricus data
|
||||
@ -1,14 +1,20 @@
|
||||
% Generated by roxygen2 (4.0.1): do not edit by hand
|
||||
\docType{methods}
|
||||
\name{getinfo}
|
||||
\alias{getinfo}
|
||||
\alias{getinfo,xgb.DMatrix-method}
|
||||
\title{Get information of an xgb.DMatrix object}
|
||||
\usage{
|
||||
getinfo(object, ...)
|
||||
|
||||
\S4method{getinfo}{xgb.DMatrix}(object, name)
|
||||
}
|
||||
\arguments{
|
||||
\item{object}{Object of class "xgb.DMatrix"}
|
||||
|
||||
\item{name}{the name of the field to get}
|
||||
|
||||
\item{...}{other parameters}
|
||||
}
|
||||
\description{
|
||||
Get information of an xgb.DMatrix object
|
||||
|
||||
@ -4,7 +4,8 @@
|
||||
\alias{predict,xgb.Booster-method}
|
||||
\title{Predict method for eXtreme Gradient Boosting model}
|
||||
\usage{
|
||||
\S4method{predict}{xgb.Booster}(object, newdata, outputmargin = FALSE)
|
||||
\S4method{predict}{xgb.Booster}(object, newdata, outputmargin = FALSE,
|
||||
ntreelimit = NULL)
|
||||
}
|
||||
\arguments{
|
||||
\item{object}{Object of class "xgb.Boost"}
|
||||
@ -13,9 +14,12 @@
|
||||
\code{xgb.DMatrix}.}
|
||||
|
||||
\item{outputmargin}{whether the prediction should be shown in the original
|
||||
value of sum of functions, when outputmargin=TRUE, the prediction is
|
||||
untransformed margin value. In logistic regression, outputmargin=T will
|
||||
output value before logistic transformation.}
|
||||
value of sum of functions, when outputmargin=TRUE, the prediction is
|
||||
untransformed margin value. In logistic regression, outputmargin=T will
|
||||
output value before logistic transformation.}
|
||||
|
||||
\item{ntreelimit}{limit number of trees used in prediction, this parameter is only valid for gbtree, but not for gblinear.
|
||||
set it to be value bigger than 0. It will use all trees by default.}
|
||||
}
|
||||
\description{
|
||||
Predicted values based on xgboost model object.
|
||||
|
||||
@ -1,15 +1,21 @@
|
||||
% Generated by roxygen2 (4.0.1): do not edit by hand
|
||||
\docType{methods}
|
||||
\name{slice}
|
||||
\alias{slice}
|
||||
\alias{slice,xgb.DMatrix-method}
|
||||
\title{Get a new DMatrix containing the specified rows of
|
||||
orginal xgb.DMatrix object}
|
||||
\usage{
|
||||
slice(object, ...)
|
||||
|
||||
\S4method{slice}{xgb.DMatrix}(object, idxset, ...)
|
||||
}
|
||||
\arguments{
|
||||
\item{object}{Object of class "xgb.DMatrix"}
|
||||
|
||||
\item{idxset}{a integer vector of indices of rows needed}
|
||||
|
||||
\item{...}{other parameters}
|
||||
}
|
||||
\description{
|
||||
Get a new DMatrix containing the specified rows of
|
||||
|
||||
@ -6,7 +6,7 @@
|
||||
xgb.DMatrix.save(DMatrix, fname)
|
||||
}
|
||||
\arguments{
|
||||
\item{model}{the model object.}
|
||||
\item{DMatrix}{the model object.}
|
||||
|
||||
\item{fname}{the name of the binary file.}
|
||||
}
|
||||
|
||||
@ -10,9 +10,11 @@ xgb.dump(model, fname, fmap = "")
|
||||
|
||||
\item{fname}{the name of the binary file.}
|
||||
|
||||
\item{fmap}{feature map file representing the type of feature, to make it
|
||||
look nice, run demo/demo.R for result and demo/featmap.txt for example
|
||||
Format: https://github.com/tqchen/xgboost/wiki/Binary-Classification#dump-model}
|
||||
\item{fmap}{feature map file representing the type of feature.
|
||||
Detailed description could be found at
|
||||
\url{https://github.com/tqchen/xgboost/wiki/Binary-Classification#dump-model}.
|
||||
Run inst/examples/demo.R for the result and inst/examples/featmap.txt
|
||||
for example Format.}
|
||||
}
|
||||
\description{
|
||||
Save a xgboost model to text file. Could be parsed later.
|
||||
|
||||
@ -20,7 +20,7 @@ xgb.train(params = list(), dtrain, nrounds, watchlist = list(),
|
||||
}
|
||||
|
||||
See \url{https://github.com/tqchen/xgboost/wiki/Parameters} for
|
||||
further details. See also demo/demo.R for walkthrough example in R.}
|
||||
further details. See also inst/examples/demo.R for walkthrough example in R.}
|
||||
|
||||
\item{dtrain}{takes an \code{xgb.DMatrix} as the input.}
|
||||
|
||||
@ -32,11 +32,12 @@ xgb.train(params = list(), dtrain, nrounds, watchlist = list(),
|
||||
watchlist=list(validation1=mat1, validation2=mat2) to watch
|
||||
the performance of each round's model on mat1 and mat2}
|
||||
|
||||
\item{obj}{customized objective function. Given prediction and dtrain,
|
||||
return gradient and second order gradient.}
|
||||
\item{obj}{customized objective function. Returns gradient and second order
|
||||
gradient with given prediction and dtrain,}
|
||||
|
||||
\item{feval}{custimized evaluation function. Given prediction and dtrain,
|
||||
return a \code{list(metric='metric-name', value='metric-value')}.}
|
||||
\item{feval}{custimized evaluation function. Returns
|
||||
\code{list(metric='metric-name', value='metric-value')} with given
|
||||
prediction and dtrain,}
|
||||
|
||||
\item{...}{other parameters to pass to \code{params}.}
|
||||
}
|
||||
|
||||
@ -25,7 +25,7 @@ xgboost(data = NULL, label = NULL, params = list(), nrounds,
|
||||
}
|
||||
|
||||
See \url{https://github.com/tqchen/xgboost/wiki/Parameters} for
|
||||
further details. See also demo/demo.R for walkthrough example in R.}
|
||||
further details. See also inst/examples/demo.R for walkthrough example in R.}
|
||||
|
||||
\item{nrounds}{the max number of iterations}
|
||||
|
||||
|
||||
@ -1,32 +1,7 @@
|
||||
# package root
|
||||
PKGROOT=../../
|
||||
# _*_ mode: Makefile; _*_
|
||||
CXX=`R CMD config CXX`
|
||||
TCFLAGS=`R CMD config CFLAGS`
|
||||
# expose these flags to R CMD SHLIB
|
||||
PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_ERROR_ -I$(PKGROOT) $(SHLIB_OPENMP_CFLAGS)
|
||||
PKG_CPPFLAGS+= $(SHLIB_PTHREAD_FLAGS)
|
||||
XGBFLAG= $(TCFLAGS) -DXGBOOST_CUSTOMIZE_ERROR_ -fPIC $(SHLIB_OPENMP_CFLAGS) $(SHLIB_PTHREAD_FLAGS)
|
||||
|
||||
ifeq ($(no_omp),1)
|
||||
PKG_CPPFLAGS += -DDISABLE_OPENMP
|
||||
endif
|
||||
|
||||
CXXOBJ= xgboost_wrapper.o xgboost_io.o xgboost_gbm.o xgboost_updater.o
|
||||
OBJECTS= xgboost_R.o $(CXXOBJ)
|
||||
|
||||
.PHONY: all clean
|
||||
all: $(SHLIB)
|
||||
$(SHLIB): $(OBJECTS)
|
||||
|
||||
xgboost_wrapper.o: $(PKGROOT)/wrapper/xgboost_wrapper.cpp
|
||||
xgboost_io.o: $(PKGROOT)/src/io/io.cpp
|
||||
xgboost_gbm.o: $(PKGROOT)/src/gbm/gbm.cpp
|
||||
xgboost_updater.o: $(PKGROOT)/src/tree/updater.cpp
|
||||
|
||||
$(CXXOBJ) :
|
||||
$(CXX) -c $(XGBFLAG) -o $@ $(firstword $(filter %.cpp %.c, $^) )
|
||||
|
||||
clean:
|
||||
rm -rf *.so *.o *~ *.dll
|
||||
|
||||
PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -DXGBOOST_CUSTOMIZE_PRNG_ -DXGBOOST_STRICT_CXX98_ -I$(PKGROOT)
|
||||
PKG_CXXFLAGS= $(SHLIB_OPENMP_CFLAGS)
|
||||
PKG_LIBS = $(SHLIB_OPENMP_CFLAGS)
|
||||
OBJECTS= xgboost_R.o xgboost_assert.o $(PKGROOT)/wrapper/xgboost_wrapper.o $(PKGROOT)/src/io/io.o $(PKGROOT)/src/gbm/gbm.o $(PKGROOT)/src/tree/updater.o
|
||||
|
||||
@ -1,33 +1,7 @@
|
||||
# package root
|
||||
PKGROOT=../../
|
||||
# _*_ mode: Makefile; _*_
|
||||
CXX=`Rcmd config CXX`
|
||||
TCFLAGS=`Rcmd config CFLAGS`
|
||||
# expose these flags to R CMD SHLIB
|
||||
PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_ERROR_ -I$(PKGROOT) $(SHLIB_OPENMP_CFLAGS)
|
||||
PKG_CPPFLAGS+= $(SHLIB_PTHREAD_FLAGS)
|
||||
XGBFLAG= -O3 -DXGBOOST_CUSTOMIZE_ERROR_ -fPIC $(SHLIB_OPENMP_CFLAGS) $(SHLIB_PTHREAD_FLAGS)
|
||||
PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) $(SHLIB_PTHREAD_FLAGS)
|
||||
|
||||
ifeq ($(no_omp),1)
|
||||
PKG_CPPFLAGS += -DDISABLE_OPENMP
|
||||
endif
|
||||
|
||||
CXXOBJ= xgboost_wrapper.o xgboost_io.o xgboost_gbm.o xgboost_updater.o
|
||||
OBJECTS= xgboost_R.o $(CXXOBJ)
|
||||
|
||||
.PHONY: all clean
|
||||
all: $(SHLIB)
|
||||
$(SHLIB): $(OBJECTS)
|
||||
|
||||
xgboost_wrapper.o: $(PKGROOT)/wrapper/xgboost_wrapper.cpp
|
||||
xgboost_io.o: $(PKGROOT)/src/io/io.cpp
|
||||
xgboost_gbm.o: $(PKGROOT)/src/gbm/gbm.cpp
|
||||
xgboost_updater.o: $(PKGROOT)/src/tree/updater.cpp
|
||||
|
||||
$(CXXOBJ) :
|
||||
$(CXX) -c $(XGBFLAG) -o $@ $(firstword $(filter %.cpp %.c, $^) )
|
||||
|
||||
clean:
|
||||
rm -rf *.so *.o *~ *.dll
|
||||
|
||||
PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -DXGBOOST_CUSTOMIZE_PRNG_ -DXGBOOST_STRICT_CXX98_ -I$(PKGROOT)
|
||||
PKG_CXXFLAGS= $(SHLIB_OPENMP_CFLAGS)
|
||||
PKG_LIBS = $(SHLIB_OPENMP_CFLAGS)
|
||||
OBJECTS= xgboost_R.o xgboost_assert.o $(PKGROOT)/wrapper/xgboost_wrapper.o $(PKGROOT)/src/io/io.o $(PKGROOT)/src/gbm/gbm.o $(PKGROOT)/src/tree/updater.o
|
||||
|
||||
@ -2,25 +2,55 @@
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <cstring>
|
||||
#include <cstdio>
|
||||
#include "xgboost_R.h"
|
||||
#include "wrapper/xgboost_wrapper.h"
|
||||
#include "src/utils/utils.h"
|
||||
#include "src/utils/omp.h"
|
||||
#include "src/utils/matrix_csr.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace xgboost;
|
||||
|
||||
extern "C" {
|
||||
void XGBoostAssert_R(int exp, const char *fmt, ...);
|
||||
void XGBoostCheck_R(int exp, const char *fmt, ...);
|
||||
int XGBoostSPrintf_R(char *buf, size_t size, const char *fmt, ...);
|
||||
}
|
||||
|
||||
// implements error handling
|
||||
namespace xgboost {
|
||||
namespace utils {
|
||||
void HandleAssertError(const char *msg) {
|
||||
error("%s", msg);
|
||||
}
|
||||
void HandleCheckError(const char *msg) {
|
||||
error("%s", msg);
|
||||
extern "C" {
|
||||
void (*Printf)(const char *fmt, ...) = Rprintf;
|
||||
int (*SPrintf)(char *buf, size_t size, const char *fmt, ...) = XGBoostSPrintf_R;
|
||||
void (*Assert)(int exp, const char *fmt, ...) = XGBoostAssert_R;
|
||||
void (*Check)(int exp, const char *fmt, ...) = XGBoostCheck_R;
|
||||
void (*Error)(const char *fmt, ...) = error;
|
||||
}
|
||||
} // namespace utils
|
||||
|
||||
namespace random {
|
||||
void Seed(unsigned seed) {
|
||||
warning("parameter seed is ignored, please set random seed using set.seed");
|
||||
}
|
||||
double Uniform(void) {
|
||||
return unif_rand();
|
||||
}
|
||||
double Normal(void) {
|
||||
return norm_rand();
|
||||
}
|
||||
} // namespace random
|
||||
} // namespace xgboost
|
||||
|
||||
// call before wrapper starts
|
||||
inline void _WrapperBegin(void) {
|
||||
GetRNGstate();
|
||||
}
|
||||
// call after wrapper ends
|
||||
inline void _WrapperEnd(void) {
|
||||
PutRNGstate();
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
void _DMatrixFinalizer(SEXP ext) {
|
||||
if (R_ExternalPtrAddr(ext) == NULL) return;
|
||||
@ -28,14 +58,17 @@ extern "C" {
|
||||
R_ClearExternalPtr(ext);
|
||||
}
|
||||
SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) {
|
||||
_WrapperBegin();
|
||||
void *handle = XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent));
|
||||
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
|
||||
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
|
||||
UNPROTECT(1);
|
||||
_WrapperEnd();
|
||||
return ret;
|
||||
}
|
||||
SEXP XGDMatrixCreateFromMat_R(SEXP mat,
|
||||
SEXP missing) {
|
||||
_WrapperBegin();
|
||||
SEXP dim = getAttrib(mat, R_DimSymbol);
|
||||
int nrow = INTEGER(dim)[0];
|
||||
int ncol = INTEGER(dim)[1];
|
||||
@ -47,15 +80,17 @@ extern "C" {
|
||||
data[i * ncol +j] = din[i + nrow * j];
|
||||
}
|
||||
}
|
||||
void *handle = XGDMatrixCreateFromMat(&data[0], nrow, ncol, asReal(missing));
|
||||
void *handle = XGDMatrixCreateFromMat(BeginPtr(data), nrow, ncol, asReal(missing));
|
||||
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
|
||||
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
|
||||
UNPROTECT(1);
|
||||
_WrapperEnd();
|
||||
return ret;
|
||||
}
|
||||
SEXP XGDMatrixCreateFromCSC_R(SEXP indptr,
|
||||
SEXP indices,
|
||||
SEXP data) {
|
||||
_WrapperBegin();
|
||||
const int *col_ptr = INTEGER(indptr);
|
||||
const int *row_index = INTEGER(indices);
|
||||
const double *col_data = REAL(data);
|
||||
@ -85,29 +120,36 @@ extern "C" {
|
||||
col_index[i] = csr_data[i].first;
|
||||
row_data[i] = csr_data[i].second;
|
||||
}
|
||||
void *handle = XGDMatrixCreateFromCSR(&row_ptr[0], &col_index[0], &row_data[0], row_ptr.size(), ndata );
|
||||
void *handle = XGDMatrixCreateFromCSR(BeginPtr(row_ptr), BeginPtr(col_index),
|
||||
BeginPtr(row_data), row_ptr.size(), ndata );
|
||||
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
|
||||
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
|
||||
UNPROTECT(1);
|
||||
_WrapperEnd();
|
||||
return ret;
|
||||
}
|
||||
SEXP XGDMatrixSliceDMatrix_R(SEXP handle, SEXP idxset) {
|
||||
_WrapperBegin();
|
||||
int len = length(idxset);
|
||||
std::vector<int> idxvec(len);
|
||||
for (int i = 0; i < len; ++i) {
|
||||
idxvec[i] = INTEGER(idxset)[i] - 1;
|
||||
}
|
||||
void *res = XGDMatrixSliceDMatrix(R_ExternalPtrAddr(handle), &idxvec[0], len);
|
||||
void *res = XGDMatrixSliceDMatrix(R_ExternalPtrAddr(handle), BeginPtr(idxvec), len);
|
||||
SEXP ret = PROTECT(R_MakeExternalPtr(res, R_NilValue, R_NilValue));
|
||||
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
|
||||
UNPROTECT(1);
|
||||
_WrapperEnd();
|
||||
return ret;
|
||||
}
|
||||
void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent) {
|
||||
_WrapperBegin();
|
||||
XGDMatrixSaveBinary(R_ExternalPtrAddr(handle),
|
||||
CHAR(asChar(fname)), asInteger(silent));
|
||||
_WrapperEnd();
|
||||
}
|
||||
void XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array) {
|
||||
_WrapperBegin();
|
||||
int len = length(array);
|
||||
const char *name = CHAR(asChar(field));
|
||||
if (!strcmp("group", name)) {
|
||||
@ -116,7 +158,8 @@ extern "C" {
|
||||
for (int i = 0; i < len; ++i) {
|
||||
vec[i] = static_cast<unsigned>(INTEGER(array)[i]);
|
||||
}
|
||||
XGDMatrixSetGroup(R_ExternalPtrAddr(handle), &vec[0], len);
|
||||
XGDMatrixSetGroup(R_ExternalPtrAddr(handle), BeginPtr(vec), len);
|
||||
_WrapperEnd();
|
||||
return;
|
||||
}
|
||||
{
|
||||
@ -127,10 +170,12 @@ extern "C" {
|
||||
}
|
||||
XGDMatrixSetFloatInfo(R_ExternalPtrAddr(handle),
|
||||
CHAR(asChar(field)),
|
||||
&vec[0], len);
|
||||
BeginPtr(vec), len);
|
||||
}
|
||||
_WrapperEnd();
|
||||
}
|
||||
SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) {
|
||||
_WrapperBegin();
|
||||
bst_ulong olen;
|
||||
const float *res = XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle),
|
||||
CHAR(asChar(field)), &olen);
|
||||
@ -139,6 +184,7 @@ extern "C" {
|
||||
REAL(ret)[i] = res[i];
|
||||
}
|
||||
UNPROTECT(1);
|
||||
_WrapperEnd();
|
||||
return ret;
|
||||
}
|
||||
// functions related to booster
|
||||
@ -148,28 +194,35 @@ extern "C" {
|
||||
R_ClearExternalPtr(ext);
|
||||
}
|
||||
SEXP XGBoosterCreate_R(SEXP dmats) {
|
||||
_WrapperBegin();
|
||||
int len = length(dmats);
|
||||
std::vector<void*> dvec;
|
||||
for (int i = 0; i < len; ++i){
|
||||
dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i)));
|
||||
}
|
||||
void *handle = XGBoosterCreate(&dvec[0], dvec.size());
|
||||
void *handle = XGBoosterCreate(BeginPtr(dvec), dvec.size());
|
||||
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
|
||||
R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
|
||||
UNPROTECT(1);
|
||||
_WrapperEnd();
|
||||
return ret;
|
||||
}
|
||||
void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val) {
|
||||
_WrapperBegin();
|
||||
XGBoosterSetParam(R_ExternalPtrAddr(handle),
|
||||
CHAR(asChar(name)),
|
||||
CHAR(asChar(val)));
|
||||
_WrapperEnd();
|
||||
}
|
||||
void XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain) {
|
||||
_WrapperBegin();
|
||||
XGBoosterUpdateOneIter(R_ExternalPtrAddr(handle),
|
||||
asInteger(iter),
|
||||
R_ExternalPtrAddr(dtrain));
|
||||
_WrapperEnd();
|
||||
}
|
||||
void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess) {
|
||||
_WrapperBegin();
|
||||
utils::Check(length(grad) == length(hess), "gradient and hess must have same length");
|
||||
int len = length(grad);
|
||||
std::vector<float> tgrad(len), thess(len);
|
||||
@ -180,9 +233,11 @@ extern "C" {
|
||||
}
|
||||
XGBoosterBoostOneIter(R_ExternalPtrAddr(handle),
|
||||
R_ExternalPtrAddr(dtrain),
|
||||
&tgrad[0], &thess[0], len);
|
||||
BeginPtr(tgrad), BeginPtr(thess), len);
|
||||
_WrapperEnd();
|
||||
}
|
||||
SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames) {
|
||||
_WrapperBegin();
|
||||
utils::Check(length(dmats) == length(evnames), "dmats and evnams must have same length");
|
||||
int len = length(dmats);
|
||||
std::vector<void*> vec_dmats;
|
||||
@ -197,28 +252,37 @@ extern "C" {
|
||||
}
|
||||
return mkString(XGBoosterEvalOneIter(R_ExternalPtrAddr(handle),
|
||||
asInteger(iter),
|
||||
&vec_dmats[0], &vec_sptr[0], len));
|
||||
BeginPtr(vec_dmats), BeginPtr(vec_sptr), len));
|
||||
_WrapperEnd();
|
||||
}
|
||||
SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin) {
|
||||
SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin, SEXP ntree_limit) {
|
||||
_WrapperBegin();
|
||||
bst_ulong olen;
|
||||
const float *res = XGBoosterPredict(R_ExternalPtrAddr(handle),
|
||||
R_ExternalPtrAddr(dmat),
|
||||
asInteger(output_margin),
|
||||
asInteger(ntree_limit),
|
||||
&olen);
|
||||
SEXP ret = PROTECT(allocVector(REALSXP, olen));
|
||||
for (size_t i = 0; i < olen; ++i) {
|
||||
REAL(ret)[i] = res[i];
|
||||
}
|
||||
UNPROTECT(1);
|
||||
_WrapperEnd();
|
||||
return ret;
|
||||
}
|
||||
void XGBoosterLoadModel_R(SEXP handle, SEXP fname) {
|
||||
_WrapperBegin();
|
||||
XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)));
|
||||
_WrapperEnd();
|
||||
}
|
||||
void XGBoosterSaveModel_R(SEXP handle, SEXP fname) {
|
||||
_WrapperBegin();
|
||||
XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)));
|
||||
_WrapperEnd();
|
||||
}
|
||||
void XGBoosterDumpModel_R(SEXP handle, SEXP fname, SEXP fmap) {
|
||||
_WrapperBegin();
|
||||
bst_ulong olen;
|
||||
const char **res = XGBoosterDumpModel(R_ExternalPtrAddr(handle),
|
||||
CHAR(asChar(fmap)),
|
||||
@ -229,5 +293,6 @@ extern "C" {
|
||||
fprintf(fo, "%s", res[i]);
|
||||
}
|
||||
fclose(fo);
|
||||
_WrapperEnd();
|
||||
}
|
||||
}
|
||||
|
||||
@ -7,6 +7,7 @@
|
||||
*/
|
||||
extern "C" {
|
||||
#include <Rinternals.h>
|
||||
#include <R_ext/Random.h>
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
@ -106,8 +107,9 @@ extern "C" {
|
||||
* \param handle handle
|
||||
* \param dmat data matrix
|
||||
* \param output_margin whether only output raw margin value
|
||||
* \param ntree_limit limit number of trees used in prediction
|
||||
*/
|
||||
SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin);
|
||||
SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin, SEXP ntree_limit);
|
||||
/*!
|
||||
* \brief load model from existing file
|
||||
* \param handle handle
|
||||
|
||||
33
R-package/src/xgboost_assert.c
Normal file
33
R-package/src/xgboost_assert.c
Normal file
@ -0,0 +1,33 @@
|
||||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
#include <Rinternals.h>
|
||||
|
||||
// implements error handling
|
||||
// Assertion hook: when `exp` is zero, formats the printf-style message
// described by `fmt` and the variadic arguments, then reports it through
// R's error() with an "AssertError:" prefix. Does nothing when `exp` is
// non-zero.
//
// exp: condition result; 0 means the assertion failed.
// fmt: printf-style format string for the message.
void XGBoostAssert_R(int exp, const char *fmt, ...) {
  if (exp == 0) {
    char buf[1024];
    va_list args;
    va_start(args, fmt);
    // Bounded formatting: a message longer than the buffer is truncated
    // instead of writing past the end of `buf`.
    vsnprintf(buf, sizeof(buf), fmt, args);
    va_end(args);
    error("AssertError:%s\n", buf);
  }
}
|
||||
/*
 * User-facing check hook: when exp is zero, format the printf-style message
 * and raise it through R's error(). A non-zero exp is a no-op.
 *
 * exp: condition value; zero means the check failed.
 * fmt: printf-style format string, followed by its arguments.
 */
void XGBoostCheck_R(int exp, const char *fmt, ...) {
  char buf[1024];
  if (exp == 0) {
    va_list args;
    va_start(args, fmt);
    /* bounded write: an over-long message is truncated instead of
       overrunning the fixed-size stack buffer */
    vsnprintf(buf, sizeof(buf), fmt, args);
    va_end(args);
    error("%s\n", buf);
  }
}
|
||||
/*
 * Bounded formatted print into a caller-supplied buffer.
 * Forwards the variadic arguments to vsnprintf and returns its result.
 *
 * buf:  destination buffer.
 * size: capacity of buf in bytes, passed straight to vsnprintf.
 * fmt:  printf-style format string, followed by its arguments.
 */
int XGBoostSPrintf_R(char *buf, size_t size, const char *fmt, ...) {
  va_list ap;
  va_start(ap, fmt);
  const int written = vsnprintf(buf, size, fmt, ap);
  va_end(ap);
  return written;
}
|
||||
@ -173,7 +173,7 @@ objective function.
|
||||
We also have \verb@slice@ for row extraction. It is useful in
|
||||
cross-validation.
|
||||
|
||||
For a walkthrough demo, please see \verb@R-package/demo/demo.R@ for further
|
||||
For a walkthrough demo, please see \verb@R-package/inst/examples/demo.R@ for further
|
||||
details.
|
||||
|
||||
\section{The Higgs Boson competition}
|
||||
|
||||
@ -35,11 +35,11 @@ Version
|
||||
======
|
||||
* This version is xgboost-0.3; the code has been refactored from 0.2x to be cleaner and more flexible
|
||||
* This version of xgboost is not compatible with 0.2x, due to huge amount of changes in code structure
|
||||
- This means the model and buffer file of previous version can not be loaded in xgboost-unity
|
||||
- This means the model and buffer files of previous versions cannot be loaded in xgboost-0.3
|
||||
* For legacy 0.2x code, refer to [Here](https://github.com/tqchen/xgboost/releases/tag/v0.22)
|
||||
* Change log in [CHANGES.md](CHANGES.md)
|
||||
|
||||
XGBoost in Graphlab Create
|
||||
======
|
||||
* XGBoost is adopted as part of boosted tree toolkit in Graphlab Create (GLC). Graphlab Create is a powerful python toolkit that allows you to data manipulation, graph processing, hyper-parameter search, and visualization of big data in one framework. Try the Graphlab Create in http://graphlab.com/products/create/quick-start-guide.html
|
||||
* XGBoost is adopted as part of the boosted tree toolkit in Graphlab Create (GLC). Graphlab Create is a powerful python toolkit that allows you to do data manipulation, graph processing, hyper-parameter search, and visualization of terabyte-scale data in one framework. Try Graphlab Create at http://graphlab.com/products/create/quick-start-guide.html
|
||||
* Nice blogpost by Jay Gu using GLC boosted tree to solve kaggle bike sharing challenge: http://blog.graphlab.com/using-gradient-boosted-trees-to-predict-bike-sharing-demand
|
||||
|
||||
@ -105,7 +105,10 @@ class GBLinear : public IGradBooster {
|
||||
virtual void Predict(IFMatrix *p_fmat,
|
||||
int64_t buffer_offset,
|
||||
const BoosterInfo &info,
|
||||
std::vector<float> *out_preds) {
|
||||
std::vector<float> *out_preds,
|
||||
unsigned ntree_limit = 0) {
|
||||
utils::Check(ntree_limit == 0,
|
||||
"GBLinear::Predict ntrees is only valid for gbtree predictor");
|
||||
std::vector<float> &preds = *out_preds;
|
||||
preds.resize(0);
|
||||
// start collecting the prediction
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
#define _CRT_SECURE_NO_WARNINGS
|
||||
#define _CRT_SECURE_NO_DEPRECATE
|
||||
#include <cstring>
|
||||
using namespace std;
|
||||
#include "./gbm.h"
|
||||
#include "./gbtree-inl.hpp"
|
||||
#include "./gblinear-inl.hpp"
|
||||
|
||||
@ -57,11 +57,14 @@ class IGradBooster {
|
||||
* the size of buffer is set by convention using IGradBooster.SetParam("num_pbuffer","size")
|
||||
* \param info extra side information that may be needed for prediction
|
||||
* \param out_preds output vector to hold the predictions
|
||||
* \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means
|
||||
* we do not limit number of trees, this parameter is only valid for gbtree, but not for gblinear
|
||||
*/
|
||||
virtual void Predict(IFMatrix *p_fmat,
|
||||
int64_t buffer_offset,
|
||||
const BoosterInfo &info,
|
||||
std::vector<float> *out_preds) = 0;
|
||||
std::vector<float> *out_preds,
|
||||
unsigned ntree_limit = 0) = 0;
|
||||
/*!
|
||||
* \brief dump the model in text format
|
||||
* \param fmap feature map that may help give interpretations of feature
|
||||
|
||||
@ -105,7 +105,8 @@ class GBTree : public IGradBooster {
|
||||
virtual void Predict(IFMatrix *p_fmat,
|
||||
int64_t buffer_offset,
|
||||
const BoosterInfo &info,
|
||||
std::vector<float> *out_preds) {
|
||||
std::vector<float> *out_preds,
|
||||
unsigned ntree_limit = 0) {
|
||||
int nthread;
|
||||
#pragma omp parallel
|
||||
{
|
||||
@ -137,7 +138,8 @@ class GBTree : public IGradBooster {
|
||||
this->Pred(batch[i],
|
||||
buffer_offset < 0 ? -1 : buffer_offset + ridx,
|
||||
gid, info.GetRoot(ridx), &feats,
|
||||
&preds[ridx * mparam.num_output_group + gid], stride);
|
||||
&preds[ridx * mparam.num_output_group + gid], stride,
|
||||
ntree_limit);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -212,14 +214,16 @@ class GBTree : public IGradBooster {
|
||||
int bst_group,
|
||||
unsigned root_index,
|
||||
tree::RegTree::FVec *p_feats,
|
||||
float *out_pred, size_t stride) {
|
||||
float *out_pred, size_t stride, unsigned ntree_limit) {
|
||||
size_t itop = 0;
|
||||
float psum = 0.0f;
|
||||
// sum of leaf vector
|
||||
std::vector<float> vec_psum(mparam.size_leaf_vector, 0.0f);
|
||||
const int64_t bid = mparam.BufferOffset(buffer_index, bst_group);
|
||||
// number of valid trees
|
||||
unsigned treeleft = ntree_limit == 0 ? std::numeric_limits<unsigned>::max() : ntree_limit;
|
||||
// load buffered results if any
|
||||
if (bid >= 0) {
|
||||
if (bid >= 0 && ntree_limit == 0) {
|
||||
itop = pred_counter[bid];
|
||||
psum = pred_buffer[bid];
|
||||
for (int i = 0; i < mparam.size_leaf_vector; ++i) {
|
||||
@ -235,12 +239,13 @@ class GBTree : public IGradBooster {
|
||||
for (int j = 0; j < mparam.size_leaf_vector; ++j) {
|
||||
vec_psum[j] += trees[i]->leafvec(tid)[j];
|
||||
}
|
||||
if(--treeleft == 0) break;
|
||||
}
|
||||
}
|
||||
p_feats->Drop(inst);
|
||||
}
|
||||
// updated the buffered results
|
||||
if (bid >= 0) {
|
||||
if (bid >= 0 && ntree_limit == 0) {
|
||||
pred_counter[bid] = static_cast<unsigned>(trees.size());
|
||||
pred_buffer[bid] = psum;
|
||||
for (int i = 0; i < mparam.size_leaf_vector; ++i) {
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
#define _CRT_SECURE_NO_WARNINGS
|
||||
#define _CRT_SECURE_NO_DEPRECATE
|
||||
#include <string>
|
||||
using namespace std;
|
||||
#include "./io.h"
|
||||
#include "../utils/io.h"
|
||||
#include "../utils/utils.h"
|
||||
|
||||
@ -54,8 +54,10 @@ class DMatrixSimple : public DataMatrix {
|
||||
for (size_t i = 0; i < batch.size; ++i) {
|
||||
RowBatch::Inst inst = batch[i];
|
||||
row_data_.resize(row_data_.size() + inst.length);
|
||||
memcpy(&row_data_[row_ptr_.back()], inst.data,
|
||||
sizeof(RowBatch::Entry) * inst.length);
|
||||
if (inst.length != 0) {
|
||||
memcpy(&row_data_[row_ptr_.back()], inst.data,
|
||||
sizeof(RowBatch::Entry) * inst.length);
|
||||
}
|
||||
row_ptr_.push_back(row_ptr_.back() + inst.length);
|
||||
}
|
||||
}
|
||||
@ -104,10 +106,10 @@ class DMatrixSimple : public DataMatrix {
|
||||
this->AddRow(feats);
|
||||
|
||||
if (!silent) {
|
||||
printf("%lux%lu matrix with %lu entries is loaded from %s\n",
|
||||
static_cast<unsigned long>(info.num_row()),
|
||||
static_cast<unsigned long>(info.num_col()),
|
||||
static_cast<unsigned long>(row_data_.size()), fname);
|
||||
utils::Printf("%lux%lu matrix with %lu entries is loaded from %s\n",
|
||||
static_cast<unsigned long>(info.num_row()),
|
||||
static_cast<unsigned long>(info.num_col()),
|
||||
static_cast<unsigned long>(row_data_.size()), fname);
|
||||
}
|
||||
fclose(file);
|
||||
// try to load in additional file
|
||||
@ -147,26 +149,26 @@ class DMatrixSimple : public DataMatrix {
|
||||
* \param fname file name, used to print message
|
||||
*/
|
||||
inline void LoadBinary(utils::IStream &fs, bool silent = false, const char *fname = NULL) {
|
||||
int magic;
|
||||
utils::Check(fs.Read(&magic, sizeof(magic)) != 0, "invalid input file format");
|
||||
utils::Check(magic == kMagic, "invalid format,magic number mismatch");
|
||||
int tmagic;
|
||||
utils::Check(fs.Read(&tmagic, sizeof(tmagic)) != 0, "invalid input file format");
|
||||
utils::Check(tmagic == kMagic, "invalid format,magic number mismatch");
|
||||
|
||||
info.LoadBinary(fs);
|
||||
FMatrixS::LoadBinary(fs, &row_ptr_, &row_data_);
|
||||
fmat_->LoadColAccess(fs);
|
||||
|
||||
if (!silent) {
|
||||
printf("%lux%lu matrix with %lu entries is loaded",
|
||||
static_cast<unsigned long>(info.num_row()),
|
||||
static_cast<unsigned long>(info.num_col()),
|
||||
static_cast<unsigned long>(row_data_.size()));
|
||||
utils::Printf("%lux%lu matrix with %lu entries is loaded",
|
||||
static_cast<unsigned long>(info.num_row()),
|
||||
static_cast<unsigned long>(info.num_col()),
|
||||
static_cast<unsigned long>(row_data_.size()));
|
||||
if (fname != NULL) {
|
||||
printf(" from %s\n", fname);
|
||||
utils::Printf(" from %s\n", fname);
|
||||
} else {
|
||||
printf("\n");
|
||||
utils::Printf("\n");
|
||||
}
|
||||
if (info.group_ptr.size() != 0) {
|
||||
printf("data contains %u groups\n", (unsigned)info.group_ptr.size()-1);
|
||||
utils::Printf("data contains %u groups\n", (unsigned)info.group_ptr.size()-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -177,8 +179,8 @@ class DMatrixSimple : public DataMatrix {
|
||||
*/
|
||||
inline void SaveBinary(const char* fname, bool silent = false) const {
|
||||
utils::FileStream fs(utils::FopenCheck(fname, "wb"));
|
||||
int magic = kMagic;
|
||||
fs.Write(&magic, sizeof(magic));
|
||||
int tmagic = kMagic;
|
||||
fs.Write(&tmagic, sizeof(tmagic));
|
||||
|
||||
info.SaveBinary(fs);
|
||||
FMatrixS::SaveBinary(fs, row_ptr_, row_data_);
|
||||
@ -186,13 +188,13 @@ class DMatrixSimple : public DataMatrix {
|
||||
fs.Close();
|
||||
|
||||
if (!silent) {
|
||||
printf("%lux%lu matrix with %lu entries is saved to %s\n",
|
||||
static_cast<unsigned long>(info.num_row()),
|
||||
static_cast<unsigned long>(info.num_col()),
|
||||
static_cast<unsigned long>(row_data_.size()), fname);
|
||||
utils::Printf("%lux%lu matrix with %lu entries is saved to %s\n",
|
||||
static_cast<unsigned long>(info.num_row()),
|
||||
static_cast<unsigned long>(info.num_col()),
|
||||
static_cast<unsigned long>(row_data_.size()), fname);
|
||||
if (info.group_ptr.size() != 0) {
|
||||
printf("data contains %u groups\n",
|
||||
static_cast<unsigned>(info.group_ptr.size()-1));
|
||||
utils::Printf("data contains %u groups\n",
|
||||
static_cast<unsigned>(info.group_ptr.size()-1));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -244,8 +246,8 @@ class DMatrixSimple : public DataMatrix {
|
||||
at_first_ = false;
|
||||
batch_.size = parent_->row_ptr_.size() - 1;
|
||||
batch_.base_rowid = 0;
|
||||
batch_.ind_ptr = &parent_->row_ptr_[0];
|
||||
batch_.data_ptr = &parent_->row_data_[0];
|
||||
batch_.ind_ptr = BeginPtr(parent_->row_ptr_);
|
||||
batch_.data_ptr = BeginPtr(parent_->row_data_);
|
||||
return true;
|
||||
}
|
||||
virtual const RowBatch &Value(void) const {
|
||||
|
||||
@ -110,9 +110,9 @@ class FMatrixS : public IFMatrix{
|
||||
const std::vector<RowBatch::Entry> &data) {
|
||||
size_t nrow = ptr.size() - 1;
|
||||
fo.Write(&nrow, sizeof(size_t));
|
||||
fo.Write(&ptr[0], ptr.size() * sizeof(size_t));
|
||||
fo.Write(BeginPtr(ptr), ptr.size() * sizeof(size_t));
|
||||
if (data.size() != 0) {
|
||||
fo.Write(&data[0], data.size() * sizeof(RowBatch::Entry));
|
||||
fo.Write(BeginPtr(data), data.size() * sizeof(RowBatch::Entry));
|
||||
}
|
||||
}
|
||||
/*!
|
||||
@ -127,11 +127,11 @@ class FMatrixS : public IFMatrix{
|
||||
size_t nrow;
|
||||
utils::Check(fi.Read(&nrow, sizeof(size_t)) != 0, "invalid input file format");
|
||||
out_ptr->resize(nrow + 1);
|
||||
utils::Check(fi.Read(&(*out_ptr)[0], out_ptr->size() * sizeof(size_t)) != 0,
|
||||
utils::Check(fi.Read(BeginPtr(*out_ptr), out_ptr->size() * sizeof(size_t)) != 0,
|
||||
"invalid input file format");
|
||||
out_data->resize(out_ptr->back());
|
||||
if (out_data->size() != 0) {
|
||||
utils::Assert(fi.Read(&(*out_data)[0], out_data->size() * sizeof(RowBatch::Entry)) != 0,
|
||||
utils::Assert(fi.Read(BeginPtr(*out_data), out_data->size() * sizeof(RowBatch::Entry)) != 0,
|
||||
"invalid input file format");
|
||||
}
|
||||
}
|
||||
@ -213,8 +213,8 @@ class FMatrixS : public IFMatrix{
|
||||
col_data_[i] = SparseBatch::Inst(&data[0] + ptr[ridx],
|
||||
static_cast<bst_uint>(ptr[ridx+1] - ptr[ridx]));
|
||||
}
|
||||
batch_.col_index = &col_index_[0];
|
||||
batch_.col_data = &col_data_[0];
|
||||
batch_.col_index = BeginPtr(col_index_);
|
||||
batch_.col_data = BeginPtr(col_data_);
|
||||
this->BeforeFirst();
|
||||
}
|
||||
// data content
|
||||
|
||||
@ -98,8 +98,8 @@ struct MetaInfo {
|
||||
group_ptr.push_back(group_ptr.back()+nline);
|
||||
}
|
||||
if (!silent) {
|
||||
printf("%u groups are loaded from %s\n",
|
||||
static_cast<unsigned>(group_ptr.size()-1), fname);
|
||||
utils::Printf("%u groups are loaded from %s\n",
|
||||
static_cast<unsigned>(group_ptr.size()-1), fname);
|
||||
}
|
||||
fclose(fi);
|
||||
return true;
|
||||
@ -125,15 +125,15 @@ struct MetaInfo {
|
||||
}
|
||||
// try to load weight information from file, if exists
|
||||
inline bool TryLoadFloatInfo(const char *field, const char* fname, bool silent = false) {
|
||||
std::vector<float> &weights = this->GetFloatInfo(field);
|
||||
std::vector<float> &data = this->GetFloatInfo(field);
|
||||
FILE *fi = fopen64(fname, "r");
|
||||
if (fi == NULL) return false;
|
||||
float wt;
|
||||
while (fscanf(fi, "%f", &wt) == 1) {
|
||||
weights.push_back(wt);
|
||||
data.push_back(wt);
|
||||
}
|
||||
if (!silent) {
|
||||
printf("loading %s from %s\n", field, fname);
|
||||
utils::Printf("loading %s from %s\n", field, fname);
|
||||
}
|
||||
fclose(fi);
|
||||
return true;
|
||||
|
||||
@ -8,8 +8,8 @@
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include <string>
|
||||
#include <climits>
|
||||
#include <cmath>
|
||||
#include <climits>
|
||||
#include <algorithm>
|
||||
#include "./evaluation.h"
|
||||
#include "./helper_utils.h"
|
||||
@ -183,7 +183,7 @@ struct EvalAMS : public IEvaluator {
|
||||
}
|
||||
}
|
||||
if (ntop == ndata) {
|
||||
fprintf(stderr, "\tams-ratio=%g", static_cast<float>(thresindex) / ndata);
|
||||
utils::Printf("\tams-ratio=%g", static_cast<float>(thresindex) / ndata);
|
||||
return static_cast<float>(tams);
|
||||
} else {
|
||||
return static_cast<float>(sqrt(2*((s_tp+b_fp+br) * log(1.0 + s_tp/(b_fp+br)) - s_tp)));
|
||||
|
||||
@ -73,7 +73,7 @@ class EvalSet{
|
||||
for (size_t i = 0; i < evals_.size(); ++i) {
|
||||
float res = evals_[i]->Eval(preds, info);
|
||||
char tmp[1024];
|
||||
snprintf(tmp, sizeof(tmp), "\t%s-%s:%f", evname, evals_[i]->Name(), res);
|
||||
utils::SPrintf(tmp, sizeof(tmp), "\t%s-%s:%f", evname, evals_[i]->Name(), res);
|
||||
result += tmp;
|
||||
}
|
||||
return result;
|
||||
|
||||
@ -7,6 +7,7 @@
|
||||
*/
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <cmath>
|
||||
#include <algorithm>
|
||||
namespace xgboost {
|
||||
namespace learner {
|
||||
|
||||
@ -63,14 +63,14 @@ class BoostLearner {
|
||||
}
|
||||
char str_temp[25];
|
||||
if (num_feature > mparam.num_feature) {
|
||||
snprintf(str_temp, sizeof(str_temp), "%u", num_feature);
|
||||
utils::SPrintf(str_temp, sizeof(str_temp), "%u", num_feature);
|
||||
this->SetParam("bst:num_feature", str_temp);
|
||||
}
|
||||
snprintf(str_temp, sizeof(str_temp), "%lu",
|
||||
utils::SPrintf(str_temp, sizeof(str_temp), "%lu",
|
||||
static_cast<unsigned long>(buffer_size));
|
||||
this->SetParam("num_pbuffer", str_temp);
|
||||
if (!silent) {
|
||||
printf("buffer_size=%ld\n", static_cast<long>(buffer_size));
|
||||
utils::Printf("buffer_size=%ld\n", static_cast<long>(buffer_size));
|
||||
}
|
||||
}
|
||||
/*!
|
||||
@ -183,7 +183,7 @@ class BoostLearner {
|
||||
const std::vector<std::string> &evname) {
|
||||
std::string res;
|
||||
char tmp[256];
|
||||
snprintf(tmp, sizeof(tmp), "[%d]", iter);
|
||||
utils::SPrintf(tmp, sizeof(tmp), "[%d]", iter);
|
||||
res = tmp;
|
||||
for (size_t i = 0; i < evals.size(); ++i) {
|
||||
this->PredictRaw(*evals[i], &preds_);
|
||||
@ -212,11 +212,14 @@ class BoostLearner {
|
||||
* \param data input data
|
||||
* \param output_margin whether to only predict margin value instead of transformed prediction
|
||||
* \param out_preds output vector that stores the prediction
|
||||
* \param ntree_limit limit number of trees used for boosted tree
|
||||
* predictor, when it equals 0, this means we are using all the trees
|
||||
*/
|
||||
inline void Predict(const DMatrix &data,
|
||||
bool output_margin,
|
||||
std::vector<float> *out_preds) const {
|
||||
this->PredictRaw(data, out_preds);
|
||||
std::vector<float> *out_preds,
|
||||
unsigned ntree_limit = 0) const {
|
||||
this->PredictRaw(data, out_preds, ntree_limit);
|
||||
if (!output_margin) {
|
||||
obj_->PredTransform(out_preds);
|
||||
}
|
||||
@ -246,11 +249,14 @@ class BoostLearner {
|
||||
* \brief get un-transformed prediction
|
||||
* \param data training data matrix
|
||||
* \param out_preds output vector that stores the prediction
|
||||
* \param ntree_limit limit number of trees used for boosted tree
|
||||
* predictor, when it equals 0, this means we are using all the trees
|
||||
*/
|
||||
inline void PredictRaw(const DMatrix &data,
|
||||
std::vector<float> *out_preds) const {
|
||||
std::vector<float> *out_preds,
|
||||
unsigned ntree_limit = 0) const {
|
||||
gbm_->Predict(data.fmat(), this->FindBufferOffset(data),
|
||||
data.info.info, out_preds);
|
||||
data.info.info, out_preds, ntree_limit);
|
||||
// add base margin
|
||||
std::vector<float> &preds = *out_preds;
|
||||
const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size());
|
||||
|
||||
@ -6,9 +6,9 @@
|
||||
* \author Tianqi Chen, Kailong Chen
|
||||
*/
|
||||
#include <vector>
|
||||
#include <cmath>
|
||||
#include <algorithm>
|
||||
#include <utility>
|
||||
#include <cmath>
|
||||
#include <functional>
|
||||
#include "../data.h"
|
||||
#include "./objective.h"
|
||||
@ -37,7 +37,7 @@ struct LossType {
|
||||
case kLogisticRaw:
|
||||
case kLinearSquare: return x;
|
||||
case kLogisticClassify:
|
||||
case kLogisticNeglik: return 1.0f / (1.0f + expf(-x));
|
||||
case kLogisticNeglik: return 1.0f / (1.0f + std::exp(-x));
|
||||
default: utils::Error("unknown loss_type"); return 0.0f;
|
||||
}
|
||||
}
|
||||
@ -50,7 +50,7 @@ struct LossType {
|
||||
inline float FirstOrderGradient(float predt, float label) const {
|
||||
switch (loss_type) {
|
||||
case kLinearSquare: return predt - label;
|
||||
case kLogisticRaw: predt = 1.0f / (1.0f + expf(-predt));
|
||||
case kLogisticRaw: predt = 1.0f / (1.0f + std::exp(-predt));
|
||||
case kLogisticClassify:
|
||||
case kLogisticNeglik: return predt - label;
|
||||
default: utils::Error("unknown loss_type"); return 0.0f;
|
||||
@ -65,7 +65,7 @@ struct LossType {
|
||||
inline float SecondOrderGradient(float predt, float label) const {
|
||||
switch (loss_type) {
|
||||
case kLinearSquare: return 1.0f;
|
||||
case kLogisticRaw: predt = 1.0f / (1.0f + expf(-predt));
|
||||
case kLogisticRaw: predt = 1.0f / (1.0f + std::exp(-predt));
|
||||
case kLogisticClassify:
|
||||
case kLogisticNeglik: return predt * (1 - predt);
|
||||
default: utils::Error("unknown loss_type"); return 0.0f;
|
||||
@ -80,7 +80,7 @@ struct LossType {
|
||||
loss_type == kLogisticNeglik ) {
|
||||
utils::Check(base_score > 0.0f && base_score < 1.0f,
|
||||
"base_score must be in (0,1) for logistic loss");
|
||||
base_score = -logf(1.0f / base_score - 1.0f);
|
||||
base_score = -std::log(1.0f / base_score - 1.0f);
|
||||
}
|
||||
return base_score;
|
||||
}
|
||||
@ -419,8 +419,8 @@ class LambdaRankObjNDCG : public LambdaRankObj {
|
||||
for (size_t i = 0; i < pairs.size(); ++i) {
|
||||
unsigned pos_idx = pairs[i].pos_index;
|
||||
unsigned neg_idx = pairs[i].neg_index;
|
||||
float pos_loginv = 1.0f / logf(pos_idx + 2.0f);
|
||||
float neg_loginv = 1.0f / logf(neg_idx + 2.0f);
|
||||
float pos_loginv = 1.0f / std::log(pos_idx + 2.0f);
|
||||
float neg_loginv = 1.0f / std::log(neg_idx + 2.0f);
|
||||
int pos_label = static_cast<int>(sorted_list[pos_idx].label);
|
||||
int neg_label = static_cast<int>(sorted_list[neg_idx].label);
|
||||
float original =
|
||||
@ -438,7 +438,7 @@ class LambdaRankObjNDCG : public LambdaRankObj {
|
||||
for (size_t i = 0; i < labels.size(); ++i) {
|
||||
const unsigned rel = static_cast<unsigned>(labels[i]);
|
||||
if (rel != 0) {
|
||||
sumdcg += ((1 << rel) - 1) / logf(static_cast<float>(i + 2));
|
||||
sumdcg += ((1 << rel) - 1) / std::log(static_cast<float>(i + 2));
|
||||
}
|
||||
}
|
||||
return static_cast<float>(sumdcg);
|
||||
|
||||
@ -302,11 +302,11 @@ struct SplitEntry{
|
||||
* \param loss_chg the loss reduction get through the split
|
||||
* \param split_index the feature index where the split is on
|
||||
*/
|
||||
inline bool NeedReplace(bst_float loss_chg, unsigned split_index) const {
|
||||
inline bool NeedReplace(bst_float new_loss_chg, unsigned split_index) const {
|
||||
if (this->split_index() <= split_index) {
|
||||
return loss_chg > this->loss_chg;
|
||||
return new_loss_chg > this->loss_chg;
|
||||
} else {
|
||||
return !(this->loss_chg > loss_chg);
|
||||
return !(this->loss_chg > new_loss_chg);
|
||||
}
|
||||
}
|
||||
/*!
|
||||
@ -332,13 +332,13 @@ struct SplitEntry{
|
||||
* \param default_left whether the missing value goes to left
|
||||
* \return whether the proposed split is better and can replace current split
|
||||
*/
|
||||
inline bool Update(bst_float loss_chg, unsigned split_index,
|
||||
float split_value, bool default_left) {
|
||||
if (this->NeedReplace(loss_chg, split_index)) {
|
||||
this->loss_chg = loss_chg;
|
||||
inline bool Update(bst_float new_loss_chg, unsigned split_index,
|
||||
float new_split_value, bool default_left) {
|
||||
if (this->NeedReplace(new_loss_chg, split_index)) {
|
||||
this->loss_chg = new_loss_chg;
|
||||
if (default_left) split_index |= (1U << 31);
|
||||
this->sindex = split_index;
|
||||
this->split_value = split_value;
|
||||
this->split_value = new_split_value;
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
#define _CRT_SECURE_NO_WARNINGS
|
||||
#define _CRT_SECURE_NO_DEPRECATE
|
||||
#include <cstring>
|
||||
using namespace std;
|
||||
#include "./updater.h"
|
||||
#include "./updater_prune-inl.hpp"
|
||||
#include "./updater_refresh-inl.hpp"
|
||||
|
||||
@ -63,8 +63,8 @@ class TreePruner: public IUpdater {
|
||||
}
|
||||
}
|
||||
if (silent == 0) {
|
||||
printf("tree prunning end, %d roots, %d extra nodes, %d pruned nodes ,max_depth=%d\n",
|
||||
tree.param.num_roots, tree.num_extra_nodes(), npruned, tree.MaxDepth());
|
||||
utils::Printf("tree prunning end, %d roots, %d extra nodes, %d pruned nodes ,max_depth=%d\n",
|
||||
tree.param.num_roots, tree.num_extra_nodes(), npruned, tree.MaxDepth());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -26,14 +26,14 @@ class TreeRefresher: public IUpdater {
|
||||
virtual void Update(const std::vector<bst_gpair> &gpair,
|
||||
IFMatrix *p_fmat,
|
||||
const BoosterInfo &info,
|
||||
const std::vector<RegTree*> &trees) {
|
||||
const std::vector<RegTree*> &trees) {
|
||||
if (trees.size() == 0) return;
|
||||
// number of threads
|
||||
int nthread;
|
||||
// thread temporal space
|
||||
std::vector< std::vector<TStats> > stemp;
|
||||
std::vector<RegTree::FVec> fvec_temp;
|
||||
// setup temp space for each thread
|
||||
int nthread;
|
||||
#pragma omp parallel
|
||||
{
|
||||
nthread = omp_get_num_threads();
|
||||
@ -127,8 +127,6 @@ class TreeRefresher: public IUpdater {
|
||||
this->Refresh(gstats, tree[nid].cright(), p_tree);
|
||||
}
|
||||
}
|
||||
// number of thread in the data
|
||||
int nthread;
|
||||
// training parameter
|
||||
TrainParam param;
|
||||
};
|
||||
|
||||
@ -100,12 +100,10 @@ class ISeekStream: public IStream {
|
||||
/*! \brief implementation of file i/o stream */
|
||||
class FileStream : public ISeekStream {
|
||||
public:
|
||||
explicit FileStream(FILE *fp) : fp(fp) {}
|
||||
explicit FileStream(void) {
|
||||
this->fp = NULL;
|
||||
}
|
||||
explicit FileStream(FILE *fp) {
|
||||
this->fp = fp;
|
||||
}
|
||||
virtual size_t Read(void *ptr, size_t size) {
|
||||
return fread(ptr, size, 1, fp);
|
||||
}
|
||||
|
||||
@ -163,7 +163,7 @@ struct SparseCSRFileBuilder {
|
||||
fo->Write(rptr);
|
||||
// setup buffer space
|
||||
buffer_rptr.resize(rptr.size());
|
||||
buffer.reserve(buffer_size);
|
||||
buffer_temp.reserve(buffer_size);
|
||||
buffer_data.resize(buffer_size);
|
||||
saved_offset.clear();
|
||||
saved_offset.resize(rptr.size() - 1, 0);
|
||||
|
||||
@ -9,13 +9,8 @@
|
||||
#include <omp.h>
|
||||
#else
|
||||
#ifndef DISABLE_OPENMP
|
||||
#ifndef _MSC_VER
|
||||
#warning "OpenMP is not available, compile to single thread code."\
|
||||
"You may want to ungrade your compiler to enable OpenMP support,"\
|
||||
"to get benefit of multi-threading."
|
||||
#else
|
||||
// TODO add warning for msvc
|
||||
#endif
|
||||
// use pragma message instead of warning
|
||||
#pragma message ("Warning: OpenMP is not available, xgboost will be compiled into single-thread code. Use OpenMP-enabled compiler to get benefit of multi-threading")
|
||||
#endif
|
||||
/*! \brief fallback used when OpenMP is not available: the thread index is always 0 */
inline int omp_get_thread_num() {
  return 0;
}
|
||||
/*! \brief fallback used when OpenMP is not available: the thread count is always 1 */
inline int omp_get_num_threads() {
  return 1;
}
|
||||
|
||||
@ -16,30 +16,21 @@
|
||||
/*! namespace of PRNG */
|
||||
namespace xgboost {
|
||||
namespace random {
|
||||
|
||||
#ifndef XGBOOST_CUSTOMIZE_PRNG_
|
||||
/*! \brief seed the PRNG */
|
||||
inline void Seed(uint32_t seed) {
|
||||
inline void Seed(unsigned seed) {
|
||||
srand(seed);
|
||||
}
|
||||
/*! \brief return a real number uniform in [0,1) */
|
||||
inline double NextDouble(void) {
|
||||
/*! \brief basic function, uniform */
|
||||
inline double Uniform(void) {
|
||||
return static_cast<double>(rand()) / (static_cast<double>(RAND_MAX)+1.0);
|
||||
}
|
||||
/*! \brief return a real numer uniform in (0,1) */
|
||||
inline double NextDouble2(void) {
|
||||
return (static_cast<double>(rand()) + 1.0) / (static_cast<double>(RAND_MAX)+2.0);
|
||||
}
|
||||
|
||||
/*! \brief return a random number */
|
||||
inline uint32_t NextUInt32(void) {
|
||||
return (uint32_t)rand();
|
||||
}
|
||||
/*! \brief return a random number in n */
|
||||
inline uint32_t NextUInt32(uint32_t n) {
|
||||
return (uint32_t)floor(NextDouble() * n);
|
||||
}
|
||||
/*! \brief return x~N(0,1) */
|
||||
inline double SampleNormal() {
|
||||
inline double Normal(void) {
|
||||
double x, y, s;
|
||||
do {
|
||||
x = 2 * NextDouble2() - 1.0;
|
||||
@ -49,22 +40,24 @@ inline double SampleNormal() {
|
||||
|
||||
return x * sqrt(-2.0 * log(s) / s);
|
||||
}
|
||||
#else
|
||||
// include declarations, to be implemented
|
||||
void Seed(unsigned seed);
|
||||
double Uniform(void);
|
||||
double Normal(void);
|
||||
#endif
|
||||
|
||||
/*! \brief return iid x,y ~N(0,1) */
|
||||
inline void SampleNormal2D(double &xx, double &yy) {
|
||||
double x, y, s;
|
||||
do {
|
||||
x = 2 * NextDouble2() - 1.0;
|
||||
y = 2 * NextDouble2() - 1.0;
|
||||
s = x*x + y*y;
|
||||
} while (s >= 1.0 || s == 0.0);
|
||||
double t = sqrt(-2.0 * log(s) / s);
|
||||
xx = x * t;
|
||||
yy = y * t;
|
||||
/*! \brief return a real number uniform in [0,1) */
|
||||
inline double NextDouble(void) {
|
||||
return Uniform();
|
||||
}
|
||||
/*! \brief return a random number in n */
|
||||
inline uint32_t NextUInt32(uint32_t n) {
|
||||
return (uint32_t)floor(NextDouble() * n);
|
||||
}
|
||||
/*! \brief return x~N(mu,sigma^2) */
|
||||
inline double SampleNormal(double mu, double sigma) {
|
||||
return SampleNormal() * sigma + mu;
|
||||
return Normal() * sigma + mu;
|
||||
}
|
||||
/*! \brief return 1 with probability p, coin flip */
|
||||
inline int SampleBinary(double p) {
|
||||
@ -90,7 +83,7 @@ struct Random{
|
||||
inline void Seed(unsigned sd) {
|
||||
this->rseed = sd;
|
||||
#if defined(_MSC_VER)||defined(_WIN32)
|
||||
srand(rseed);
|
||||
::xgboost::random::Seed(sd);
|
||||
#endif
|
||||
}
|
||||
/*! \brief return a real number uniform in [0,1) */
|
||||
@ -98,8 +91,8 @@ struct Random{
|
||||
// use rand instead of rand_r in windows, for MSVC it is fine since rand is threadsafe
|
||||
// For cygwin and mingw, this can slows down parallelism, but rand_r is only used in objective-inl.hpp, won't affect speed in general
|
||||
// todo, replace with another PRNG
|
||||
#if defined(_MSC_VER)||defined(_WIN32)
|
||||
return static_cast<double>(rand()) / (static_cast<double>(RAND_MAX) + 1.0);
|
||||
#if defined(_MSC_VER)||defined(_WIN32)||defined(XGBOOST_STRICT_CXX98_)
|
||||
return Uniform();
|
||||
#else
|
||||
return static_cast<double>(rand_r(&rseed)) / (static_cast<double>(RAND_MAX) + 1.0);
|
||||
#endif
|
||||
|
||||
@ -7,11 +7,18 @@
|
||||
*/
|
||||
#define _CRT_SECURE_NO_WARNINGS
|
||||
#include <cstdio>
|
||||
#include <cstdarg>
|
||||
#include <string>
|
||||
#include <cstdlib>
|
||||
#include <vector>
|
||||
|
||||
#ifndef XGBOOST_STRICT_CXX98_
|
||||
#include <cstdarg>
|
||||
#endif
|
||||
|
||||
#if !defined(__GNUC__)
|
||||
#define fopen64 std::fopen
|
||||
#endif
|
||||
#ifdef _MSC_VER
|
||||
#define fopen64 fopen
|
||||
// NOTE: sprintf_s is not equivalent to snprintf,
|
||||
// they are equivalent when success, which is sufficient for our case
|
||||
#define snprintf sprintf_s
|
||||
@ -19,16 +26,15 @@
|
||||
#else
|
||||
#ifdef _FILE_OFFSET_BITS
|
||||
#if _FILE_OFFSET_BITS == 32
|
||||
#warning "FILE OFFSET BITS defined to be 32 bit"
|
||||
#pragma message ("Warning: FILE OFFSET BITS defined to be 32 bit")
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef __APPLE__
|
||||
#ifdef __APPLE__
|
||||
#define off64_t off_t
|
||||
#define fopen64 fopen
|
||||
#define fopen64 std::fopen
|
||||
#endif
|
||||
|
||||
#define _FILE_OFFSET_BITS 64
|
||||
extern "C" {
|
||||
#include <sys/types.h>
|
||||
}
|
||||
@ -47,10 +53,11 @@ typedef long int64_t;
|
||||
namespace xgboost {
|
||||
/*! \brief namespace for helper utils of the project */
|
||||
namespace utils {
|
||||
/*! \brief error message buffer length */
|
||||
const int kErrorBuffer = 1 << 12;
|
||||
|
||||
#ifndef XGBOOST_CUSTOMIZE_ERROR_
|
||||
/*! \brief buffer length for formatted print and error messages */
|
||||
const int kPrintBuffer = 1 << 12;
|
||||
|
||||
#ifndef XGBOOST_CUSTOMIZE_MSG_
|
||||
/*!
|
||||
* \brief handling of Assert error, caused by in-apropriate input
|
||||
* \param msg error message
|
||||
@ -67,19 +74,50 @@ inline void HandleCheckError(const char *msg) {
|
||||
fprintf(stderr, "%s\n", msg);
|
||||
exit(-1);
|
||||
}
|
||||
/*! \brief default print handler: writes msg to standard output as-is */
inline void HandlePrint(const char *msg) {
  std::printf("%s", msg);
}
|
||||
#else
|
||||
#ifndef XGBOOST_STRICT_CXX98_
|
||||
// include declarations, some one must implement this
|
||||
void HandleAssertError(const char *msg);
|
||||
void HandleCheckError(const char *msg);
|
||||
void HandlePrint(const char *msg);
|
||||
#endif
|
||||
#endif
|
||||
#ifdef XGBOOST_STRICT_CXX98_
|
||||
// these function pointers are to be assigned
|
||||
extern "C" void (*Printf)(const char *fmt, ...);
|
||||
extern "C" int (*SPrintf)(char *buf, size_t size, const char *fmt, ...);
|
||||
extern "C" void (*Assert)(int exp, const char *fmt, ...);
|
||||
extern "C" void (*Check)(int exp, const char *fmt, ...);
|
||||
extern "C" void (*Error)(const char *fmt, ...);
|
||||
#else
|
||||
/*! \brief printf, print message to the console */
|
||||
/*! \brief printf-style console output; the formatted text is handed to HandlePrint */
inline void Printf(const char *fmt, ...) {
  // scratch buffer of kPrintBuffer bytes; vsnprintf is bounded by the same size
  std::string text(kPrintBuffer, '\0');
  va_list ap;
  va_start(ap, fmt);
  vsnprintf(&text[0], kPrintBuffer, fmt, ap);
  va_end(ap);
  HandlePrint(text.c_str());
}
|
||||
/*! \brief portable version of snprintf */
|
||||
/*!
 * \brief portable version of snprintf
 * \param buf destination buffer
 * \param size capacity of buf in bytes, passed straight to vsnprintf
 * \param fmt printf-style format string, followed by its arguments
 * \return the value returned by vsnprintf
 */
inline int SPrintf(char *buf, size_t size, const char *fmt, ...) {
  va_list ap;
  va_start(ap, fmt);
  const int written = vsnprintf(buf, size, fmt, ap);
  va_end(ap);
  return written;
}
|
||||
|
||||
/*! \brief assert that a condition is true; use this to report debugging information */
|
||||
inline void Assert(bool exp, const char *fmt, ...) {
|
||||
if (!exp) {
|
||||
std::string msg(kErrorBuffer, '\0');
|
||||
std::string msg(kPrintBuffer, '\0');
|
||||
va_list args;
|
||||
va_start(args, fmt);
|
||||
vsnprintf(&msg[0], kErrorBuffer, fmt, args);
|
||||
vsnprintf(&msg[0], kPrintBuffer, fmt, args);
|
||||
va_end(args);
|
||||
HandleAssertError(msg.c_str());
|
||||
}
|
||||
@ -88,10 +126,10 @@ inline void Assert(bool exp, const char *fmt, ...) {
|
||||
/*!\brief same as assert, but this is intended to be used as message for user*/
|
||||
inline void Check(bool exp, const char *fmt, ...) {
|
||||
if (!exp) {
|
||||
std::string msg(kErrorBuffer, '\0');
|
||||
std::string msg(kPrintBuffer, '\0');
|
||||
va_list args;
|
||||
va_start(args, fmt);
|
||||
vsnprintf(&msg[0], kErrorBuffer, fmt, args);
|
||||
vsnprintf(&msg[0], kPrintBuffer, fmt, args);
|
||||
va_end(args);
|
||||
HandleCheckError(msg.c_str());
|
||||
}
|
||||
@ -100,14 +138,15 @@ inline void Check(bool exp, const char *fmt, ...) {
|
||||
/*! \brief report error message, same as check */
|
||||
inline void Error(const char *fmt, ...) {
|
||||
{
|
||||
std::string msg(kErrorBuffer, '\0');
|
||||
std::string msg(kPrintBuffer, '\0');
|
||||
va_list args;
|
||||
va_start(args, fmt);
|
||||
vsnprintf(&msg[0], kErrorBuffer, fmt, args);
|
||||
vsnprintf(&msg[0], kPrintBuffer, fmt, args);
|
||||
va_end(args);
|
||||
HandleCheckError(msg.c_str());
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/*! \brief replace fopen, report error when the file open fails */
|
||||
inline FILE *FopenCheck(const char *fname, const char *flag) {
|
||||
@ -115,7 +154,25 @@ inline FILE *FopenCheck(const char *fname, const char *flag) {
|
||||
Check(fp != NULL, "can not open file \"%s\"\n", fname);
|
||||
return fp;
|
||||
}
|
||||
|
||||
} // namespace utils
|
||||
} // namespace utils
|
||||
// easy utils that can be directly accessed in xgboost
|
||||
/*!
 * \brief get the beginning address of a vector
 * \param vec the vector to take the address of
 * \return pointer to the first element, or NULL when the vector is empty
 */
|
||||
template<typename T>
|
||||
inline T *BeginPtr(std::vector<T> &vec) {
|
||||
// an empty vector has no element 0 to take the address of
if (vec.size() == 0) {
|
||||
return NULL;
|
||||
} else {
|
||||
return &vec[0];
|
||||
}
|
||||
}
|
||||
/*!
 * \brief get the beginning address of a vector (overload for const vectors)
 * \param vec the vector to take the address of
 * \return const pointer to the first element, or NULL when the vector is empty
 */
|
||||
template<typename T>
|
||||
inline const T *BeginPtr(const std::vector<T> &vec) {
|
||||
// an empty vector has no element 0 to take the address of
if (vec.size() == 0) {
|
||||
return NULL;
|
||||
} else {
|
||||
return &vec[0];
|
||||
}
|
||||
}
|
||||
} // namespace xgboost
|
||||
#endif // XGBOOST_UTILS_UTILS_H_
|
||||
|
||||
@ -50,6 +50,7 @@ class BoostLearnTask{
|
||||
if (!strcmp("use_buffer", name)) use_buffer = atoi(val);
|
||||
if (!strcmp("num_round", name)) num_round = atoi(val);
|
||||
if (!strcmp("pred_margin", name)) pred_margin = atoi(val);
|
||||
if (!strcmp("ntree_limit", name)) ntree_limit = atoi(val);
|
||||
if (!strcmp("save_period", name)) save_period = atoi(val);
|
||||
if (!strcmp("eval_train", name)) eval_train = atoi(val);
|
||||
if (!strcmp("task", name)) task = val;
|
||||
@ -79,6 +80,7 @@ class BoostLearnTask{
|
||||
save_period = 0;
|
||||
eval_train = 0;
|
||||
pred_margin = 0;
|
||||
ntree_limit = 0;
|
||||
dump_model_stats = 0;
|
||||
task = "train";
|
||||
model_in = "NULL";
|
||||
@ -186,7 +188,7 @@ class BoostLearnTask{
|
||||
inline void TaskPred(void) {
|
||||
std::vector<float> preds;
|
||||
if (!silent) printf("start prediction...\n");
|
||||
learner.Predict(*data, pred_margin != 0, &preds);
|
||||
learner.Predict(*data, pred_margin != 0, &preds, ntree_limit);
|
||||
if (!silent) printf("writing prediction to %s\n", name_pred.c_str());
|
||||
FILE *fo = utils::FopenCheck(name_pred.c_str(), "w");
|
||||
for (size_t i = 0; i < preds.size(); i++) {
|
||||
@ -217,6 +219,8 @@ class BoostLearnTask{
|
||||
std::string task;
|
||||
/*! \brief name of predict file */
|
||||
std::string name_pred;
|
||||
/*!\brief limit number of trees in prediction */
|
||||
int ntree_limit;
|
||||
/*!\brief whether to directly output margin value */
|
||||
int pred_margin;
|
||||
/*! \brief whether dump statistics along with model */
|
||||
|
||||
@ -365,7 +365,7 @@ class Booster:
|
||||
return xglib.XGBoosterEvalOneIter(self.handle, it, dmats, evnames, len(evals))
|
||||
def eval(self, mat, name = 'eval', it = 0):
|
||||
return self.eval_set( [(mat,name)], it)
|
||||
def predict(self, data, output_margin=False):
|
||||
def predict(self, data, output_margin=False, ntree_limit=0):
|
||||
"""
|
||||
predict with data
|
||||
Args:
|
||||
@ -373,12 +373,14 @@ class Booster:
|
||||
the dmatrix storing the input
|
||||
output_margin: bool
|
||||
whether output raw margin value that is untransformed
|
||||
|
||||
ntree_limit: limit number of trees in prediction, default to 0, 0 means using all the trees
|
||||
Returns:
|
||||
numpy array of prediction
|
||||
"""
|
||||
length = ctypes.c_ulong()
|
||||
preds = xglib.XGBoosterPredict(self.handle, data.handle,
|
||||
int(output_margin), ctypes.byref(length))
|
||||
int(output_margin), ntree_limit, ctypes.byref(length))
|
||||
return ctypes2numpy(preds, length.value, 'float32')
|
||||
def save_model(self, fname):
|
||||
""" save model to file
|
||||
|
||||
@ -6,10 +6,14 @@
|
||||
#include <string>
|
||||
#include <cstring>
|
||||
#include <algorithm>
|
||||
// include all std functions
|
||||
using namespace std;
|
||||
|
||||
#include "./xgboost_wrapper.h"
|
||||
#include "../src/data.h"
|
||||
#include "../src/learner/learner-inl.hpp"
|
||||
#include "../src/io/io.h"
|
||||
#include "../src/utils/utils.h"
|
||||
#include "../src/io/simple_dmatrix-inl.hpp"
|
||||
|
||||
using namespace xgboost;
|
||||
@ -25,11 +29,11 @@ class Booster: public learner::BoostLearner {
|
||||
this->init_model = false;
|
||||
this->SetCacheData(mats);
|
||||
}
|
||||
const float *Pred(const DataMatrix &dmat, int output_margin, bst_ulong *len) {
|
||||
inline const float *Pred(const DataMatrix &dmat, int output_margin, unsigned ntree_limit, bst_ulong *len) {
|
||||
this->CheckInitModel();
|
||||
this->Predict(dmat, output_margin != 0, &this->preds_);
|
||||
this->Predict(dmat, output_margin != 0, &this->preds_, ntree_limit);
|
||||
*len = static_cast<bst_ulong>(this->preds_.size());
|
||||
return &this->preds_[0];
|
||||
return BeginPtr(this->preds_);
|
||||
}
|
||||
inline void BoostOneIter(const DataMatrix &train,
|
||||
float *grad, float *hess, bst_ulong len) {
|
||||
@ -57,7 +61,7 @@ class Booster: public learner::BoostLearner {
|
||||
model_dump_cptr[i] = model_dump[i].c_str();
|
||||
}
|
||||
*len = static_cast<bst_ulong>(model_dump.size());
|
||||
return &model_dump_cptr[0];
|
||||
return BeginPtr(model_dump_cptr);
|
||||
}
|
||||
// temporary fields
|
||||
// temporary data to save evaluation dump
|
||||
@ -174,13 +178,13 @@ extern "C"{
|
||||
std::vector<float> &vec =
|
||||
static_cast<DataMatrix*>(handle)->info.GetFloatInfo(field);
|
||||
vec.resize(len);
|
||||
memcpy(&vec[0], info, sizeof(float) * len);
|
||||
memcpy(BeginPtr(vec), info, sizeof(float) * len);
|
||||
}
|
||||
void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *info, bst_ulong len) {
|
||||
std::vector<unsigned> &vec =
|
||||
static_cast<DataMatrix*>(handle)->info.GetUIntInfo(field);
|
||||
vec.resize(len);
|
||||
memcpy(&vec[0], info, sizeof(unsigned) * len);
|
||||
memcpy(BeginPtr(vec), info, sizeof(unsigned) * len);
|
||||
}
|
||||
void XGDMatrixSetGroup(void *handle, const unsigned *group, bst_ulong len) {
|
||||
DataMatrix *pmat = static_cast<DataMatrix*>(handle);
|
||||
@ -194,13 +198,13 @@ extern "C"{
|
||||
const std::vector<float> &vec =
|
||||
static_cast<const DataMatrix*>(handle)->info.GetFloatInfo(field);
|
||||
*len = static_cast<bst_ulong>(vec.size());
|
||||
return &vec[0];
|
||||
return BeginPtr(vec);
|
||||
}
|
||||
const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, bst_ulong* len) {
|
||||
const std::vector<unsigned> &vec =
|
||||
static_cast<const DataMatrix*>(handle)->info.GetUIntInfo(field);
|
||||
*len = static_cast<bst_ulong>(vec.size());
|
||||
return &vec[0];
|
||||
return BeginPtr(vec);
|
||||
}
|
||||
bst_ulong XGDMatrixNumRow(const void *handle) {
|
||||
return static_cast<bst_ulong>(static_cast<const DataMatrix*>(handle)->info.num_row());
|
||||
@ -249,8 +253,8 @@ extern "C"{
|
||||
bst->eval_str = bst->EvalOneIter(iter, mats, names);
|
||||
return bst->eval_str.c_str();
|
||||
}
|
||||
const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, bst_ulong *len) {
|
||||
return static_cast<Booster*>(handle)->Pred(*static_cast<DataMatrix*>(dmat), output_margin, len);
|
||||
const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, unsigned ntree_limit, bst_ulong *len) {
|
||||
return static_cast<Booster*>(handle)->Pred(*static_cast<DataMatrix*>(dmat), output_margin, ntree_limit, len);
|
||||
}
|
||||
void XGBoosterLoadModel(void *handle, const char *fname) {
|
||||
static_cast<Booster*>(handle)->LoadModel(fname);
|
||||
|
||||
@ -165,9 +165,11 @@ extern "C" {
|
||||
* \param handle handle
|
||||
* \param dmat data matrix
|
||||
* \param output_margin whether only output raw margin value
|
||||
* \param ntree_limit limit number of trees used for prediction, this is only valid for boosted trees
|
||||
* when the parameter is set to 0, we will use all the trees
|
||||
* \param len used to store length of returning result
|
||||
*/
|
||||
XGB_DLL const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, bst_ulong *len);
|
||||
XGB_DLL const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, unsigned ntree_limit, bst_ulong *len);
|
||||
/*!
|
||||
* \brief load model from existing file
|
||||
* \param handle handle
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user