t push origin unityMerge branch 'master' into unity
This commit is contained in:
commit
76c513b191
5
.gitignore
vendored
5
.gitignore
vendored
@ -6,12 +6,15 @@
|
|||||||
# Compiled Dynamic libraries
|
# Compiled Dynamic libraries
|
||||||
*.so
|
*.so
|
||||||
*.dylib
|
*.dylib
|
||||||
|
*.page
|
||||||
# Compiled Static libraries
|
# Compiled Static libraries
|
||||||
*.lai
|
*.lai
|
||||||
*.la
|
*.la
|
||||||
*.a
|
*.a
|
||||||
*~
|
*~
|
||||||
|
*.Rcheck
|
||||||
|
*.rds
|
||||||
|
*.tar.gz
|
||||||
*txt*
|
*txt*
|
||||||
*conf
|
*conf
|
||||||
*buffer
|
*buffer
|
||||||
|
|||||||
36
Makefile
36
Makefile
@ -15,7 +15,7 @@ BIN = xgboost
|
|||||||
OBJ = updater.o gbm.o io.o
|
OBJ = updater.o gbm.o io.o
|
||||||
SLIB = wrapper/libxgboostwrapper.so
|
SLIB = wrapper/libxgboostwrapper.so
|
||||||
|
|
||||||
.PHONY: clean all python
|
.PHONY: clean all python Rpack
|
||||||
|
|
||||||
all: $(BIN) $(OBJ) $(SLIB)
|
all: $(BIN) $(OBJ) $(SLIB)
|
||||||
|
|
||||||
@ -40,19 +40,25 @@ $(OBJ) :
|
|||||||
install:
|
install:
|
||||||
cp -f -r $(BIN) $(INSTALL_PATH)
|
cp -f -r $(BIN) $(INSTALL_PATH)
|
||||||
|
|
||||||
R-package.tar.gz:
|
Rpack:
|
||||||
rm -rf xgboost-R
|
make clean
|
||||||
cp -r R-package xgboost-R
|
rm -rf xgboost xgboost*.tar.gz
|
||||||
rm -rf xgboost-R/src/*.o xgboost-R/src/*.so xgboost-R/src/*.dll
|
cp -r R-package xgboost
|
||||||
cp -r src xgboost-R/src/src
|
rm -rf xgboost/inst/examples/*.buffer
|
||||||
mkdir xgboost-R/src/wrapper
|
rm -rf xgboost/inst/examples/*.model
|
||||||
cp wrapper/xgboost_wrapper.h xgboost-R/src/wrapper
|
rm -rf xgboost/inst/examples/dump*
|
||||||
cp wrapper/xgboost_wrapper.cpp xgboost-R/src/wrapper
|
rm -rf xgboost/src/*.o xgboost/src/*.so xgboost/src/*.dll
|
||||||
cp ./LICENSE xgboost-R
|
rm -rf xgboost/demo/*.model xgboost/demo/*.buffer
|
||||||
cat R-package/src/Makevars|sed '2s/.*/PKGROOT=./' > xgboost-R/src/Makevars
|
cp -r src xgboost/src/src
|
||||||
cat R-package/src/Makevars.win|sed '2s/.*/PKGROOT=./' > xgboost-R/src/Makevars.win
|
mkdir xgboost/src/wrapper
|
||||||
tar czf $@ xgboost-R
|
cp wrapper/xgboost_wrapper.h xgboost/src/wrapper
|
||||||
rm -rf xgboost-R
|
cp wrapper/xgboost_wrapper.cpp xgboost/src/wrapper
|
||||||
|
cp ./LICENSE xgboost
|
||||||
|
cat R-package/src/Makevars|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars
|
||||||
|
cat R-package/src/Makevars.win|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars.win
|
||||||
|
R CMD build xgboost
|
||||||
|
rm -rf xgboost
|
||||||
|
R CMD check --as-cran xgboost*.tar.gz
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
$(RM) $(OBJ) $(BIN) $(SLIB) *.o *~ */*~ */*/*~
|
$(RM) $(OBJ) $(BIN) $(SLIB) *.o */*.o */*/*.o *~ */*~ */*/*~
|
||||||
|
|||||||
@ -1,12 +1,20 @@
|
|||||||
Package: xgboost
|
Package: xgboost
|
||||||
Type: Package
|
Type: Package
|
||||||
Title: R wrapper of xgboost
|
Title: eXtreme Gradient Boosting
|
||||||
Version: 0.3-0
|
Version: 0.3-0
|
||||||
Date: 2014-08-23
|
Date: 2014-08-23
|
||||||
Author: Tianqi Chen, Tong He
|
Author: Tianqi Chen <tianqi.tchen@gmail.com>, Tong He <hetong007@gmail.com>
|
||||||
Maintainer: Tianqi Chen <tianqi.tchen@gmail.com>, Tong He <hetong007@gmail.com>
|
Maintainer: Tong He <hetong007@gmail.com>
|
||||||
Description: xgboost
|
Description: This package is an R wrapper of xgboost, which is short for eXtreme
|
||||||
License: file LICENSE
|
Gradient Boosting. It is an efficient and scalable implementation of
|
||||||
|
gradient boosting framework. The package includes efficient linear model
|
||||||
|
solver and tree learning algorithm. The package can automatically do
|
||||||
|
parallel computation with OpenMP, and it can be more than 10 times faster
|
||||||
|
than existing gradient boosting packages such as gbm. It supports various
|
||||||
|
objective functions, including regression, classification and ranking. The
|
||||||
|
package is made to be extensible, so that users are also allowed to define
|
||||||
|
their own objectives easily.
|
||||||
|
License: Apache License (== 2.0) | file LICENSE
|
||||||
URL: https://github.com/tqchen/xgboost
|
URL: https://github.com/tqchen/xgboost
|
||||||
BugReports: https://github.com/tqchen/xgboost/issues
|
BugReports: https://github.com/tqchen/xgboost/issues
|
||||||
Depends:
|
Depends:
|
||||||
|
|||||||
@ -10,5 +10,6 @@ export(xgb.save)
|
|||||||
export(xgb.train)
|
export(xgb.train)
|
||||||
export(xgboost)
|
export(xgboost)
|
||||||
exportMethods(predict)
|
exportMethods(predict)
|
||||||
|
import(methods)
|
||||||
importClassesFrom(Matrix,dgCMatrix)
|
importClassesFrom(Matrix,dgCMatrix)
|
||||||
importClassesFrom(Matrix,dgeMatrix)
|
importClassesFrom(Matrix,dgeMatrix)
|
||||||
|
|||||||
@ -4,20 +4,23 @@ setClass('xgb.DMatrix')
|
|||||||
#'
|
#'
|
||||||
#' Get information of an xgb.DMatrix object
|
#' Get information of an xgb.DMatrix object
|
||||||
#'
|
#'
|
||||||
#' @param object Object of class "xgb.DMatrix"
|
|
||||||
#' @param name the name of the field to get
|
|
||||||
#'
|
|
||||||
#' @examples
|
#' @examples
|
||||||
#' data(iris)
|
#' data(iris)
|
||||||
#' iris[,5] <- as.numeric(iris[,5])
|
#' iris[,5] <- as.numeric(iris[,5])
|
||||||
#' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5])
|
#' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5])
|
||||||
#' labels <- getinfo(dtrain, "label")
|
#' labels <- getinfo(dtrain, "label")
|
||||||
|
#' @rdname getinfo
|
||||||
#' @export
|
#' @export
|
||||||
#'
|
#'
|
||||||
getinfo <- function(object, ...){
|
getinfo <- function(object, ...){
|
||||||
UseMethod("getinfo")
|
UseMethod("getinfo")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#' @param object Object of class "xgb.DMatrix"
|
||||||
|
#' @param name the name of the field to get
|
||||||
|
#' @param ... other parameters
|
||||||
|
#' @rdname getinfo
|
||||||
|
#' @method getinfo xgb.DMatrix
|
||||||
setMethod("getinfo", signature = "xgb.DMatrix",
|
setMethod("getinfo", signature = "xgb.DMatrix",
|
||||||
definition = function(object, name) {
|
definition = function(object, name) {
|
||||||
if (typeof(name) != "character") {
|
if (typeof(name) != "character") {
|
||||||
|
|||||||
@ -11,7 +11,8 @@ setClass("xgb.Booster")
|
|||||||
#' value of sum of functions, when outputmargin=TRUE, the prediction is
|
#' value of sum of functions, when outputmargin=TRUE, the prediction is
|
||||||
#' untransformed margin value. In logistic regression, outputmargin=T will
|
#' untransformed margin value. In logistic regression, outputmargin=T will
|
||||||
#' output value before logistic transformation.
|
#' output value before logistic transformation.
|
||||||
#'
|
#' @param ntreelimit limit the number of trees used in prediction. This parameter is only valid for gbtree, not for gblinear.
|
||||||
|
#' Set it to a value greater than 0. By default all trees are used.
|
||||||
#' @examples
|
#' @examples
|
||||||
#' data(iris)
|
#' data(iris)
|
||||||
#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2)
|
#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2)
|
||||||
@ -19,11 +20,18 @@ setClass("xgb.Booster")
|
|||||||
#' @export
|
#' @export
|
||||||
#'
|
#'
|
||||||
setMethod("predict", signature = "xgb.Booster",
|
setMethod("predict", signature = "xgb.Booster",
|
||||||
definition = function(object, newdata, outputmargin = FALSE) {
|
definition = function(object, newdata, outputmargin = FALSE, ntreelimit = NULL) {
|
||||||
if (class(newdata) != "xgb.DMatrix") {
|
if (class(newdata) != "xgb.DMatrix") {
|
||||||
newdata <- xgb.DMatrix(newdata)
|
newdata <- xgb.DMatrix(newdata)
|
||||||
}
|
}
|
||||||
ret <- .Call("XGBoosterPredict_R", object, newdata, as.integer(outputmargin), PACKAGE = "xgboost")
|
if (is.null(ntreelimit)) {
|
||||||
|
ntreelimit <- 0
|
||||||
|
} else {
|
||||||
|
if (ntreelimit < 1){
|
||||||
|
stop("predict: ntreelimit must be equal to or greater than 1")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ret <- .Call("XGBoosterPredict_R", object, newdata, as.integer(outputmargin), as.integer(ntreelimit), PACKAGE = "xgboost")
|
||||||
return(ret)
|
return(ret)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|||||||
@ -6,22 +6,25 @@ setClass('xgb.DMatrix')
|
|||||||
#' Get a new DMatrix containing the specified rows of
|
#' Get a new DMatrix containing the specified rows of
|
||||||
#' original xgb.DMatrix object
|
#' original xgb.DMatrix object
|
||||||
#'
|
#'
|
||||||
#' @param object Object of class "xgb.DMatrix"
|
|
||||||
#' @param idxset a integer vector of indices of rows needed
|
|
||||||
#'
|
|
||||||
#' @examples
|
#' @examples
|
||||||
#' data(iris)
|
#' data(iris)
|
||||||
#' iris[,5] <- as.numeric(iris[,5])
|
#' iris[,5] <- as.numeric(iris[,5])
|
||||||
#' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5])
|
#' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5])
|
||||||
#' dsub <- slice(dtrain, 1:3)
|
#' dsub <- slice(dtrain, 1:3)
|
||||||
|
#' @rdname slice
|
||||||
#' @export
|
#' @export
|
||||||
#'
|
#'
|
||||||
slice <- function(object, ...){
|
slice <- function(object, ...){
|
||||||
UseMethod("slice")
|
UseMethod("slice")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#' @param object Object of class "xgb.DMatrix"
|
||||||
|
#' @param idxset a integer vector of indices of rows needed
|
||||||
|
#' @param ... other parameters
|
||||||
|
#' @rdname slice
|
||||||
|
#' @method slice xgb.DMatrix
|
||||||
setMethod("slice", signature = "xgb.DMatrix",
|
setMethod("slice", signature = "xgb.DMatrix",
|
||||||
definition = function(object, idxset) {
|
definition = function(object, idxset, ...) {
|
||||||
if (class(object) != "xgb.DMatrix") {
|
if (class(object) != "xgb.DMatrix") {
|
||||||
stop("slice: first argument dtrain must be xgb.DMatrix")
|
stop("slice: first argument dtrain must be xgb.DMatrix")
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,4 +1,5 @@
|
|||||||
#' @importClassesFrom Matrix dgCMatrix dgeMatrix
|
#' @importClassesFrom Matrix dgCMatrix dgeMatrix
|
||||||
|
#' @import methods
|
||||||
|
|
||||||
# depends on matrix
|
# depends on matrix
|
||||||
.onLoad <- function(libname, pkgname) {
|
.onLoad <- function(libname, pkgname) {
|
||||||
@ -48,7 +49,6 @@ xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
handle <- .Call("XGBoosterCreate_R", cachelist, PACKAGE = "xgboost")
|
handle <- .Call("XGBoosterCreate_R", cachelist, PACKAGE = "xgboost")
|
||||||
.Call("XGBoosterSetParam_R", handle, "seed", "0", PACKAGE = "xgboost")
|
|
||||||
if (length(params) != 0) {
|
if (length(params) != 0) {
|
||||||
for (i in 1:length(params)) {
|
for (i in 1:length(params)) {
|
||||||
p <- params[i]
|
p <- params[i]
|
||||||
@ -121,8 +121,8 @@ xgb.iter.eval <- function(booster, watchlist, iter) {
|
|||||||
stop("xgb.eval: watch list can only contain xgb.DMatrix")
|
stop("xgb.eval: watch list can only contain xgb.DMatrix")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
evnames <- list()
|
|
||||||
if (length(watchlist) != 0) {
|
if (length(watchlist) != 0) {
|
||||||
|
evnames <- list()
|
||||||
for (i in 1:length(watchlist)) {
|
for (i in 1:length(watchlist)) {
|
||||||
w <- watchlist[i]
|
w <- watchlist[i]
|
||||||
if (length(names(w)) == 0) {
|
if (length(names(w)) == 0) {
|
||||||
@ -130,8 +130,10 @@ xgb.iter.eval <- function(booster, watchlist, iter) {
|
|||||||
}
|
}
|
||||||
evnames <- append(evnames, names(w))
|
evnames <- append(evnames, names(w))
|
||||||
}
|
}
|
||||||
|
msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist,
|
||||||
|
evnames, PACKAGE = "xgboost")
|
||||||
|
} else {
|
||||||
|
msg <- ""
|
||||||
}
|
}
|
||||||
msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist,
|
|
||||||
evnames, PACKAGE = "xgboost")
|
|
||||||
return(msg)
|
return(msg)
|
||||||
}
|
}
|
||||||
|
|||||||
@ -2,7 +2,7 @@
|
|||||||
#'
|
#'
|
||||||
#' Save xgb.DMatrix object to binary file
|
#' Save xgb.DMatrix object to binary file
|
||||||
#'
|
#'
|
||||||
#' @param model the model object.
|
#' @param DMatrix the xgb.DMatrix object to save.
|
||||||
#' @param fname the name of the binary file.
|
#' @param fname the name of the binary file.
|
||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
|
|||||||
@ -4,9 +4,12 @@
|
|||||||
#'
|
#'
|
||||||
#' @param model the model object.
|
#' @param model the model object.
|
||||||
#' @param fname the name of the binary file.
|
#' @param fname the name of the binary file.
|
||||||
#' @param fmap feature map file representing the type of feature, to make it
|
#' @param fmap feature map file representing the type of feature.
|
||||||
#' look nice, run demo/demo.R for result and demo/featmap.txt for example
|
#' Detailed description could be found at
|
||||||
#' Format: https://github.com/tqchen/xgboost/wiki/Binary-Classification#dump-model
|
#' \url{https://github.com/tqchen/xgboost/wiki/Binary-Classification#dump-model}.
|
||||||
|
#' Run inst/examples/demo.R for the result and inst/examples/featmap.txt
|
||||||
|
#' for example Format.
|
||||||
|
#'
|
||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
#' data(iris)
|
#' data(iris)
|
||||||
|
|||||||
@ -15,7 +15,7 @@
|
|||||||
#' }
|
#' }
|
||||||
#'
|
#'
|
||||||
#' See \url{https://github.com/tqchen/xgboost/wiki/Parameters} for
|
#' See \url{https://github.com/tqchen/xgboost/wiki/Parameters} for
|
||||||
#' further details. See also demo/demo.R for walkthrough example in R.
|
#' further details. See also inst/examples/demo.R for walkthrough example in R.
|
||||||
#' @param dtrain takes an \code{xgb.DMatrix} as the input.
|
#' @param dtrain takes an \code{xgb.DMatrix} as the input.
|
||||||
#' @param nrounds the max number of iterations
|
#' @param nrounds the max number of iterations
|
||||||
#' @param watchlist what information should be printed when \code{verbose=1} or
|
#' @param watchlist what information should be printed when \code{verbose=1} or
|
||||||
@ -24,10 +24,11 @@
|
|||||||
#' watchlist=list(validation1=mat1, validation2=mat2) to watch
|
#' watchlist=list(validation1=mat1, validation2=mat2) to watch
|
||||||
#' the performance of each round's model on mat1 and mat2
|
#' the performance of each round's model on mat1 and mat2
|
||||||
#'
|
#'
|
||||||
#' @param obj customized objective function. Given prediction and dtrain,
|
#' @param obj customized objective function. Returns gradient and second order
|
||||||
#' return gradient and second order gradient.
|
#' gradient with given prediction and dtrain,
|
||||||
#' @param feval customized evaluation function. Given prediction and dtrain,
|
#' @param feval customized evaluation function. Returns
|
||||||
#' return a \code{list(metric='metric-name', value='metric-value')}.
|
#' \code{list(metric='metric-name', value='metric-value')} with given
|
||||||
|
#' prediction and dtrain,
|
||||||
#' @param ... other parameters to pass to \code{params}.
|
#' @param ... other parameters to pass to \code{params}.
|
||||||
#'
|
#'
|
||||||
#' @details
|
#' @details
|
||||||
|
|||||||
@ -19,7 +19,7 @@
|
|||||||
#' }
|
#' }
|
||||||
#'
|
#'
|
||||||
#' See \url{https://github.com/tqchen/xgboost/wiki/Parameters} for
|
#' See \url{https://github.com/tqchen/xgboost/wiki/Parameters} for
|
||||||
#' further details. See also demo/demo.R for walkthrough example in R.
|
#' further details. See also inst/examples/demo.R for walkthrough example in R.
|
||||||
#' @param nrounds the max number of iterations
|
#' @param nrounds the max number of iterations
|
||||||
#' @param verbose If 0, xgboost will stay silent. If 1, xgboost will print
|
#' @param verbose If 0, xgboost will stay silent. If 1, xgboost will print
|
||||||
#' information of performance. If 2, xgboost will print information of both
|
#' information of performance. If 2, xgboost will print information of both
|
||||||
|
|||||||
@ -1,10 +1,21 @@
|
|||||||
This is subfolder for experimental version of R package.
|
# R package for xgboost.
|
||||||
|
|
||||||
Installation:
|
## Installation
|
||||||
|
|
||||||
|
For the up-to-date version (which is recommended), please install from GitHub. Windows users will need to install [RTools](http://cran.r-project.org/bin/windows/Rtools/) first.
|
||||||
|
|
||||||
```r
|
```r
|
||||||
require(devtools)
|
require(devtools)
|
||||||
install_github('xgboost','tqchen',subdir='R-package')
|
install_github('xgboost','tqchen',subdir='R-package')
|
||||||
```
|
```
|
||||||
|
|
||||||
Please visit [demo](https://github.com/tqchen/xgboost/blob/master/R-package/demo/demo.R) for more details.
|
For the stable version on CRAN, please run
|
||||||
|
|
||||||
|
```r
|
||||||
|
install.packages('xgboost')
|
||||||
|
```
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
* Please visit [demo](https://github.com/tqchen/xgboost/blob/master/R-package/inst/examples/demo.R) for a walkthrough example.
|
||||||
|
* See also the [example scripts](https://github.com/tqchen/xgboost/tree/master/demo/kaggle-higgs) for the Kaggle Higgs Challenge, including a [speedtest script](https://github.com/tqchen/xgboost/blob/master/demo/kaggle-higgs/speedtest.R) on this dataset.
|
||||||
|
|||||||
@ -1 +0,0 @@
|
|||||||
demo R code for xgboost usages on agaricus data
|
|
||||||
@ -1,14 +1,20 @@
|
|||||||
% Generated by roxygen2 (4.0.1): do not edit by hand
|
% Generated by roxygen2 (4.0.1): do not edit by hand
|
||||||
|
\docType{methods}
|
||||||
\name{getinfo}
|
\name{getinfo}
|
||||||
\alias{getinfo}
|
\alias{getinfo}
|
||||||
|
\alias{getinfo,xgb.DMatrix-method}
|
||||||
\title{Get information of an xgb.DMatrix object}
|
\title{Get information of an xgb.DMatrix object}
|
||||||
\usage{
|
\usage{
|
||||||
getinfo(object, ...)
|
getinfo(object, ...)
|
||||||
|
|
||||||
|
\S4method{getinfo}{xgb.DMatrix}(object, name)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{object}{Object of class "xgb.DMatrix"}
|
\item{object}{Object of class "xgb.DMatrix"}
|
||||||
|
|
||||||
\item{name}{the name of the field to get}
|
\item{name}{the name of the field to get}
|
||||||
|
|
||||||
|
\item{...}{other parameters}
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
Get information of an xgb.DMatrix object
|
Get information of an xgb.DMatrix object
|
||||||
|
|||||||
@ -4,7 +4,8 @@
|
|||||||
\alias{predict,xgb.Booster-method}
|
\alias{predict,xgb.Booster-method}
|
||||||
\title{Predict method for eXtreme Gradient Boosting model}
|
\title{Predict method for eXtreme Gradient Boosting model}
|
||||||
\usage{
|
\usage{
|
||||||
\S4method{predict}{xgb.Booster}(object, newdata, outputmargin = FALSE)
|
\S4method{predict}{xgb.Booster}(object, newdata, outputmargin = FALSE,
|
||||||
|
ntreelimit = NULL)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{object}{Object of class "xgb.Boost"}
|
\item{object}{Object of class "xgb.Boost"}
|
||||||
@ -13,9 +14,12 @@
|
|||||||
\code{xgb.DMatrix}.}
|
\code{xgb.DMatrix}.}
|
||||||
|
|
||||||
\item{outputmargin}{whether the prediction should be shown in the original
|
\item{outputmargin}{whether the prediction should be shown in the original
|
||||||
value of sum of functions, when outputmargin=TRUE, the prediction is
|
value of sum of functions, when outputmargin=TRUE, the prediction is
|
||||||
untransformed margin value. In logistic regression, outputmargin=T will
|
untransformed margin value. In logistic regression, outputmargin=T will
|
||||||
output value before logistic transformation.}
|
output value before logistic transformation.}
|
||||||
|
|
||||||
|
\item{ntreelimit}{limit number of trees used in prediction, this parameter is only valid for gbtree, but not for gblinear.
|
||||||
|
set it to be value bigger than 0. It will use all trees by default.}
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
Predicted values based on xgboost model object.
|
Predicted values based on xgboost model object.
|
||||||
|
|||||||
@ -1,15 +1,21 @@
|
|||||||
% Generated by roxygen2 (4.0.1): do not edit by hand
|
% Generated by roxygen2 (4.0.1): do not edit by hand
|
||||||
|
\docType{methods}
|
||||||
\name{slice}
|
\name{slice}
|
||||||
\alias{slice}
|
\alias{slice}
|
||||||
|
\alias{slice,xgb.DMatrix-method}
|
||||||
\title{Get a new DMatrix containing the specified rows of
|
\title{Get a new DMatrix containing the specified rows of
|
||||||
orginal xgb.DMatrix object}
|
orginal xgb.DMatrix object}
|
||||||
\usage{
|
\usage{
|
||||||
slice(object, ...)
|
slice(object, ...)
|
||||||
|
|
||||||
|
\S4method{slice}{xgb.DMatrix}(object, idxset, ...)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{object}{Object of class "xgb.DMatrix"}
|
\item{object}{Object of class "xgb.DMatrix"}
|
||||||
|
|
||||||
\item{idxset}{a integer vector of indices of rows needed}
|
\item{idxset}{a integer vector of indices of rows needed}
|
||||||
|
|
||||||
|
\item{...}{other parameters}
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
Get a new DMatrix containing the specified rows of
|
Get a new DMatrix containing the specified rows of
|
||||||
|
|||||||
@ -6,7 +6,7 @@
|
|||||||
xgb.DMatrix.save(DMatrix, fname)
|
xgb.DMatrix.save(DMatrix, fname)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{model}{the model object.}
|
\item{DMatrix}{the xgb.DMatrix object to save.}
|
||||||
|
|
||||||
\item{fname}{the name of the binary file.}
|
\item{fname}{the name of the binary file.}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -10,9 +10,11 @@ xgb.dump(model, fname, fmap = "")
|
|||||||
|
|
||||||
\item{fname}{the name of the binary file.}
|
\item{fname}{the name of the binary file.}
|
||||||
|
|
||||||
\item{fmap}{feature map file representing the type of feature, to make it
|
\item{fmap}{feature map file representing the type of feature.
|
||||||
look nice, run demo/demo.R for result and demo/featmap.txt for example
|
Detailed description could be found at
|
||||||
Format: https://github.com/tqchen/xgboost/wiki/Binary-Classification#dump-model}
|
\url{https://github.com/tqchen/xgboost/wiki/Binary-Classification#dump-model}.
|
||||||
|
Run inst/examples/demo.R for the result and inst/examples/featmap.txt
|
||||||
|
for example Format.}
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
Save a xgboost model to text file. Could be parsed later.
|
Save a xgboost model to text file. Could be parsed later.
|
||||||
|
|||||||
@ -20,7 +20,7 @@ xgb.train(params = list(), dtrain, nrounds, watchlist = list(),
|
|||||||
}
|
}
|
||||||
|
|
||||||
See \url{https://github.com/tqchen/xgboost/wiki/Parameters} for
|
See \url{https://github.com/tqchen/xgboost/wiki/Parameters} for
|
||||||
further details. See also demo/demo.R for walkthrough example in R.}
|
further details. See also inst/examples/demo.R for walkthrough example in R.}
|
||||||
|
|
||||||
\item{dtrain}{takes an \code{xgb.DMatrix} as the input.}
|
\item{dtrain}{takes an \code{xgb.DMatrix} as the input.}
|
||||||
|
|
||||||
@ -32,11 +32,12 @@ xgb.train(params = list(), dtrain, nrounds, watchlist = list(),
|
|||||||
watchlist=list(validation1=mat1, validation2=mat2) to watch
|
watchlist=list(validation1=mat1, validation2=mat2) to watch
|
||||||
the performance of each round's model on mat1 and mat2}
|
the performance of each round's model on mat1 and mat2}
|
||||||
|
|
||||||
\item{obj}{customized objective function. Given prediction and dtrain,
|
\item{obj}{customized objective function. Returns gradient and second order
|
||||||
return gradient and second order gradient.}
|
gradient with given prediction and dtrain,}
|
||||||
|
|
||||||
\item{feval}{custimized evaluation function. Given prediction and dtrain,
|
\item{feval}{custimized evaluation function. Returns
|
||||||
return a \code{list(metric='metric-name', value='metric-value')}.}
|
\code{list(metric='metric-name', value='metric-value')} with given
|
||||||
|
prediction and dtrain,}
|
||||||
|
|
||||||
\item{...}{other parameters to pass to \code{params}.}
|
\item{...}{other parameters to pass to \code{params}.}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -25,7 +25,7 @@ xgboost(data = NULL, label = NULL, params = list(), nrounds,
|
|||||||
}
|
}
|
||||||
|
|
||||||
See \url{https://github.com/tqchen/xgboost/wiki/Parameters} for
|
See \url{https://github.com/tqchen/xgboost/wiki/Parameters} for
|
||||||
further details. See also demo/demo.R for walkthrough example in R.}
|
further details. See also inst/examples/demo.R for walkthrough example in R.}
|
||||||
|
|
||||||
\item{nrounds}{the max number of iterations}
|
\item{nrounds}{the max number of iterations}
|
||||||
|
|
||||||
|
|||||||
@ -1,32 +1,7 @@
|
|||||||
# package root
|
# package root
|
||||||
PKGROOT=../../
|
PKGROOT=../../
|
||||||
# _*_ mode: Makefile; _*_
|
# _*_ mode: Makefile; _*_
|
||||||
CXX=`R CMD config CXX`
|
PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -DXGBOOST_CUSTOMIZE_PRNG_ -DXGBOOST_STRICT_CXX98_ -I$(PKGROOT)
|
||||||
TCFLAGS=`R CMD config CFLAGS`
|
PKG_CXXFLAGS= $(SHLIB_OPENMP_CFLAGS)
|
||||||
# expose these flags to R CMD SHLIB
|
PKG_LIBS = $(SHLIB_OPENMP_CFLAGS)
|
||||||
PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_ERROR_ -I$(PKGROOT) $(SHLIB_OPENMP_CFLAGS)
|
OBJECTS= xgboost_R.o xgboost_assert.o $(PKGROOT)/wrapper/xgboost_wrapper.o $(PKGROOT)/src/io/io.o $(PKGROOT)/src/gbm/gbm.o $(PKGROOT)/src/tree/updater.o
|
||||||
PKG_CPPFLAGS+= $(SHLIB_PTHREAD_FLAGS)
|
|
||||||
XGBFLAG= $(TCFLAGS) -DXGBOOST_CUSTOMIZE_ERROR_ -fPIC $(SHLIB_OPENMP_CFLAGS) $(SHLIB_PTHREAD_FLAGS)
|
|
||||||
|
|
||||||
ifeq ($(no_omp),1)
|
|
||||||
PKG_CPPFLAGS += -DDISABLE_OPENMP
|
|
||||||
endif
|
|
||||||
|
|
||||||
CXXOBJ= xgboost_wrapper.o xgboost_io.o xgboost_gbm.o xgboost_updater.o
|
|
||||||
OBJECTS= xgboost_R.o $(CXXOBJ)
|
|
||||||
|
|
||||||
.PHONY: all clean
|
|
||||||
all: $(SHLIB)
|
|
||||||
$(SHLIB): $(OBJECTS)
|
|
||||||
|
|
||||||
xgboost_wrapper.o: $(PKGROOT)/wrapper/xgboost_wrapper.cpp
|
|
||||||
xgboost_io.o: $(PKGROOT)/src/io/io.cpp
|
|
||||||
xgboost_gbm.o: $(PKGROOT)/src/gbm/gbm.cpp
|
|
||||||
xgboost_updater.o: $(PKGROOT)/src/tree/updater.cpp
|
|
||||||
|
|
||||||
$(CXXOBJ) :
|
|
||||||
$(CXX) -c $(XGBFLAG) -o $@ $(firstword $(filter %.cpp %.c, $^) )
|
|
||||||
|
|
||||||
clean:
|
|
||||||
rm -rf *.so *.o *~ *.dll
|
|
||||||
|
|
||||||
|
|||||||
@ -1,33 +1,7 @@
|
|||||||
# package root
|
# package root
|
||||||
PKGROOT=../../
|
PKGROOT=../../
|
||||||
# _*_ mode: Makefile; _*_
|
# _*_ mode: Makefile; _*_
|
||||||
CXX=`Rcmd config CXX`
|
PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -DXGBOOST_CUSTOMIZE_PRNG_ -DXGBOOST_STRICT_CXX98_ -I$(PKGROOT)
|
||||||
TCFLAGS=`Rcmd config CFLAGS`
|
PKG_CXXFLAGS= $(SHLIB_OPENMP_CFLAGS)
|
||||||
# expose these flags to R CMD SHLIB
|
PKG_LIBS = $(SHLIB_OPENMP_CFLAGS)
|
||||||
PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_ERROR_ -I$(PKGROOT) $(SHLIB_OPENMP_CFLAGS)
|
OBJECTS= xgboost_R.o xgboost_assert.o $(PKGROOT)/wrapper/xgboost_wrapper.o $(PKGROOT)/src/io/io.o $(PKGROOT)/src/gbm/gbm.o $(PKGROOT)/src/tree/updater.o
|
||||||
PKG_CPPFLAGS+= $(SHLIB_PTHREAD_FLAGS)
|
|
||||||
XGBFLAG= -O3 -DXGBOOST_CUSTOMIZE_ERROR_ -fPIC $(SHLIB_OPENMP_CFLAGS) $(SHLIB_PTHREAD_FLAGS)
|
|
||||||
PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) $(SHLIB_PTHREAD_FLAGS)
|
|
||||||
|
|
||||||
ifeq ($(no_omp),1)
|
|
||||||
PKG_CPPFLAGS += -DDISABLE_OPENMP
|
|
||||||
endif
|
|
||||||
|
|
||||||
CXXOBJ= xgboost_wrapper.o xgboost_io.o xgboost_gbm.o xgboost_updater.o
|
|
||||||
OBJECTS= xgboost_R.o $(CXXOBJ)
|
|
||||||
|
|
||||||
.PHONY: all clean
|
|
||||||
all: $(SHLIB)
|
|
||||||
$(SHLIB): $(OBJECTS)
|
|
||||||
|
|
||||||
xgboost_wrapper.o: $(PKGROOT)/wrapper/xgboost_wrapper.cpp
|
|
||||||
xgboost_io.o: $(PKGROOT)/src/io/io.cpp
|
|
||||||
xgboost_gbm.o: $(PKGROOT)/src/gbm/gbm.cpp
|
|
||||||
xgboost_updater.o: $(PKGROOT)/src/tree/updater.cpp
|
|
||||||
|
|
||||||
$(CXXOBJ) :
|
|
||||||
$(CXX) -c $(XGBFLAG) -o $@ $(firstword $(filter %.cpp %.c, $^) )
|
|
||||||
|
|
||||||
clean:
|
|
||||||
rm -rf *.so *.o *~ *.dll
|
|
||||||
|
|
||||||
|
|||||||
@ -2,25 +2,55 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
#include <cstdio>
|
||||||
#include "xgboost_R.h"
|
#include "xgboost_R.h"
|
||||||
#include "wrapper/xgboost_wrapper.h"
|
#include "wrapper/xgboost_wrapper.h"
|
||||||
#include "src/utils/utils.h"
|
#include "src/utils/utils.h"
|
||||||
#include "src/utils/omp.h"
|
#include "src/utils/omp.h"
|
||||||
#include "src/utils/matrix_csr.h"
|
#include "src/utils/matrix_csr.h"
|
||||||
|
using namespace std;
|
||||||
using namespace xgboost;
|
using namespace xgboost;
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
void XGBoostAssert_R(int exp, const char *fmt, ...);
|
||||||
|
void XGBoostCheck_R(int exp, const char *fmt, ...);
|
||||||
|
int XGBoostSPrintf_R(char *buf, size_t size, const char *fmt, ...);
|
||||||
|
}
|
||||||
|
|
||||||
// implements error handling
|
// implements error handling
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
namespace utils {
|
namespace utils {
|
||||||
void HandleAssertError(const char *msg) {
|
extern "C" {
|
||||||
error("%s", msg);
|
void (*Printf)(const char *fmt, ...) = Rprintf;
|
||||||
}
|
int (*SPrintf)(char *buf, size_t size, const char *fmt, ...) = XGBoostSPrintf_R;
|
||||||
void HandleCheckError(const char *msg) {
|
void (*Assert)(int exp, const char *fmt, ...) = XGBoostAssert_R;
|
||||||
error("%s", msg);
|
void (*Check)(int exp, const char *fmt, ...) = XGBoostCheck_R;
|
||||||
|
void (*Error)(const char *fmt, ...) = error;
|
||||||
}
|
}
|
||||||
} // namespace utils
|
} // namespace utils
|
||||||
|
|
||||||
|
namespace random {
|
||||||
|
void Seed(unsigned seed) {
|
||||||
|
warning("parameter seed is ignored, please set random seed using set.seed");
|
||||||
|
}
|
||||||
|
double Uniform(void) {
|
||||||
|
return unif_rand();
|
||||||
|
}
|
||||||
|
double Normal(void) {
|
||||||
|
return norm_rand();
|
||||||
|
}
|
||||||
|
} // namespace random
|
||||||
} // namespace xgboost
|
} // namespace xgboost
|
||||||
|
|
||||||
|
// call before wrapper starts
|
||||||
|
inline void _WrapperBegin(void) {
|
||||||
|
GetRNGstate();
|
||||||
|
}
|
||||||
|
// call before wrapper returns
|
||||||
|
inline void _WrapperEnd(void) {
|
||||||
|
PutRNGstate();
|
||||||
|
}
|
||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
void _DMatrixFinalizer(SEXP ext) {
|
void _DMatrixFinalizer(SEXP ext) {
|
||||||
if (R_ExternalPtrAddr(ext) == NULL) return;
|
if (R_ExternalPtrAddr(ext) == NULL) return;
|
||||||
@ -28,14 +58,17 @@ extern "C" {
|
|||||||
R_ClearExternalPtr(ext);
|
R_ClearExternalPtr(ext);
|
||||||
}
|
}
|
||||||
SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) {
|
SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) {
|
||||||
|
_WrapperBegin();
|
||||||
void *handle = XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent));
|
void *handle = XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent));
|
||||||
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
|
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
|
||||||
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
|
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
|
||||||
UNPROTECT(1);
|
UNPROTECT(1);
|
||||||
|
_WrapperEnd();
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
SEXP XGDMatrixCreateFromMat_R(SEXP mat,
|
SEXP XGDMatrixCreateFromMat_R(SEXP mat,
|
||||||
SEXP missing) {
|
SEXP missing) {
|
||||||
|
_WrapperBegin();
|
||||||
SEXP dim = getAttrib(mat, R_DimSymbol);
|
SEXP dim = getAttrib(mat, R_DimSymbol);
|
||||||
int nrow = INTEGER(dim)[0];
|
int nrow = INTEGER(dim)[0];
|
||||||
int ncol = INTEGER(dim)[1];
|
int ncol = INTEGER(dim)[1];
|
||||||
@ -47,15 +80,17 @@ extern "C" {
|
|||||||
data[i * ncol +j] = din[i + nrow * j];
|
data[i * ncol +j] = din[i + nrow * j];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
void *handle = XGDMatrixCreateFromMat(&data[0], nrow, ncol, asReal(missing));
|
void *handle = XGDMatrixCreateFromMat(BeginPtr(data), nrow, ncol, asReal(missing));
|
||||||
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
|
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
|
||||||
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
|
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
|
||||||
UNPROTECT(1);
|
UNPROTECT(1);
|
||||||
|
_WrapperEnd();
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
SEXP XGDMatrixCreateFromCSC_R(SEXP indptr,
|
SEXP XGDMatrixCreateFromCSC_R(SEXP indptr,
|
||||||
SEXP indices,
|
SEXP indices,
|
||||||
SEXP data) {
|
SEXP data) {
|
||||||
|
_WrapperBegin();
|
||||||
const int *col_ptr = INTEGER(indptr);
|
const int *col_ptr = INTEGER(indptr);
|
||||||
const int *row_index = INTEGER(indices);
|
const int *row_index = INTEGER(indices);
|
||||||
const double *col_data = REAL(data);
|
const double *col_data = REAL(data);
|
||||||
@ -85,29 +120,36 @@ extern "C" {
|
|||||||
col_index[i] = csr_data[i].first;
|
col_index[i] = csr_data[i].first;
|
||||||
row_data[i] = csr_data[i].second;
|
row_data[i] = csr_data[i].second;
|
||||||
}
|
}
|
||||||
void *handle = XGDMatrixCreateFromCSR(&row_ptr[0], &col_index[0], &row_data[0], row_ptr.size(), ndata );
|
void *handle = XGDMatrixCreateFromCSR(BeginPtr(row_ptr), BeginPtr(col_index),
|
||||||
|
BeginPtr(row_data), row_ptr.size(), ndata );
|
||||||
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
|
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
|
||||||
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
|
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
|
||||||
UNPROTECT(1);
|
UNPROTECT(1);
|
||||||
|
_WrapperEnd();
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
SEXP XGDMatrixSliceDMatrix_R(SEXP handle, SEXP idxset) {
|
SEXP XGDMatrixSliceDMatrix_R(SEXP handle, SEXP idxset) {
|
||||||
|
_WrapperBegin();
|
||||||
int len = length(idxset);
|
int len = length(idxset);
|
||||||
std::vector<int> idxvec(len);
|
std::vector<int> idxvec(len);
|
||||||
for (int i = 0; i < len; ++i) {
|
for (int i = 0; i < len; ++i) {
|
||||||
idxvec[i] = INTEGER(idxset)[i] - 1;
|
idxvec[i] = INTEGER(idxset)[i] - 1;
|
||||||
}
|
}
|
||||||
void *res = XGDMatrixSliceDMatrix(R_ExternalPtrAddr(handle), &idxvec[0], len);
|
void *res = XGDMatrixSliceDMatrix(R_ExternalPtrAddr(handle), BeginPtr(idxvec), len);
|
||||||
SEXP ret = PROTECT(R_MakeExternalPtr(res, R_NilValue, R_NilValue));
|
SEXP ret = PROTECT(R_MakeExternalPtr(res, R_NilValue, R_NilValue));
|
||||||
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
|
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
|
||||||
UNPROTECT(1);
|
UNPROTECT(1);
|
||||||
|
_WrapperEnd();
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent) {
|
void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent) {
|
||||||
|
_WrapperBegin();
|
||||||
XGDMatrixSaveBinary(R_ExternalPtrAddr(handle),
|
XGDMatrixSaveBinary(R_ExternalPtrAddr(handle),
|
||||||
CHAR(asChar(fname)), asInteger(silent));
|
CHAR(asChar(fname)), asInteger(silent));
|
||||||
|
_WrapperEnd();
|
||||||
}
|
}
|
||||||
void XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array) {
|
void XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array) {
|
||||||
|
_WrapperBegin();
|
||||||
int len = length(array);
|
int len = length(array);
|
||||||
const char *name = CHAR(asChar(field));
|
const char *name = CHAR(asChar(field));
|
||||||
if (!strcmp("group", name)) {
|
if (!strcmp("group", name)) {
|
||||||
@ -116,7 +158,8 @@ extern "C" {
|
|||||||
for (int i = 0; i < len; ++i) {
|
for (int i = 0; i < len; ++i) {
|
||||||
vec[i] = static_cast<unsigned>(INTEGER(array)[i]);
|
vec[i] = static_cast<unsigned>(INTEGER(array)[i]);
|
||||||
}
|
}
|
||||||
XGDMatrixSetGroup(R_ExternalPtrAddr(handle), &vec[0], len);
|
XGDMatrixSetGroup(R_ExternalPtrAddr(handle), BeginPtr(vec), len);
|
||||||
|
_WrapperEnd();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
@ -127,10 +170,12 @@ extern "C" {
|
|||||||
}
|
}
|
||||||
XGDMatrixSetFloatInfo(R_ExternalPtrAddr(handle),
|
XGDMatrixSetFloatInfo(R_ExternalPtrAddr(handle),
|
||||||
CHAR(asChar(field)),
|
CHAR(asChar(field)),
|
||||||
&vec[0], len);
|
BeginPtr(vec), len);
|
||||||
}
|
}
|
||||||
|
_WrapperEnd();
|
||||||
}
|
}
|
||||||
SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) {
|
SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) {
|
||||||
|
_WrapperBegin();
|
||||||
bst_ulong olen;
|
bst_ulong olen;
|
||||||
const float *res = XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle),
|
const float *res = XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle),
|
||||||
CHAR(asChar(field)), &olen);
|
CHAR(asChar(field)), &olen);
|
||||||
@ -139,6 +184,7 @@ extern "C" {
|
|||||||
REAL(ret)[i] = res[i];
|
REAL(ret)[i] = res[i];
|
||||||
}
|
}
|
||||||
UNPROTECT(1);
|
UNPROTECT(1);
|
||||||
|
_WrapperEnd();
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
// functions related to booster
|
// functions related to booster
|
||||||
@ -148,28 +194,35 @@ extern "C" {
|
|||||||
R_ClearExternalPtr(ext);
|
R_ClearExternalPtr(ext);
|
||||||
}
|
}
|
||||||
SEXP XGBoosterCreate_R(SEXP dmats) {
|
SEXP XGBoosterCreate_R(SEXP dmats) {
|
||||||
|
_WrapperBegin();
|
||||||
int len = length(dmats);
|
int len = length(dmats);
|
||||||
std::vector<void*> dvec;
|
std::vector<void*> dvec;
|
||||||
for (int i = 0; i < len; ++i){
|
for (int i = 0; i < len; ++i){
|
||||||
dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i)));
|
dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i)));
|
||||||
}
|
}
|
||||||
void *handle = XGBoosterCreate(&dvec[0], dvec.size());
|
void *handle = XGBoosterCreate(BeginPtr(dvec), dvec.size());
|
||||||
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
|
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
|
||||||
R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
|
R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
|
||||||
UNPROTECT(1);
|
UNPROTECT(1);
|
||||||
|
_WrapperEnd();
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val) {
|
void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val) {
|
||||||
|
_WrapperBegin();
|
||||||
XGBoosterSetParam(R_ExternalPtrAddr(handle),
|
XGBoosterSetParam(R_ExternalPtrAddr(handle),
|
||||||
CHAR(asChar(name)),
|
CHAR(asChar(name)),
|
||||||
CHAR(asChar(val)));
|
CHAR(asChar(val)));
|
||||||
|
_WrapperEnd();
|
||||||
}
|
}
|
||||||
void XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain) {
|
void XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain) {
|
||||||
|
_WrapperBegin();
|
||||||
XGBoosterUpdateOneIter(R_ExternalPtrAddr(handle),
|
XGBoosterUpdateOneIter(R_ExternalPtrAddr(handle),
|
||||||
asInteger(iter),
|
asInteger(iter),
|
||||||
R_ExternalPtrAddr(dtrain));
|
R_ExternalPtrAddr(dtrain));
|
||||||
|
_WrapperEnd();
|
||||||
}
|
}
|
||||||
void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess) {
|
void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess) {
|
||||||
|
_WrapperBegin();
|
||||||
utils::Check(length(grad) == length(hess), "gradient and hess must have same length");
|
utils::Check(length(grad) == length(hess), "gradient and hess must have same length");
|
||||||
int len = length(grad);
|
int len = length(grad);
|
||||||
std::vector<float> tgrad(len), thess(len);
|
std::vector<float> tgrad(len), thess(len);
|
||||||
@ -180,9 +233,11 @@ extern "C" {
|
|||||||
}
|
}
|
||||||
XGBoosterBoostOneIter(R_ExternalPtrAddr(handle),
|
XGBoosterBoostOneIter(R_ExternalPtrAddr(handle),
|
||||||
R_ExternalPtrAddr(dtrain),
|
R_ExternalPtrAddr(dtrain),
|
||||||
&tgrad[0], &thess[0], len);
|
BeginPtr(tgrad), BeginPtr(thess), len);
|
||||||
|
_WrapperEnd();
|
||||||
}
|
}
|
||||||
SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames) {
|
SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames) {
|
||||||
|
_WrapperBegin();
|
||||||
utils::Check(length(dmats) == length(evnames), "dmats and evnams must have same length");
|
utils::Check(length(dmats) == length(evnames), "dmats and evnams must have same length");
|
||||||
int len = length(dmats);
|
int len = length(dmats);
|
||||||
std::vector<void*> vec_dmats;
|
std::vector<void*> vec_dmats;
|
||||||
@ -197,28 +252,37 @@ extern "C" {
|
|||||||
}
|
}
|
||||||
return mkString(XGBoosterEvalOneIter(R_ExternalPtrAddr(handle),
|
return mkString(XGBoosterEvalOneIter(R_ExternalPtrAddr(handle),
|
||||||
asInteger(iter),
|
asInteger(iter),
|
||||||
&vec_dmats[0], &vec_sptr[0], len));
|
BeginPtr(vec_dmats), BeginPtr(vec_sptr), len));
|
||||||
|
_WrapperEnd();
|
||||||
}
|
}
|
||||||
SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin) {
|
SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin, SEXP ntree_limit) {
|
||||||
|
_WrapperBegin();
|
||||||
bst_ulong olen;
|
bst_ulong olen;
|
||||||
const float *res = XGBoosterPredict(R_ExternalPtrAddr(handle),
|
const float *res = XGBoosterPredict(R_ExternalPtrAddr(handle),
|
||||||
R_ExternalPtrAddr(dmat),
|
R_ExternalPtrAddr(dmat),
|
||||||
asInteger(output_margin),
|
asInteger(output_margin),
|
||||||
|
asInteger(ntree_limit),
|
||||||
&olen);
|
&olen);
|
||||||
SEXP ret = PROTECT(allocVector(REALSXP, olen));
|
SEXP ret = PROTECT(allocVector(REALSXP, olen));
|
||||||
for (size_t i = 0; i < olen; ++i) {
|
for (size_t i = 0; i < olen; ++i) {
|
||||||
REAL(ret)[i] = res[i];
|
REAL(ret)[i] = res[i];
|
||||||
}
|
}
|
||||||
UNPROTECT(1);
|
UNPROTECT(1);
|
||||||
|
_WrapperEnd();
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
void XGBoosterLoadModel_R(SEXP handle, SEXP fname) {
|
void XGBoosterLoadModel_R(SEXP handle, SEXP fname) {
|
||||||
|
_WrapperBegin();
|
||||||
XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)));
|
XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)));
|
||||||
|
_WrapperEnd();
|
||||||
}
|
}
|
||||||
void XGBoosterSaveModel_R(SEXP handle, SEXP fname) {
|
void XGBoosterSaveModel_R(SEXP handle, SEXP fname) {
|
||||||
|
_WrapperBegin();
|
||||||
XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)));
|
XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)));
|
||||||
|
_WrapperEnd();
|
||||||
}
|
}
|
||||||
void XGBoosterDumpModel_R(SEXP handle, SEXP fname, SEXP fmap) {
|
void XGBoosterDumpModel_R(SEXP handle, SEXP fname, SEXP fmap) {
|
||||||
|
_WrapperBegin();
|
||||||
bst_ulong olen;
|
bst_ulong olen;
|
||||||
const char **res = XGBoosterDumpModel(R_ExternalPtrAddr(handle),
|
const char **res = XGBoosterDumpModel(R_ExternalPtrAddr(handle),
|
||||||
CHAR(asChar(fmap)),
|
CHAR(asChar(fmap)),
|
||||||
@ -229,5 +293,6 @@ extern "C" {
|
|||||||
fprintf(fo, "%s", res[i]);
|
fprintf(fo, "%s", res[i]);
|
||||||
}
|
}
|
||||||
fclose(fo);
|
fclose(fo);
|
||||||
|
_WrapperEnd();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -7,6 +7,7 @@
|
|||||||
*/
|
*/
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#include <Rinternals.h>
|
#include <Rinternals.h>
|
||||||
|
#include <R_ext/Random.h>
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
@ -106,8 +107,9 @@ extern "C" {
|
|||||||
* \param handle handle
|
* \param handle handle
|
||||||
* \param dmat data matrix
|
* \param dmat data matrix
|
||||||
* \param output_margin whether only output raw margin value
|
* \param output_margin whether only output raw margin value
|
||||||
|
* \param ntree_limit limit number of trees used in prediction
|
||||||
*/
|
*/
|
||||||
SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin);
|
SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin, SEXP ntree_limit);
|
||||||
/*!
|
/*!
|
||||||
* \brief load model from existing file
|
* \brief load model from existing file
|
||||||
* \param handle handle
|
* \param handle handle
|
||||||
|
|||||||
33
R-package/src/xgboost_assert.c
Normal file
33
R-package/src/xgboost_assert.c
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
#include <stdio.h>
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <Rinternals.h>
|
||||||
|
|
||||||
|
// implements error handling
|
||||||
|
/*
 * Assertion hook for the R build.
 *
 * exp: condition result; nothing happens when it is non-zero.
 * fmt: printf-style format string, followed by its arguments.
 *
 * When exp is zero the message is formatted into a local buffer and passed
 * to R's error() with an "AssertError:" prefix.
 */
void XGBoostAssert_R(int exp, const char *fmt, ...) {
  char buf[1024];
  if (exp == 0) {
    va_list args;
    va_start(args, fmt);
    /* bounded write: an over-long message is truncated rather than
       overrunning buf (the original used unbounded vsprintf) */
    vsnprintf(buf, sizeof(buf), fmt, args);
    va_end(args);
    error("AssertError:%s\n", buf);
  }
}
|
||||||
|
/*
 * Check hook for the R build.
 *
 * exp: condition result; nothing happens when it is non-zero.
 * fmt: printf-style format string, followed by its arguments.
 *
 * When exp is zero the message is formatted into a local buffer and passed
 * to R's error() without any prefix.
 */
void XGBoostCheck_R(int exp, const char *fmt, ...) {
  char buf[1024];
  if (exp == 0) {
    va_list args;
    va_start(args, fmt);
    /* bounded write: an over-long message is truncated rather than
       overrunning buf (the original used unbounded vsprintf) */
    vsnprintf(buf, sizeof(buf), fmt, args);
    va_end(args);
    error("%s\n", buf);
  }
}
|
||||||
|
/*
 * snprintf-style formatter.
 *
 * buf:  destination buffer.
 * size: capacity of buf in bytes, handed straight to vsnprintf.
 * fmt:  printf-style format string, followed by its arguments.
 *
 * Returns whatever vsnprintf returns for the formatted message.
 */
int XGBoostSPrintf_R(char *buf, size_t size, const char *fmt, ...) {
  va_list ap;
  va_start(ap, fmt);
  const int written = vsnprintf(buf, size, fmt, ap);
  va_end(ap);
  return written;
}
|
||||||
@ -173,7 +173,7 @@ objective function.
|
|||||||
We also have \verb@slice@ for row extraction. It is useful in
|
We also have \verb@slice@ for row extraction. It is useful in
|
||||||
cross-validation.
|
cross-validation.
|
||||||
|
|
||||||
For a walkthrough demo, please see \verb@R-package/demo/demo.R@ for further
|
For a walkthrough demo, please see \verb@R-package/inst/examples/demo.R@ for further
|
||||||
details.
|
details.
|
||||||
|
|
||||||
\section{The Higgs Boson competition}
|
\section{The Higgs Boson competition}
|
||||||
|
|||||||
@ -35,11 +35,11 @@ Version
|
|||||||
======
|
======
|
||||||
* This version is xgboost-0.3; the code has been refactored from 0.2x to be cleaner and more flexible
|
* This version is xgboost-0.3; the code has been refactored from 0.2x to be cleaner and more flexible
|
||||||
* This version of xgboost is not compatible with 0.2x, due to huge amount of changes in code structure
|
* This version of xgboost is not compatible with 0.2x, due to huge amount of changes in code structure
|
||||||
- This means the model and buffer file of previous version can not be loaded in xgboost-unity
|
- This means the model and buffer files of previous versions cannot be loaded in xgboost-0.3
|
||||||
* For legacy 0.2x code, refer to [Here](https://github.com/tqchen/xgboost/releases/tag/v0.22)
|
* For legacy 0.2x code, refer to [Here](https://github.com/tqchen/xgboost/releases/tag/v0.22)
|
||||||
* Change log in [CHANGES.md](CHANGES.md)
|
* Change log in [CHANGES.md](CHANGES.md)
|
||||||
|
|
||||||
XGBoost in Graphlab Create
|
XGBoost in Graphlab Create
|
||||||
======
|
======
|
||||||
* XGBoost is adopted as part of boosted tree toolkit in Graphlab Create (GLC). Graphlab Create is a powerful python toolkit that allows you to data manipulation, graph processing, hyper-parameter search, and visualization of big data in one framework. Try the Graphlab Create in http://graphlab.com/products/create/quick-start-guide.html
|
* XGBoost is adopted as part of the boosted tree toolkit in Graphlab Create (GLC). Graphlab Create is a powerful python toolkit that allows you to do data manipulation, graph processing, hyper-parameter search, and visualization of terabyte-scale data in one framework. Try the Graphlab Create in http://graphlab.com/products/create/quick-start-guide.html
|
||||||
* Nice blogpost by Jay Gu using GLC boosted tree to solve kaggle bike sharing challenge: http://blog.graphlab.com/using-gradient-boosted-trees-to-predict-bike-sharing-demand
|
* Nice blogpost by Jay Gu using GLC boosted tree to solve kaggle bike sharing challenge: http://blog.graphlab.com/using-gradient-boosted-trees-to-predict-bike-sharing-demand
|
||||||
|
|||||||
@ -105,7 +105,10 @@ class GBLinear : public IGradBooster {
|
|||||||
virtual void Predict(IFMatrix *p_fmat,
|
virtual void Predict(IFMatrix *p_fmat,
|
||||||
int64_t buffer_offset,
|
int64_t buffer_offset,
|
||||||
const BoosterInfo &info,
|
const BoosterInfo &info,
|
||||||
std::vector<float> *out_preds) {
|
std::vector<float> *out_preds,
|
||||||
|
unsigned ntree_limit = 0) {
|
||||||
|
utils::Check(ntree_limit == 0,
|
||||||
|
"GBLinear::Predict ntrees is only valid for gbtree predictor");
|
||||||
std::vector<float> &preds = *out_preds;
|
std::vector<float> &preds = *out_preds;
|
||||||
preds.resize(0);
|
preds.resize(0);
|
||||||
// start collecting the prediction
|
// start collecting the prediction
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
#define _CRT_SECURE_NO_WARNINGS
|
#define _CRT_SECURE_NO_WARNINGS
|
||||||
#define _CRT_SECURE_NO_DEPRECATE
|
#define _CRT_SECURE_NO_DEPRECATE
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
using namespace std;
|
||||||
#include "./gbm.h"
|
#include "./gbm.h"
|
||||||
#include "./gbtree-inl.hpp"
|
#include "./gbtree-inl.hpp"
|
||||||
#include "./gblinear-inl.hpp"
|
#include "./gblinear-inl.hpp"
|
||||||
|
|||||||
@ -57,11 +57,14 @@ class IGradBooster {
|
|||||||
* the size of buffer is set by convention using IGradBooster.SetParam("num_pbuffer","size")
|
* the size of buffer is set by convention using IGradBooster.SetParam("num_pbuffer","size")
|
||||||
* \param info extra side information that may be needed for prediction
|
* \param info extra side information that may be needed for prediction
|
||||||
* \param out_preds output vector to hold the predictions
|
* \param out_preds output vector to hold the predictions
|
||||||
|
* \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means
|
||||||
|
* we do not limit number of trees, this parameter is only valid for gbtree, but not for gblinear
|
||||||
*/
|
*/
|
||||||
virtual void Predict(IFMatrix *p_fmat,
|
virtual void Predict(IFMatrix *p_fmat,
|
||||||
int64_t buffer_offset,
|
int64_t buffer_offset,
|
||||||
const BoosterInfo &info,
|
const BoosterInfo &info,
|
||||||
std::vector<float> *out_preds) = 0;
|
std::vector<float> *out_preds,
|
||||||
|
unsigned ntree_limit = 0) = 0;
|
||||||
/*!
|
/*!
|
||||||
* \brief dump the model in text format
|
* \brief dump the model in text format
|
||||||
* \param fmap feature map that may help give interpretations of feature
|
* \param fmap feature map that may help give interpretations of feature
|
||||||
|
|||||||
@ -105,7 +105,8 @@ class GBTree : public IGradBooster {
|
|||||||
virtual void Predict(IFMatrix *p_fmat,
|
virtual void Predict(IFMatrix *p_fmat,
|
||||||
int64_t buffer_offset,
|
int64_t buffer_offset,
|
||||||
const BoosterInfo &info,
|
const BoosterInfo &info,
|
||||||
std::vector<float> *out_preds) {
|
std::vector<float> *out_preds,
|
||||||
|
unsigned ntree_limit = 0) {
|
||||||
int nthread;
|
int nthread;
|
||||||
#pragma omp parallel
|
#pragma omp parallel
|
||||||
{
|
{
|
||||||
@ -137,7 +138,8 @@ class GBTree : public IGradBooster {
|
|||||||
this->Pred(batch[i],
|
this->Pred(batch[i],
|
||||||
buffer_offset < 0 ? -1 : buffer_offset + ridx,
|
buffer_offset < 0 ? -1 : buffer_offset + ridx,
|
||||||
gid, info.GetRoot(ridx), &feats,
|
gid, info.GetRoot(ridx), &feats,
|
||||||
&preds[ridx * mparam.num_output_group + gid], stride);
|
&preds[ridx * mparam.num_output_group + gid], stride,
|
||||||
|
ntree_limit);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -212,14 +214,16 @@ class GBTree : public IGradBooster {
|
|||||||
int bst_group,
|
int bst_group,
|
||||||
unsigned root_index,
|
unsigned root_index,
|
||||||
tree::RegTree::FVec *p_feats,
|
tree::RegTree::FVec *p_feats,
|
||||||
float *out_pred, size_t stride) {
|
float *out_pred, size_t stride, unsigned ntree_limit) {
|
||||||
size_t itop = 0;
|
size_t itop = 0;
|
||||||
float psum = 0.0f;
|
float psum = 0.0f;
|
||||||
// sum of leaf vector
|
// sum of leaf vector
|
||||||
std::vector<float> vec_psum(mparam.size_leaf_vector, 0.0f);
|
std::vector<float> vec_psum(mparam.size_leaf_vector, 0.0f);
|
||||||
const int64_t bid = mparam.BufferOffset(buffer_index, bst_group);
|
const int64_t bid = mparam.BufferOffset(buffer_index, bst_group);
|
||||||
|
// number of valid trees
|
||||||
|
unsigned treeleft = ntree_limit == 0 ? std::numeric_limits<unsigned>::max() : ntree_limit;
|
||||||
// load buffered results if any
|
// load buffered results if any
|
||||||
if (bid >= 0) {
|
if (bid >= 0 && ntree_limit == 0) {
|
||||||
itop = pred_counter[bid];
|
itop = pred_counter[bid];
|
||||||
psum = pred_buffer[bid];
|
psum = pred_buffer[bid];
|
||||||
for (int i = 0; i < mparam.size_leaf_vector; ++i) {
|
for (int i = 0; i < mparam.size_leaf_vector; ++i) {
|
||||||
@ -235,12 +239,13 @@ class GBTree : public IGradBooster {
|
|||||||
for (int j = 0; j < mparam.size_leaf_vector; ++j) {
|
for (int j = 0; j < mparam.size_leaf_vector; ++j) {
|
||||||
vec_psum[j] += trees[i]->leafvec(tid)[j];
|
vec_psum[j] += trees[i]->leafvec(tid)[j];
|
||||||
}
|
}
|
||||||
|
if(--treeleft == 0) break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
p_feats->Drop(inst);
|
p_feats->Drop(inst);
|
||||||
}
|
}
|
||||||
// updated the buffered results
|
// updated the buffered results
|
||||||
if (bid >= 0) {
|
if (bid >= 0 && ntree_limit == 0) {
|
||||||
pred_counter[bid] = static_cast<unsigned>(trees.size());
|
pred_counter[bid] = static_cast<unsigned>(trees.size());
|
||||||
pred_buffer[bid] = psum;
|
pred_buffer[bid] = psum;
|
||||||
for (int i = 0; i < mparam.size_leaf_vector; ++i) {
|
for (int i = 0; i < mparam.size_leaf_vector; ++i) {
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
#define _CRT_SECURE_NO_WARNINGS
|
#define _CRT_SECURE_NO_WARNINGS
|
||||||
#define _CRT_SECURE_NO_DEPRECATE
|
#define _CRT_SECURE_NO_DEPRECATE
|
||||||
#include <string>
|
#include <string>
|
||||||
|
using namespace std;
|
||||||
#include "./io.h"
|
#include "./io.h"
|
||||||
#include "../utils/io.h"
|
#include "../utils/io.h"
|
||||||
#include "../utils/utils.h"
|
#include "../utils/utils.h"
|
||||||
|
|||||||
@ -54,8 +54,10 @@ class DMatrixSimple : public DataMatrix {
|
|||||||
for (size_t i = 0; i < batch.size; ++i) {
|
for (size_t i = 0; i < batch.size; ++i) {
|
||||||
RowBatch::Inst inst = batch[i];
|
RowBatch::Inst inst = batch[i];
|
||||||
row_data_.resize(row_data_.size() + inst.length);
|
row_data_.resize(row_data_.size() + inst.length);
|
||||||
memcpy(&row_data_[row_ptr_.back()], inst.data,
|
if (inst.length != 0) {
|
||||||
sizeof(RowBatch::Entry) * inst.length);
|
memcpy(&row_data_[row_ptr_.back()], inst.data,
|
||||||
|
sizeof(RowBatch::Entry) * inst.length);
|
||||||
|
}
|
||||||
row_ptr_.push_back(row_ptr_.back() + inst.length);
|
row_ptr_.push_back(row_ptr_.back() + inst.length);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -104,10 +106,10 @@ class DMatrixSimple : public DataMatrix {
|
|||||||
this->AddRow(feats);
|
this->AddRow(feats);
|
||||||
|
|
||||||
if (!silent) {
|
if (!silent) {
|
||||||
printf("%lux%lu matrix with %lu entries is loaded from %s\n",
|
utils::Printf("%lux%lu matrix with %lu entries is loaded from %s\n",
|
||||||
static_cast<unsigned long>(info.num_row()),
|
static_cast<unsigned long>(info.num_row()),
|
||||||
static_cast<unsigned long>(info.num_col()),
|
static_cast<unsigned long>(info.num_col()),
|
||||||
static_cast<unsigned long>(row_data_.size()), fname);
|
static_cast<unsigned long>(row_data_.size()), fname);
|
||||||
}
|
}
|
||||||
fclose(file);
|
fclose(file);
|
||||||
// try to load in additional file
|
// try to load in additional file
|
||||||
@ -147,26 +149,26 @@ class DMatrixSimple : public DataMatrix {
|
|||||||
* \param fname file name, used to print message
|
* \param fname file name, used to print message
|
||||||
*/
|
*/
|
||||||
inline void LoadBinary(utils::IStream &fs, bool silent = false, const char *fname = NULL) {
|
inline void LoadBinary(utils::IStream &fs, bool silent = false, const char *fname = NULL) {
|
||||||
int magic;
|
int tmagic;
|
||||||
utils::Check(fs.Read(&magic, sizeof(magic)) != 0, "invalid input file format");
|
utils::Check(fs.Read(&tmagic, sizeof(tmagic)) != 0, "invalid input file format");
|
||||||
utils::Check(magic == kMagic, "invalid format,magic number mismatch");
|
utils::Check(tmagic == kMagic, "invalid format,magic number mismatch");
|
||||||
|
|
||||||
info.LoadBinary(fs);
|
info.LoadBinary(fs);
|
||||||
FMatrixS::LoadBinary(fs, &row_ptr_, &row_data_);
|
FMatrixS::LoadBinary(fs, &row_ptr_, &row_data_);
|
||||||
fmat_->LoadColAccess(fs);
|
fmat_->LoadColAccess(fs);
|
||||||
|
|
||||||
if (!silent) {
|
if (!silent) {
|
||||||
printf("%lux%lu matrix with %lu entries is loaded",
|
utils::Printf("%lux%lu matrix with %lu entries is loaded",
|
||||||
static_cast<unsigned long>(info.num_row()),
|
static_cast<unsigned long>(info.num_row()),
|
||||||
static_cast<unsigned long>(info.num_col()),
|
static_cast<unsigned long>(info.num_col()),
|
||||||
static_cast<unsigned long>(row_data_.size()));
|
static_cast<unsigned long>(row_data_.size()));
|
||||||
if (fname != NULL) {
|
if (fname != NULL) {
|
||||||
printf(" from %s\n", fname);
|
utils::Printf(" from %s\n", fname);
|
||||||
} else {
|
} else {
|
||||||
printf("\n");
|
utils::Printf("\n");
|
||||||
}
|
}
|
||||||
if (info.group_ptr.size() != 0) {
|
if (info.group_ptr.size() != 0) {
|
||||||
printf("data contains %u groups\n", (unsigned)info.group_ptr.size()-1);
|
utils::Printf("data contains %u groups\n", (unsigned)info.group_ptr.size()-1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -177,8 +179,8 @@ class DMatrixSimple : public DataMatrix {
|
|||||||
*/
|
*/
|
||||||
inline void SaveBinary(const char* fname, bool silent = false) const {
|
inline void SaveBinary(const char* fname, bool silent = false) const {
|
||||||
utils::FileStream fs(utils::FopenCheck(fname, "wb"));
|
utils::FileStream fs(utils::FopenCheck(fname, "wb"));
|
||||||
int magic = kMagic;
|
int tmagic = kMagic;
|
||||||
fs.Write(&magic, sizeof(magic));
|
fs.Write(&tmagic, sizeof(tmagic));
|
||||||
|
|
||||||
info.SaveBinary(fs);
|
info.SaveBinary(fs);
|
||||||
FMatrixS::SaveBinary(fs, row_ptr_, row_data_);
|
FMatrixS::SaveBinary(fs, row_ptr_, row_data_);
|
||||||
@ -186,13 +188,13 @@ class DMatrixSimple : public DataMatrix {
|
|||||||
fs.Close();
|
fs.Close();
|
||||||
|
|
||||||
if (!silent) {
|
if (!silent) {
|
||||||
printf("%lux%lu matrix with %lu entries is saved to %s\n",
|
utils::Printf("%lux%lu matrix with %lu entries is saved to %s\n",
|
||||||
static_cast<unsigned long>(info.num_row()),
|
static_cast<unsigned long>(info.num_row()),
|
||||||
static_cast<unsigned long>(info.num_col()),
|
static_cast<unsigned long>(info.num_col()),
|
||||||
static_cast<unsigned long>(row_data_.size()), fname);
|
static_cast<unsigned long>(row_data_.size()), fname);
|
||||||
if (info.group_ptr.size() != 0) {
|
if (info.group_ptr.size() != 0) {
|
||||||
printf("data contains %u groups\n",
|
utils::Printf("data contains %u groups\n",
|
||||||
static_cast<unsigned>(info.group_ptr.size()-1));
|
static_cast<unsigned>(info.group_ptr.size()-1));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -244,8 +246,8 @@ class DMatrixSimple : public DataMatrix {
|
|||||||
at_first_ = false;
|
at_first_ = false;
|
||||||
batch_.size = parent_->row_ptr_.size() - 1;
|
batch_.size = parent_->row_ptr_.size() - 1;
|
||||||
batch_.base_rowid = 0;
|
batch_.base_rowid = 0;
|
||||||
batch_.ind_ptr = &parent_->row_ptr_[0];
|
batch_.ind_ptr = BeginPtr(parent_->row_ptr_);
|
||||||
batch_.data_ptr = &parent_->row_data_[0];
|
batch_.data_ptr = BeginPtr(parent_->row_data_);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
virtual const RowBatch &Value(void) const {
|
virtual const RowBatch &Value(void) const {
|
||||||
|
|||||||
@ -110,9 +110,9 @@ class FMatrixS : public IFMatrix{
|
|||||||
const std::vector<RowBatch::Entry> &data) {
|
const std::vector<RowBatch::Entry> &data) {
|
||||||
size_t nrow = ptr.size() - 1;
|
size_t nrow = ptr.size() - 1;
|
||||||
fo.Write(&nrow, sizeof(size_t));
|
fo.Write(&nrow, sizeof(size_t));
|
||||||
fo.Write(&ptr[0], ptr.size() * sizeof(size_t));
|
fo.Write(BeginPtr(ptr), ptr.size() * sizeof(size_t));
|
||||||
if (data.size() != 0) {
|
if (data.size() != 0) {
|
||||||
fo.Write(&data[0], data.size() * sizeof(RowBatch::Entry));
|
fo.Write(BeginPtr(data), data.size() * sizeof(RowBatch::Entry));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
@ -127,11 +127,11 @@ class FMatrixS : public IFMatrix{
|
|||||||
size_t nrow;
|
size_t nrow;
|
||||||
utils::Check(fi.Read(&nrow, sizeof(size_t)) != 0, "invalid input file format");
|
utils::Check(fi.Read(&nrow, sizeof(size_t)) != 0, "invalid input file format");
|
||||||
out_ptr->resize(nrow + 1);
|
out_ptr->resize(nrow + 1);
|
||||||
utils::Check(fi.Read(&(*out_ptr)[0], out_ptr->size() * sizeof(size_t)) != 0,
|
utils::Check(fi.Read(BeginPtr(*out_ptr), out_ptr->size() * sizeof(size_t)) != 0,
|
||||||
"invalid input file format");
|
"invalid input file format");
|
||||||
out_data->resize(out_ptr->back());
|
out_data->resize(out_ptr->back());
|
||||||
if (out_data->size() != 0) {
|
if (out_data->size() != 0) {
|
||||||
utils::Assert(fi.Read(&(*out_data)[0], out_data->size() * sizeof(RowBatch::Entry)) != 0,
|
utils::Assert(fi.Read(BeginPtr(*out_data), out_data->size() * sizeof(RowBatch::Entry)) != 0,
|
||||||
"invalid input file format");
|
"invalid input file format");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -213,8 +213,8 @@ class FMatrixS : public IFMatrix{
|
|||||||
col_data_[i] = SparseBatch::Inst(&data[0] + ptr[ridx],
|
col_data_[i] = SparseBatch::Inst(&data[0] + ptr[ridx],
|
||||||
static_cast<bst_uint>(ptr[ridx+1] - ptr[ridx]));
|
static_cast<bst_uint>(ptr[ridx+1] - ptr[ridx]));
|
||||||
}
|
}
|
||||||
batch_.col_index = &col_index_[0];
|
batch_.col_index = BeginPtr(col_index_);
|
||||||
batch_.col_data = &col_data_[0];
|
batch_.col_data = BeginPtr(col_data_);
|
||||||
this->BeforeFirst();
|
this->BeforeFirst();
|
||||||
}
|
}
|
||||||
// data content
|
// data content
|
||||||
|
|||||||
@ -98,8 +98,8 @@ struct MetaInfo {
|
|||||||
group_ptr.push_back(group_ptr.back()+nline);
|
group_ptr.push_back(group_ptr.back()+nline);
|
||||||
}
|
}
|
||||||
if (!silent) {
|
if (!silent) {
|
||||||
printf("%u groups are loaded from %s\n",
|
utils::Printf("%u groups are loaded from %s\n",
|
||||||
static_cast<unsigned>(group_ptr.size()-1), fname);
|
static_cast<unsigned>(group_ptr.size()-1), fname);
|
||||||
}
|
}
|
||||||
fclose(fi);
|
fclose(fi);
|
||||||
return true;
|
return true;
|
||||||
@ -125,15 +125,15 @@ struct MetaInfo {
|
|||||||
}
|
}
|
||||||
// try to load weight information from file, if exists
|
// try to load weight information from file, if exists
|
||||||
inline bool TryLoadFloatInfo(const char *field, const char* fname, bool silent = false) {
|
inline bool TryLoadFloatInfo(const char *field, const char* fname, bool silent = false) {
|
||||||
std::vector<float> &weights = this->GetFloatInfo(field);
|
std::vector<float> &data = this->GetFloatInfo(field);
|
||||||
FILE *fi = fopen64(fname, "r");
|
FILE *fi = fopen64(fname, "r");
|
||||||
if (fi == NULL) return false;
|
if (fi == NULL) return false;
|
||||||
float wt;
|
float wt;
|
||||||
while (fscanf(fi, "%f", &wt) == 1) {
|
while (fscanf(fi, "%f", &wt) == 1) {
|
||||||
weights.push_back(wt);
|
data.push_back(wt);
|
||||||
}
|
}
|
||||||
if (!silent) {
|
if (!silent) {
|
||||||
printf("loading %s from %s\n", field, fname);
|
utils::Printf("loading %s from %s\n", field, fname);
|
||||||
}
|
}
|
||||||
fclose(fi);
|
fclose(fi);
|
||||||
return true;
|
return true;
|
||||||
|
|||||||
@ -8,8 +8,8 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <climits>
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
#include <climits>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include "./evaluation.h"
|
#include "./evaluation.h"
|
||||||
#include "./helper_utils.h"
|
#include "./helper_utils.h"
|
||||||
@ -183,7 +183,7 @@ struct EvalAMS : public IEvaluator {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (ntop == ndata) {
|
if (ntop == ndata) {
|
||||||
fprintf(stderr, "\tams-ratio=%g", static_cast<float>(thresindex) / ndata);
|
utils::Printf("\tams-ratio=%g", static_cast<float>(thresindex) / ndata);
|
||||||
return static_cast<float>(tams);
|
return static_cast<float>(tams);
|
||||||
} else {
|
} else {
|
||||||
return static_cast<float>(sqrt(2*((s_tp+b_fp+br) * log(1.0 + s_tp/(b_fp+br)) - s_tp)));
|
return static_cast<float>(sqrt(2*((s_tp+b_fp+br) * log(1.0 + s_tp/(b_fp+br)) - s_tp)));
|
||||||
|
|||||||
@ -73,7 +73,7 @@ class EvalSet{
|
|||||||
for (size_t i = 0; i < evals_.size(); ++i) {
|
for (size_t i = 0; i < evals_.size(); ++i) {
|
||||||
float res = evals_[i]->Eval(preds, info);
|
float res = evals_[i]->Eval(preds, info);
|
||||||
char tmp[1024];
|
char tmp[1024];
|
||||||
snprintf(tmp, sizeof(tmp), "\t%s-%s:%f", evname, evals_[i]->Name(), res);
|
utils::SPrintf(tmp, sizeof(tmp), "\t%s-%s:%f", evname, evals_[i]->Name(), res);
|
||||||
result += tmp;
|
result += tmp;
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
|
|||||||
@ -7,6 +7,7 @@
|
|||||||
*/
|
*/
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <cmath>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
namespace learner {
|
namespace learner {
|
||||||
|
|||||||
@ -63,14 +63,14 @@ class BoostLearner {
|
|||||||
}
|
}
|
||||||
char str_temp[25];
|
char str_temp[25];
|
||||||
if (num_feature > mparam.num_feature) {
|
if (num_feature > mparam.num_feature) {
|
||||||
snprintf(str_temp, sizeof(str_temp), "%u", num_feature);
|
utils::SPrintf(str_temp, sizeof(str_temp), "%u", num_feature);
|
||||||
this->SetParam("bst:num_feature", str_temp);
|
this->SetParam("bst:num_feature", str_temp);
|
||||||
}
|
}
|
||||||
snprintf(str_temp, sizeof(str_temp), "%lu",
|
utils::SPrintf(str_temp, sizeof(str_temp), "%lu",
|
||||||
static_cast<unsigned long>(buffer_size));
|
static_cast<unsigned long>(buffer_size));
|
||||||
this->SetParam("num_pbuffer", str_temp);
|
this->SetParam("num_pbuffer", str_temp);
|
||||||
if (!silent) {
|
if (!silent) {
|
||||||
printf("buffer_size=%ld\n", static_cast<long>(buffer_size));
|
utils::Printf("buffer_size=%ld\n", static_cast<long>(buffer_size));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
@ -183,7 +183,7 @@ class BoostLearner {
|
|||||||
const std::vector<std::string> &evname) {
|
const std::vector<std::string> &evname) {
|
||||||
std::string res;
|
std::string res;
|
||||||
char tmp[256];
|
char tmp[256];
|
||||||
snprintf(tmp, sizeof(tmp), "[%d]", iter);
|
utils::SPrintf(tmp, sizeof(tmp), "[%d]", iter);
|
||||||
res = tmp;
|
res = tmp;
|
||||||
for (size_t i = 0; i < evals.size(); ++i) {
|
for (size_t i = 0; i < evals.size(); ++i) {
|
||||||
this->PredictRaw(*evals[i], &preds_);
|
this->PredictRaw(*evals[i], &preds_);
|
||||||
@ -212,11 +212,14 @@ class BoostLearner {
|
|||||||
* \param data input data
|
* \param data input data
|
||||||
* \param output_margin whether to only predict margin value instead of transformed prediction
|
* \param output_margin whether to only predict margin value instead of transformed prediction
|
||||||
* \param out_preds output vector that stores the prediction
|
* \param out_preds output vector that stores the prediction
|
||||||
|
* \param ntree_limit limit number of trees used for boosted tree
|
||||||
|
* predictor, when it equals 0, this means we are using all the trees
|
||||||
*/
|
*/
|
||||||
inline void Predict(const DMatrix &data,
|
inline void Predict(const DMatrix &data,
|
||||||
bool output_margin,
|
bool output_margin,
|
||||||
std::vector<float> *out_preds) const {
|
std::vector<float> *out_preds,
|
||||||
this->PredictRaw(data, out_preds);
|
unsigned ntree_limit = 0) const {
|
||||||
|
this->PredictRaw(data, out_preds, ntree_limit);
|
||||||
if (!output_margin) {
|
if (!output_margin) {
|
||||||
obj_->PredTransform(out_preds);
|
obj_->PredTransform(out_preds);
|
||||||
}
|
}
|
||||||
@ -246,11 +249,14 @@ class BoostLearner {
|
|||||||
* \brief get un-transformed prediction
|
* \brief get un-transformed prediction
|
||||||
* \param data training data matrix
|
* \param data training data matrix
|
||||||
* \param out_preds output vector that stores the prediction
|
* \param out_preds output vector that stores the prediction
|
||||||
|
* \param ntree_limit limit number of trees used for boosted tree
|
||||||
|
* predictor, when it equals 0, this means we are using all the trees
|
||||||
*/
|
*/
|
||||||
inline void PredictRaw(const DMatrix &data,
|
inline void PredictRaw(const DMatrix &data,
|
||||||
std::vector<float> *out_preds) const {
|
std::vector<float> *out_preds,
|
||||||
|
unsigned ntree_limit = 0) const {
|
||||||
gbm_->Predict(data.fmat(), this->FindBufferOffset(data),
|
gbm_->Predict(data.fmat(), this->FindBufferOffset(data),
|
||||||
data.info.info, out_preds);
|
data.info.info, out_preds, ntree_limit);
|
||||||
// add base margin
|
// add base margin
|
||||||
std::vector<float> &preds = *out_preds;
|
std::vector<float> &preds = *out_preds;
|
||||||
const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size());
|
const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size());
|
||||||
|
|||||||
@ -6,9 +6,9 @@
|
|||||||
* \author Tianqi Chen, Kailong Chen
|
* \author Tianqi Chen, Kailong Chen
|
||||||
*/
|
*/
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <cmath>
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
#include <cmath>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include "../data.h"
|
#include "../data.h"
|
||||||
#include "./objective.h"
|
#include "./objective.h"
|
||||||
@ -37,7 +37,7 @@ struct LossType {
|
|||||||
case kLogisticRaw:
|
case kLogisticRaw:
|
||||||
case kLinearSquare: return x;
|
case kLinearSquare: return x;
|
||||||
case kLogisticClassify:
|
case kLogisticClassify:
|
||||||
case kLogisticNeglik: return 1.0f / (1.0f + expf(-x));
|
case kLogisticNeglik: return 1.0f / (1.0f + std::exp(-x));
|
||||||
default: utils::Error("unknown loss_type"); return 0.0f;
|
default: utils::Error("unknown loss_type"); return 0.0f;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -50,7 +50,7 @@ struct LossType {
|
|||||||
inline float FirstOrderGradient(float predt, float label) const {
|
inline float FirstOrderGradient(float predt, float label) const {
|
||||||
switch (loss_type) {
|
switch (loss_type) {
|
||||||
case kLinearSquare: return predt - label;
|
case kLinearSquare: return predt - label;
|
||||||
case kLogisticRaw: predt = 1.0f / (1.0f + expf(-predt));
|
case kLogisticRaw: predt = 1.0f / (1.0f + std::exp(-predt));
|
||||||
case kLogisticClassify:
|
case kLogisticClassify:
|
||||||
case kLogisticNeglik: return predt - label;
|
case kLogisticNeglik: return predt - label;
|
||||||
default: utils::Error("unknown loss_type"); return 0.0f;
|
default: utils::Error("unknown loss_type"); return 0.0f;
|
||||||
@ -65,7 +65,7 @@ struct LossType {
|
|||||||
inline float SecondOrderGradient(float predt, float label) const {
|
inline float SecondOrderGradient(float predt, float label) const {
|
||||||
switch (loss_type) {
|
switch (loss_type) {
|
||||||
case kLinearSquare: return 1.0f;
|
case kLinearSquare: return 1.0f;
|
||||||
case kLogisticRaw: predt = 1.0f / (1.0f + expf(-predt));
|
case kLogisticRaw: predt = 1.0f / (1.0f + std::exp(-predt));
|
||||||
case kLogisticClassify:
|
case kLogisticClassify:
|
||||||
case kLogisticNeglik: return predt * (1 - predt);
|
case kLogisticNeglik: return predt * (1 - predt);
|
||||||
default: utils::Error("unknown loss_type"); return 0.0f;
|
default: utils::Error("unknown loss_type"); return 0.0f;
|
||||||
@ -80,7 +80,7 @@ struct LossType {
|
|||||||
loss_type == kLogisticNeglik ) {
|
loss_type == kLogisticNeglik ) {
|
||||||
utils::Check(base_score > 0.0f && base_score < 1.0f,
|
utils::Check(base_score > 0.0f && base_score < 1.0f,
|
||||||
"base_score must be in (0,1) for logistic loss");
|
"base_score must be in (0,1) for logistic loss");
|
||||||
base_score = -logf(1.0f / base_score - 1.0f);
|
base_score = -std::log(1.0f / base_score - 1.0f);
|
||||||
}
|
}
|
||||||
return base_score;
|
return base_score;
|
||||||
}
|
}
|
||||||
@ -419,8 +419,8 @@ class LambdaRankObjNDCG : public LambdaRankObj {
|
|||||||
for (size_t i = 0; i < pairs.size(); ++i) {
|
for (size_t i = 0; i < pairs.size(); ++i) {
|
||||||
unsigned pos_idx = pairs[i].pos_index;
|
unsigned pos_idx = pairs[i].pos_index;
|
||||||
unsigned neg_idx = pairs[i].neg_index;
|
unsigned neg_idx = pairs[i].neg_index;
|
||||||
float pos_loginv = 1.0f / logf(pos_idx + 2.0f);
|
float pos_loginv = 1.0f / std::log(pos_idx + 2.0f);
|
||||||
float neg_loginv = 1.0f / logf(neg_idx + 2.0f);
|
float neg_loginv = 1.0f / std::log(neg_idx + 2.0f);
|
||||||
int pos_label = static_cast<int>(sorted_list[pos_idx].label);
|
int pos_label = static_cast<int>(sorted_list[pos_idx].label);
|
||||||
int neg_label = static_cast<int>(sorted_list[neg_idx].label);
|
int neg_label = static_cast<int>(sorted_list[neg_idx].label);
|
||||||
float original =
|
float original =
|
||||||
@ -438,7 +438,7 @@ class LambdaRankObjNDCG : public LambdaRankObj {
|
|||||||
for (size_t i = 0; i < labels.size(); ++i) {
|
for (size_t i = 0; i < labels.size(); ++i) {
|
||||||
const unsigned rel = static_cast<unsigned>(labels[i]);
|
const unsigned rel = static_cast<unsigned>(labels[i]);
|
||||||
if (rel != 0) {
|
if (rel != 0) {
|
||||||
sumdcg += ((1 << rel) - 1) / logf(static_cast<float>(i + 2));
|
sumdcg += ((1 << rel) - 1) / std::log(static_cast<float>(i + 2));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return static_cast<float>(sumdcg);
|
return static_cast<float>(sumdcg);
|
||||||
|
|||||||
@ -302,11 +302,11 @@ struct SplitEntry{
|
|||||||
* \param loss_chg the loss reduction get through the split
|
* \param loss_chg the loss reduction get through the split
|
||||||
* \param split_index the feature index where the split is on
|
* \param split_index the feature index where the split is on
|
||||||
*/
|
*/
|
||||||
inline bool NeedReplace(bst_float loss_chg, unsigned split_index) const {
|
inline bool NeedReplace(bst_float new_loss_chg, unsigned split_index) const {
|
||||||
if (this->split_index() <= split_index) {
|
if (this->split_index() <= split_index) {
|
||||||
return loss_chg > this->loss_chg;
|
return new_loss_chg > this->loss_chg;
|
||||||
} else {
|
} else {
|
||||||
return !(this->loss_chg > loss_chg);
|
return !(this->loss_chg > new_loss_chg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
@ -332,13 +332,13 @@ struct SplitEntry{
|
|||||||
* \param default_left whether the missing value goes to left
|
* \param default_left whether the missing value goes to left
|
||||||
* \return whether the proposed split is better and can replace current split
|
* \return whether the proposed split is better and can replace current split
|
||||||
*/
|
*/
|
||||||
inline bool Update(bst_float loss_chg, unsigned split_index,
|
inline bool Update(bst_float new_loss_chg, unsigned split_index,
|
||||||
float split_value, bool default_left) {
|
float new_split_value, bool default_left) {
|
||||||
if (this->NeedReplace(loss_chg, split_index)) {
|
if (this->NeedReplace(new_loss_chg, split_index)) {
|
||||||
this->loss_chg = loss_chg;
|
this->loss_chg = new_loss_chg;
|
||||||
if (default_left) split_index |= (1U << 31);
|
if (default_left) split_index |= (1U << 31);
|
||||||
this->sindex = split_index;
|
this->sindex = split_index;
|
||||||
this->split_value = split_value;
|
this->split_value = new_split_value;
|
||||||
return true;
|
return true;
|
||||||
} else {
|
} else {
|
||||||
return false;
|
return false;
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
#define _CRT_SECURE_NO_WARNINGS
|
#define _CRT_SECURE_NO_WARNINGS
|
||||||
#define _CRT_SECURE_NO_DEPRECATE
|
#define _CRT_SECURE_NO_DEPRECATE
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
using namespace std;
|
||||||
#include "./updater.h"
|
#include "./updater.h"
|
||||||
#include "./updater_prune-inl.hpp"
|
#include "./updater_prune-inl.hpp"
|
||||||
#include "./updater_refresh-inl.hpp"
|
#include "./updater_refresh-inl.hpp"
|
||||||
|
|||||||
@ -63,8 +63,8 @@ class TreePruner: public IUpdater {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (silent == 0) {
|
if (silent == 0) {
|
||||||
printf("tree prunning end, %d roots, %d extra nodes, %d pruned nodes ,max_depth=%d\n",
|
utils::Printf("tree prunning end, %d roots, %d extra nodes, %d pruned nodes ,max_depth=%d\n",
|
||||||
tree.param.num_roots, tree.num_extra_nodes(), npruned, tree.MaxDepth());
|
tree.param.num_roots, tree.num_extra_nodes(), npruned, tree.MaxDepth());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -26,14 +26,14 @@ class TreeRefresher: public IUpdater {
|
|||||||
virtual void Update(const std::vector<bst_gpair> &gpair,
|
virtual void Update(const std::vector<bst_gpair> &gpair,
|
||||||
IFMatrix *p_fmat,
|
IFMatrix *p_fmat,
|
||||||
const BoosterInfo &info,
|
const BoosterInfo &info,
|
||||||
const std::vector<RegTree*> &trees) {
|
const std::vector<RegTree*> &trees) {
|
||||||
if (trees.size() == 0) return;
|
if (trees.size() == 0) return;
|
||||||
// number of threads
|
// number of threads
|
||||||
int nthread;
|
|
||||||
// thread temporal space
|
// thread temporal space
|
||||||
std::vector< std::vector<TStats> > stemp;
|
std::vector< std::vector<TStats> > stemp;
|
||||||
std::vector<RegTree::FVec> fvec_temp;
|
std::vector<RegTree::FVec> fvec_temp;
|
||||||
// setup temp space for each thread
|
// setup temp space for each thread
|
||||||
|
int nthread;
|
||||||
#pragma omp parallel
|
#pragma omp parallel
|
||||||
{
|
{
|
||||||
nthread = omp_get_num_threads();
|
nthread = omp_get_num_threads();
|
||||||
@ -127,8 +127,6 @@ class TreeRefresher: public IUpdater {
|
|||||||
this->Refresh(gstats, tree[nid].cright(), p_tree);
|
this->Refresh(gstats, tree[nid].cright(), p_tree);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// number of thread in the data
|
|
||||||
int nthread;
|
|
||||||
// training parameter
|
// training parameter
|
||||||
TrainParam param;
|
TrainParam param;
|
||||||
};
|
};
|
||||||
|
|||||||
@ -100,12 +100,10 @@ class ISeekStream: public IStream {
|
|||||||
/*! \brief implementation of file i/o stream */
|
/*! \brief implementation of file i/o stream */
|
||||||
class FileStream : public ISeekStream {
|
class FileStream : public ISeekStream {
|
||||||
public:
|
public:
|
||||||
|
explicit FileStream(FILE *fp) : fp(fp) {}
|
||||||
explicit FileStream(void) {
|
explicit FileStream(void) {
|
||||||
this->fp = NULL;
|
this->fp = NULL;
|
||||||
}
|
}
|
||||||
explicit FileStream(FILE *fp) {
|
|
||||||
this->fp = fp;
|
|
||||||
}
|
|
||||||
virtual size_t Read(void *ptr, size_t size) {
|
virtual size_t Read(void *ptr, size_t size) {
|
||||||
return fread(ptr, size, 1, fp);
|
return fread(ptr, size, 1, fp);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -163,7 +163,7 @@ struct SparseCSRFileBuilder {
|
|||||||
fo->Write(rptr);
|
fo->Write(rptr);
|
||||||
// setup buffer space
|
// setup buffer space
|
||||||
buffer_rptr.resize(rptr.size());
|
buffer_rptr.resize(rptr.size());
|
||||||
buffer.reserve(buffer_size);
|
buffer_temp.reserve(buffer_size);
|
||||||
buffer_data.resize(buffer_size);
|
buffer_data.resize(buffer_size);
|
||||||
saved_offset.clear();
|
saved_offset.clear();
|
||||||
saved_offset.resize(rptr.size() - 1, 0);
|
saved_offset.resize(rptr.size() - 1, 0);
|
||||||
|
|||||||
@ -9,13 +9,8 @@
|
|||||||
#include <omp.h>
|
#include <omp.h>
|
||||||
#else
|
#else
|
||||||
#ifndef DISABLE_OPENMP
|
#ifndef DISABLE_OPENMP
|
||||||
#ifndef _MSC_VER
|
// use pragma message instead of warning
|
||||||
#warning "OpenMP is not available, compile to single thread code."\
|
#pragma message ("Warning: OpenMP is not available, xgboost will be compiled into single-thread code. Use OpenMP-enabled compiler to get benefit of multi-threading")
|
||||||
"You may want to ungrade your compiler to enable OpenMP support,"\
|
|
||||||
"to get benefit of multi-threading."
|
|
||||||
#else
|
|
||||||
// TODO add warning for msvc
|
|
||||||
#endif
|
|
||||||
#endif
|
#endif
|
||||||
inline int omp_get_thread_num() { return 0; }
|
inline int omp_get_thread_num() { return 0; }
|
||||||
inline int omp_get_num_threads() { return 1; }
|
inline int omp_get_num_threads() { return 1; }
|
||||||
|
|||||||
@ -16,30 +16,21 @@
|
|||||||
/*! namespace of PRNG */
|
/*! namespace of PRNG */
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
namespace random {
|
namespace random {
|
||||||
|
#ifndef XGBOOST_CUSTOMIZE_PRNG_
|
||||||
/*! \brief seed the PRNG */
|
/*! \brief seed the PRNG */
|
||||||
inline void Seed(uint32_t seed) {
|
inline void Seed(unsigned seed) {
|
||||||
srand(seed);
|
srand(seed);
|
||||||
}
|
}
|
||||||
/*! \brief return a real number uniform in [0,1) */
|
/*! \brief basic function, uniform */
|
||||||
inline double NextDouble(void) {
|
inline double Uniform(void) {
|
||||||
return static_cast<double>(rand()) / (static_cast<double>(RAND_MAX)+1.0);
|
return static_cast<double>(rand()) / (static_cast<double>(RAND_MAX)+1.0);
|
||||||
}
|
}
|
||||||
/*! \brief return a real number uniform in (0,1) */
|
/*! \brief return a real number uniform in (0,1) */
|
||||||
inline double NextDouble2(void) {
|
inline double NextDouble2(void) {
|
||||||
return (static_cast<double>(rand()) + 1.0) / (static_cast<double>(RAND_MAX)+2.0);
|
return (static_cast<double>(rand()) + 1.0) / (static_cast<double>(RAND_MAX)+2.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*! \brief return a random number */
|
|
||||||
inline uint32_t NextUInt32(void) {
|
|
||||||
return (uint32_t)rand();
|
|
||||||
}
|
|
||||||
/*! \brief return a random number in n */
|
|
||||||
inline uint32_t NextUInt32(uint32_t n) {
|
|
||||||
return (uint32_t)floor(NextDouble() * n);
|
|
||||||
}
|
|
||||||
/*! \brief return x~N(0,1) */
|
/*! \brief return x~N(0,1) */
|
||||||
inline double SampleNormal() {
|
inline double Normal(void) {
|
||||||
double x, y, s;
|
double x, y, s;
|
||||||
do {
|
do {
|
||||||
x = 2 * NextDouble2() - 1.0;
|
x = 2 * NextDouble2() - 1.0;
|
||||||
@ -49,22 +40,24 @@ inline double SampleNormal() {
|
|||||||
|
|
||||||
return x * sqrt(-2.0 * log(s) / s);
|
return x * sqrt(-2.0 * log(s) / s);
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
// include declarations, to be implemented
|
||||||
|
void Seed(unsigned seed);
|
||||||
|
double Uniform(void);
|
||||||
|
double Normal(void);
|
||||||
|
#endif
|
||||||
|
|
||||||
/*! \brief return iid x,y ~N(0,1) */
|
/*! \brief return a real number uniform in [0,1) */
|
||||||
inline void SampleNormal2D(double &xx, double &yy) {
|
inline double NextDouble(void) {
|
||||||
double x, y, s;
|
return Uniform();
|
||||||
do {
|
}
|
||||||
x = 2 * NextDouble2() - 1.0;
|
/*! \brief return a random number in n */
|
||||||
y = 2 * NextDouble2() - 1.0;
|
inline uint32_t NextUInt32(uint32_t n) {
|
||||||
s = x*x + y*y;
|
return (uint32_t)floor(NextDouble() * n);
|
||||||
} while (s >= 1.0 || s == 0.0);
|
|
||||||
double t = sqrt(-2.0 * log(s) / s);
|
|
||||||
xx = x * t;
|
|
||||||
yy = y * t;
|
|
||||||
}
|
}
|
||||||
/*! \brief return x~N(mu,sigma^2) */
|
/*! \brief return x~N(mu,sigma^2) */
|
||||||
inline double SampleNormal(double mu, double sigma) {
|
inline double SampleNormal(double mu, double sigma) {
|
||||||
return SampleNormal() * sigma + mu;
|
return Normal() * sigma + mu;
|
||||||
}
|
}
|
||||||
/*! \brief return 1 with probability p, coin flip */
|
/*! \brief return 1 with probability p, coin flip */
|
||||||
inline int SampleBinary(double p) {
|
inline int SampleBinary(double p) {
|
||||||
@ -90,7 +83,7 @@ struct Random{
|
|||||||
inline void Seed(unsigned sd) {
|
inline void Seed(unsigned sd) {
|
||||||
this->rseed = sd;
|
this->rseed = sd;
|
||||||
#if defined(_MSC_VER)||defined(_WIN32)
|
#if defined(_MSC_VER)||defined(_WIN32)
|
||||||
srand(rseed);
|
::xgboost::random::Seed(sd);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
/*! \brief return a real number uniform in [0,1) */
|
/*! \brief return a real number uniform in [0,1) */
|
||||||
@ -98,8 +91,8 @@ struct Random{
|
|||||||
// use rand instead of rand_r in windows, for MSVC it is fine since rand is threadsafe
|
// use rand instead of rand_r in windows, for MSVC it is fine since rand is threadsafe
|
||||||
// For cygwin and mingw, this can slow down parallelism, but rand_r is only used in objective-inl.hpp, won't affect speed in general
|
// For cygwin and mingw, this can slow down parallelism, but rand_r is only used in objective-inl.hpp, won't affect speed in general
|
||||||
// todo, replace with another PRNG
|
// todo, replace with another PRNG
|
||||||
#if defined(_MSC_VER)||defined(_WIN32)
|
#if defined(_MSC_VER)||defined(_WIN32)||defined(XGBOOST_STRICT_CXX98_)
|
||||||
return static_cast<double>(rand()) / (static_cast<double>(RAND_MAX) + 1.0);
|
return Uniform();
|
||||||
#else
|
#else
|
||||||
return static_cast<double>(rand_r(&rseed)) / (static_cast<double>(RAND_MAX) + 1.0);
|
return static_cast<double>(rand_r(&rseed)) / (static_cast<double>(RAND_MAX) + 1.0);
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -7,11 +7,18 @@
|
|||||||
*/
|
*/
|
||||||
#define _CRT_SECURE_NO_WARNINGS
|
#define _CRT_SECURE_NO_WARNINGS
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <cstdarg>
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#ifndef XGBOOST_STRICT_CXX98_
|
||||||
|
#include <cstdarg>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !defined(__GNUC__)
|
||||||
|
#define fopen64 std::fopen
|
||||||
|
#endif
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
#define fopen64 fopen
|
|
||||||
// NOTE: sprintf_s is not equivalent to snprintf,
|
// NOTE: sprintf_s is not equivalent to snprintf,
|
||||||
// they are equivalent when success, which is sufficient for our case
|
// they are equivalent when success, which is sufficient for our case
|
||||||
#define snprintf sprintf_s
|
#define snprintf sprintf_s
|
||||||
@ -19,16 +26,15 @@
|
|||||||
#else
|
#else
|
||||||
#ifdef _FILE_OFFSET_BITS
|
#ifdef _FILE_OFFSET_BITS
|
||||||
#if _FILE_OFFSET_BITS == 32
|
#if _FILE_OFFSET_BITS == 32
|
||||||
#warning "FILE OFFSET BITS defined to be 32 bit"
|
#pragma message ("Warning: FILE OFFSET BITS defined to be 32 bit")
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __APPLE__
|
#ifdef __APPLE__
|
||||||
#define off64_t off_t
|
#define off64_t off_t
|
||||||
#define fopen64 fopen
|
#define fopen64 std::fopen
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define _FILE_OFFSET_BITS 64
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
}
|
}
|
||||||
@ -47,10 +53,11 @@ typedef long int64_t;
|
|||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
/*! \brief namespace for helper utils of the project */
|
/*! \brief namespace for helper utils of the project */
|
||||||
namespace utils {
|
namespace utils {
|
||||||
/*! \brief error message buffer length */
|
|
||||||
const int kErrorBuffer = 1 << 12;
|
|
||||||
|
|
||||||
#ifndef XGBOOST_CUSTOMIZE_ERROR_
|
/*! \brief error message buffer length */
|
||||||
|
const int kPrintBuffer = 1 << 12;
|
||||||
|
|
||||||
|
#ifndef XGBOOST_CUSTOMIZE_MSG_
|
||||||
/*!
|
/*!
|
||||||
* \brief handling of Assert error, caused by inappropriate input
|
* \brief handling of Assert error, caused by inappropriate input
|
||||||
* \param msg error message
|
* \param msg error message
|
||||||
@ -67,19 +74,50 @@ inline void HandleCheckError(const char *msg) {
|
|||||||
fprintf(stderr, "%s\n", msg);
|
fprintf(stderr, "%s\n", msg);
|
||||||
exit(-1);
|
exit(-1);
|
||||||
}
|
}
|
||||||
|
inline void HandlePrint(const char *msg) {
|
||||||
|
printf("%s", msg);
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
|
#ifndef XGBOOST_STRICT_CXX98_
|
||||||
// include declarations, someone must implement this
|
// include declarations, someone must implement this
|
||||||
void HandleAssertError(const char *msg);
|
void HandleAssertError(const char *msg);
|
||||||
void HandleCheckError(const char *msg);
|
void HandleCheckError(const char *msg);
|
||||||
|
void HandlePrint(const char *msg);
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
|
#ifdef XGBOOST_STRICT_CXX98_
|
||||||
|
// these function pointers are to be assigned
|
||||||
|
extern "C" void (*Printf)(const char *fmt, ...);
|
||||||
|
extern "C" int (*SPrintf)(char *buf, size_t size, const char *fmt, ...);
|
||||||
|
extern "C" void (*Assert)(int exp, const char *fmt, ...);
|
||||||
|
extern "C" void (*Check)(int exp, const char *fmt, ...);
|
||||||
|
extern "C" void (*Error)(const char *fmt, ...);
|
||||||
|
#else
|
||||||
|
/*! \brief printf, print message to the console */
|
||||||
|
inline void Printf(const char *fmt, ...) {
|
||||||
|
std::string msg(kPrintBuffer, '\0');
|
||||||
|
va_list args;
|
||||||
|
va_start(args, fmt);
|
||||||
|
vsnprintf(&msg[0], kPrintBuffer, fmt, args);
|
||||||
|
va_end(args);
|
||||||
|
HandlePrint(msg.c_str());
|
||||||
|
}
|
||||||
|
/*! \brief portable version of snprintf */
|
||||||
|
inline int SPrintf(char *buf, size_t size, const char *fmt, ...) {
|
||||||
|
va_list args;
|
||||||
|
va_start(args, fmt);
|
||||||
|
int ret = vsnprintf(buf, size, fmt, args);
|
||||||
|
va_end(args);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
/*! \brief assert a condition is true, use this to handle debug information */
|
/*! \brief assert a condition is true, use this to handle debug information */
|
||||||
inline void Assert(bool exp, const char *fmt, ...) {
|
inline void Assert(bool exp, const char *fmt, ...) {
|
||||||
if (!exp) {
|
if (!exp) {
|
||||||
std::string msg(kErrorBuffer, '\0');
|
std::string msg(kPrintBuffer, '\0');
|
||||||
va_list args;
|
va_list args;
|
||||||
va_start(args, fmt);
|
va_start(args, fmt);
|
||||||
vsnprintf(&msg[0], kErrorBuffer, fmt, args);
|
vsnprintf(&msg[0], kPrintBuffer, fmt, args);
|
||||||
va_end(args);
|
va_end(args);
|
||||||
HandleAssertError(msg.c_str());
|
HandleAssertError(msg.c_str());
|
||||||
}
|
}
|
||||||
@ -88,10 +126,10 @@ inline void Assert(bool exp, const char *fmt, ...) {
|
|||||||
/*!\brief same as assert, but this is intended to be used as message for user*/
|
/*!\brief same as assert, but this is intended to be used as message for user*/
|
||||||
inline void Check(bool exp, const char *fmt, ...) {
|
inline void Check(bool exp, const char *fmt, ...) {
|
||||||
if (!exp) {
|
if (!exp) {
|
||||||
std::string msg(kErrorBuffer, '\0');
|
std::string msg(kPrintBuffer, '\0');
|
||||||
va_list args;
|
va_list args;
|
||||||
va_start(args, fmt);
|
va_start(args, fmt);
|
||||||
vsnprintf(&msg[0], kErrorBuffer, fmt, args);
|
vsnprintf(&msg[0], kPrintBuffer, fmt, args);
|
||||||
va_end(args);
|
va_end(args);
|
||||||
HandleCheckError(msg.c_str());
|
HandleCheckError(msg.c_str());
|
||||||
}
|
}
|
||||||
@ -100,14 +138,15 @@ inline void Check(bool exp, const char *fmt, ...) {
|
|||||||
/*! \brief report error message, same as check */
|
/*! \brief report error message, same as check */
|
||||||
inline void Error(const char *fmt, ...) {
|
inline void Error(const char *fmt, ...) {
|
||||||
{
|
{
|
||||||
std::string msg(kErrorBuffer, '\0');
|
std::string msg(kPrintBuffer, '\0');
|
||||||
va_list args;
|
va_list args;
|
||||||
va_start(args, fmt);
|
va_start(args, fmt);
|
||||||
vsnprintf(&msg[0], kErrorBuffer, fmt, args);
|
vsnprintf(&msg[0], kPrintBuffer, fmt, args);
|
||||||
va_end(args);
|
va_end(args);
|
||||||
HandleCheckError(msg.c_str());
|
HandleCheckError(msg.c_str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/*! \brief replace fopen, report error when the file open fails */
|
/*! \brief replace fopen, report error when the file open fails */
|
||||||
inline FILE *FopenCheck(const char *fname, const char *flag) {
|
inline FILE *FopenCheck(const char *fname, const char *flag) {
|
||||||
@ -115,7 +154,25 @@ inline FILE *FopenCheck(const char *fname, const char *flag) {
|
|||||||
Check(fp != NULL, "can not open file \"%s\"\n", fname);
|
Check(fp != NULL, "can not open file \"%s\"\n", fname);
|
||||||
return fp;
|
return fp;
|
||||||
}
|
}
|
||||||
|
} // namespace utils
|
||||||
} // namespace utils
|
// easy utils that can be directly accessed in xgboost
|
||||||
|
/*! \brief get the beginning address of a vector */
|
||||||
|
template<typename T>
|
||||||
|
inline T *BeginPtr(std::vector<T> &vec) {
|
||||||
|
if (vec.size() == 0) {
|
||||||
|
return NULL;
|
||||||
|
} else {
|
||||||
|
return &vec[0];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/*! \brief get the beginning address of a vector */
|
||||||
|
template<typename T>
|
||||||
|
inline const T *BeginPtr(const std::vector<T> &vec) {
|
||||||
|
if (vec.size() == 0) {
|
||||||
|
return NULL;
|
||||||
|
} else {
|
||||||
|
return &vec[0];
|
||||||
|
}
|
||||||
|
}
|
||||||
} // namespace xgboost
|
} // namespace xgboost
|
||||||
#endif // XGBOOST_UTILS_UTILS_H_
|
#endif // XGBOOST_UTILS_UTILS_H_
|
||||||
|
|||||||
@ -50,6 +50,7 @@ class BoostLearnTask{
|
|||||||
if (!strcmp("use_buffer", name)) use_buffer = atoi(val);
|
if (!strcmp("use_buffer", name)) use_buffer = atoi(val);
|
||||||
if (!strcmp("num_round", name)) num_round = atoi(val);
|
if (!strcmp("num_round", name)) num_round = atoi(val);
|
||||||
if (!strcmp("pred_margin", name)) pred_margin = atoi(val);
|
if (!strcmp("pred_margin", name)) pred_margin = atoi(val);
|
||||||
|
if (!strcmp("ntree_limit", name)) ntree_limit = atoi(val);
|
||||||
if (!strcmp("save_period", name)) save_period = atoi(val);
|
if (!strcmp("save_period", name)) save_period = atoi(val);
|
||||||
if (!strcmp("eval_train", name)) eval_train = atoi(val);
|
if (!strcmp("eval_train", name)) eval_train = atoi(val);
|
||||||
if (!strcmp("task", name)) task = val;
|
if (!strcmp("task", name)) task = val;
|
||||||
@ -79,6 +80,7 @@ class BoostLearnTask{
|
|||||||
save_period = 0;
|
save_period = 0;
|
||||||
eval_train = 0;
|
eval_train = 0;
|
||||||
pred_margin = 0;
|
pred_margin = 0;
|
||||||
|
ntree_limit = 0;
|
||||||
dump_model_stats = 0;
|
dump_model_stats = 0;
|
||||||
task = "train";
|
task = "train";
|
||||||
model_in = "NULL";
|
model_in = "NULL";
|
||||||
@ -186,7 +188,7 @@ class BoostLearnTask{
|
|||||||
inline void TaskPred(void) {
|
inline void TaskPred(void) {
|
||||||
std::vector<float> preds;
|
std::vector<float> preds;
|
||||||
if (!silent) printf("start prediction...\n");
|
if (!silent) printf("start prediction...\n");
|
||||||
learner.Predict(*data, pred_margin != 0, &preds);
|
learner.Predict(*data, pred_margin != 0, &preds, ntree_limit);
|
||||||
if (!silent) printf("writing prediction to %s\n", name_pred.c_str());
|
if (!silent) printf("writing prediction to %s\n", name_pred.c_str());
|
||||||
FILE *fo = utils::FopenCheck(name_pred.c_str(), "w");
|
FILE *fo = utils::FopenCheck(name_pred.c_str(), "w");
|
||||||
for (size_t i = 0; i < preds.size(); i++) {
|
for (size_t i = 0; i < preds.size(); i++) {
|
||||||
@ -217,6 +219,8 @@ class BoostLearnTask{
|
|||||||
std::string task;
|
std::string task;
|
||||||
/*! \brief name of predict file */
|
/*! \brief name of predict file */
|
||||||
std::string name_pred;
|
std::string name_pred;
|
||||||
|
/*!\brief limit number of trees in prediction */
|
||||||
|
int ntree_limit;
|
||||||
/*!\brief whether to directly output margin value */
|
/*!\brief whether to directly output margin value */
|
||||||
int pred_margin;
|
int pred_margin;
|
||||||
/*! \brief whether dump statistics along with model */
|
/*! \brief whether dump statistics along with model */
|
||||||
|
|||||||
@ -365,7 +365,7 @@ class Booster:
|
|||||||
return xglib.XGBoosterEvalOneIter(self.handle, it, dmats, evnames, len(evals))
|
return xglib.XGBoosterEvalOneIter(self.handle, it, dmats, evnames, len(evals))
|
||||||
def eval(self, mat, name = 'eval', it = 0):
|
def eval(self, mat, name = 'eval', it = 0):
|
||||||
return self.eval_set( [(mat,name)], it)
|
return self.eval_set( [(mat,name)], it)
|
||||||
def predict(self, data, output_margin=False):
|
def predict(self, data, output_margin=False, ntree_limit=0):
|
||||||
"""
|
"""
|
||||||
predict with data
|
predict with data
|
||||||
Args:
|
Args:
|
||||||
@ -373,12 +373,14 @@ class Booster:
|
|||||||
the dmatrix storing the input
|
the dmatrix storing the input
|
||||||
output_margin: bool
|
output_margin: bool
|
||||||
whether output raw margin value that is untransformed
|
whether output raw margin value that is untransformed
|
||||||
|
|
||||||
|
ntree_limit: limit number of trees in prediction, default to 0, 0 means using all the trees
|
||||||
Returns:
|
Returns:
|
||||||
numpy array of prediction
|
numpy array of prediction
|
||||||
"""
|
"""
|
||||||
length = ctypes.c_ulong()
|
length = ctypes.c_ulong()
|
||||||
preds = xglib.XGBoosterPredict(self.handle, data.handle,
|
preds = xglib.XGBoosterPredict(self.handle, data.handle,
|
||||||
int(output_margin), ctypes.byref(length))
|
int(output_margin), ntree_limit, ctypes.byref(length))
|
||||||
return ctypes2numpy(preds, length.value, 'float32')
|
return ctypes2numpy(preds, length.value, 'float32')
|
||||||
def save_model(self, fname):
|
def save_model(self, fname):
|
||||||
""" save model to file
|
""" save model to file
|
||||||
|
|||||||
@ -6,10 +6,14 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
// include all std functions
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
#include "./xgboost_wrapper.h"
|
#include "./xgboost_wrapper.h"
|
||||||
#include "../src/data.h"
|
#include "../src/data.h"
|
||||||
#include "../src/learner/learner-inl.hpp"
|
#include "../src/learner/learner-inl.hpp"
|
||||||
#include "../src/io/io.h"
|
#include "../src/io/io.h"
|
||||||
|
#include "../src/utils/utils.h"
|
||||||
#include "../src/io/simple_dmatrix-inl.hpp"
|
#include "../src/io/simple_dmatrix-inl.hpp"
|
||||||
|
|
||||||
using namespace xgboost;
|
using namespace xgboost;
|
||||||
@ -25,11 +29,11 @@ class Booster: public learner::BoostLearner {
|
|||||||
this->init_model = false;
|
this->init_model = false;
|
||||||
this->SetCacheData(mats);
|
this->SetCacheData(mats);
|
||||||
}
|
}
|
||||||
const float *Pred(const DataMatrix &dmat, int output_margin, bst_ulong *len) {
|
inline const float *Pred(const DataMatrix &dmat, int output_margin, unsigned ntree_limit, bst_ulong *len) {
|
||||||
this->CheckInitModel();
|
this->CheckInitModel();
|
||||||
this->Predict(dmat, output_margin != 0, &this->preds_);
|
this->Predict(dmat, output_margin != 0, &this->preds_, ntree_limit);
|
||||||
*len = static_cast<bst_ulong>(this->preds_.size());
|
*len = static_cast<bst_ulong>(this->preds_.size());
|
||||||
return &this->preds_[0];
|
return BeginPtr(this->preds_);
|
||||||
}
|
}
|
||||||
inline void BoostOneIter(const DataMatrix &train,
|
inline void BoostOneIter(const DataMatrix &train,
|
||||||
float *grad, float *hess, bst_ulong len) {
|
float *grad, float *hess, bst_ulong len) {
|
||||||
@ -57,7 +61,7 @@ class Booster: public learner::BoostLearner {
|
|||||||
model_dump_cptr[i] = model_dump[i].c_str();
|
model_dump_cptr[i] = model_dump[i].c_str();
|
||||||
}
|
}
|
||||||
*len = static_cast<bst_ulong>(model_dump.size());
|
*len = static_cast<bst_ulong>(model_dump.size());
|
||||||
return &model_dump_cptr[0];
|
return BeginPtr(model_dump_cptr);
|
||||||
}
|
}
|
||||||
// temporal fields
|
// temporal fields
|
||||||
// temporal data to save evaluation dump
|
// temporal data to save evaluation dump
|
||||||
@ -174,13 +178,13 @@ extern "C"{
|
|||||||
std::vector<float> &vec =
|
std::vector<float> &vec =
|
||||||
static_cast<DataMatrix*>(handle)->info.GetFloatInfo(field);
|
static_cast<DataMatrix*>(handle)->info.GetFloatInfo(field);
|
||||||
vec.resize(len);
|
vec.resize(len);
|
||||||
memcpy(&vec[0], info, sizeof(float) * len);
|
memcpy(BeginPtr(vec), info, sizeof(float) * len);
|
||||||
}
|
}
|
||||||
void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *info, bst_ulong len) {
|
void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *info, bst_ulong len) {
|
||||||
std::vector<unsigned> &vec =
|
std::vector<unsigned> &vec =
|
||||||
static_cast<DataMatrix*>(handle)->info.GetUIntInfo(field);
|
static_cast<DataMatrix*>(handle)->info.GetUIntInfo(field);
|
||||||
vec.resize(len);
|
vec.resize(len);
|
||||||
memcpy(&vec[0], info, sizeof(unsigned) * len);
|
memcpy(BeginPtr(vec), info, sizeof(unsigned) * len);
|
||||||
}
|
}
|
||||||
void XGDMatrixSetGroup(void *handle, const unsigned *group, bst_ulong len) {
|
void XGDMatrixSetGroup(void *handle, const unsigned *group, bst_ulong len) {
|
||||||
DataMatrix *pmat = static_cast<DataMatrix*>(handle);
|
DataMatrix *pmat = static_cast<DataMatrix*>(handle);
|
||||||
@ -194,13 +198,13 @@ extern "C"{
|
|||||||
const std::vector<float> &vec =
|
const std::vector<float> &vec =
|
||||||
static_cast<const DataMatrix*>(handle)->info.GetFloatInfo(field);
|
static_cast<const DataMatrix*>(handle)->info.GetFloatInfo(field);
|
||||||
*len = static_cast<bst_ulong>(vec.size());
|
*len = static_cast<bst_ulong>(vec.size());
|
||||||
return &vec[0];
|
return BeginPtr(vec);
|
||||||
}
|
}
|
||||||
const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, bst_ulong* len) {
|
const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, bst_ulong* len) {
|
||||||
const std::vector<unsigned> &vec =
|
const std::vector<unsigned> &vec =
|
||||||
static_cast<const DataMatrix*>(handle)->info.GetUIntInfo(field);
|
static_cast<const DataMatrix*>(handle)->info.GetUIntInfo(field);
|
||||||
*len = static_cast<bst_ulong>(vec.size());
|
*len = static_cast<bst_ulong>(vec.size());
|
||||||
return &vec[0];
|
return BeginPtr(vec);
|
||||||
}
|
}
|
||||||
bst_ulong XGDMatrixNumRow(const void *handle) {
|
bst_ulong XGDMatrixNumRow(const void *handle) {
|
||||||
return static_cast<bst_ulong>(static_cast<const DataMatrix*>(handle)->info.num_row());
|
return static_cast<bst_ulong>(static_cast<const DataMatrix*>(handle)->info.num_row());
|
||||||
@ -249,8 +253,8 @@ extern "C"{
|
|||||||
bst->eval_str = bst->EvalOneIter(iter, mats, names);
|
bst->eval_str = bst->EvalOneIter(iter, mats, names);
|
||||||
return bst->eval_str.c_str();
|
return bst->eval_str.c_str();
|
||||||
}
|
}
|
||||||
const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, bst_ulong *len) {
|
const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, unsigned ntree_limit, bst_ulong *len) {
|
||||||
return static_cast<Booster*>(handle)->Pred(*static_cast<DataMatrix*>(dmat), output_margin, len);
|
return static_cast<Booster*>(handle)->Pred(*static_cast<DataMatrix*>(dmat), output_margin, ntree_limit, len);
|
||||||
}
|
}
|
||||||
void XGBoosterLoadModel(void *handle, const char *fname) {
|
void XGBoosterLoadModel(void *handle, const char *fname) {
|
||||||
static_cast<Booster*>(handle)->LoadModel(fname);
|
static_cast<Booster*>(handle)->LoadModel(fname);
|
||||||
|
|||||||
@ -165,9 +165,11 @@ extern "C" {
|
|||||||
* \param handle handle
|
* \param handle handle
|
||||||
* \param dmat data matrix
|
* \param dmat data matrix
|
||||||
* \param output_margin whether only output raw margin value
|
* \param output_margin whether only output raw margin value
|
||||||
|
* \param ntree_limit limit number of trees used for prediction, this is only valid for boosted trees
|
||||||
|
* when the parameter is set to 0, we will use all the trees
|
||||||
* \param len used to store length of returning result
|
* \param len used to store length of returning result
|
||||||
*/
|
*/
|
||||||
XGB_DLL const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, bst_ulong *len);
|
XGB_DLL const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, unsigned ntree_limit, bst_ulong *len);
|
||||||
/*!
|
/*!
|
||||||
* \brief load model from existing file
|
* \brief load model from existing file
|
||||||
* \param handle handle
|
* \param handle handle
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user