Merge branch 'master' of https://github.com/pommedeterresautee/xgboost
This commit is contained in:
@@ -1,8 +1,8 @@
|
||||
Package: xgboost
|
||||
Type: Package
|
||||
Title: eXtreme Gradient Boosting
|
||||
Version: 0.3-4
|
||||
Date: 2014-12-28
|
||||
Version: 0.4-0
|
||||
Date: 2015-05-11
|
||||
Author: Tianqi Chen <tianqi.tchen@gmail.com>, Tong He <hetong007@gmail.com>, Michael Benesty <michael@benesty.fr>
|
||||
Maintainer: Tong He <hetong007@gmail.com>
|
||||
Description: Xgboost is short for eXtreme Gradient Boosting, which is an
|
||||
|
||||
@@ -54,6 +54,13 @@
|
||||
#' @param folds \code{list} provides a possibility of using a list of pre-defined CV folds (each element must be a vector of fold's indices).
|
||||
#' If folds are supplied, the nfold and stratified parameters would be ignored.
|
||||
#' @param verbose \code{boolean}, print the statistics during the process
|
||||
#' @param print.every.n Print every N progress messages when \code{verbose>0}. Default is 1 which means all messages are printed.
|
||||
#' @param early.stop.round If \code{NULL}, the early stopping function is not triggered.
|
||||
#' If set to an integer \code{k}, training with a validation set will stop if the performance
|
||||
#' keeps getting worse consecutively for \code{k} rounds.
|
||||
#' @param maximize If \code{feval} and \code{early.stop.round} are set, then \code{maximize} must be set as well.
|
||||
#' \code{maximize=TRUE} means the larger the evaluation score the better.
|
||||
#'
|
||||
#' @param ... other parameters to pass to \code{params}.
|
||||
#'
|
||||
#' @return
|
||||
@@ -86,7 +93,8 @@
|
||||
#'
|
||||
xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing = NULL,
|
||||
prediction = FALSE, showsd = TRUE, metrics=list(),
|
||||
obj = NULL, feval = NULL, stratified = TRUE, folds = NULL, verbose = T,...) {
|
||||
obj = NULL, feval = NULL, stratified = TRUE, folds = NULL, verbose = T, print.every.n=1L,
|
||||
early.stop.round = NULL, maximize = NULL, ...) {
|
||||
if (typeof(params) != "list") {
|
||||
stop("xgb.cv: first argument params must be list")
|
||||
}
|
||||
@@ -109,7 +117,50 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
|
||||
for (mc in metrics) {
|
||||
params <- append(params, list("eval_metric"=mc))
|
||||
}
|
||||
|
||||
|
||||
# customized objective and evaluation metric interface
|
||||
if (!is.null(params$objective) && !is.null(obj))
|
||||
stop("xgb.cv: cannot assign two different objectives")
|
||||
if (!is.null(params$objective))
|
||||
if (class(params$objective)=='function') {
|
||||
obj = params$objective
|
||||
params$objective = NULL
|
||||
}
|
||||
if (!is.null(params$eval_metric) && !is.null(feval))
|
||||
stop("xgb.cv: cannot assign two different evaluation metrics")
|
||||
if (!is.null(params$eval_metric))
|
||||
if (class(params$eval_metric)=='function') {
|
||||
feval = params$eval_metric
|
||||
params$eval_metric = NULL
|
||||
}
|
||||
|
||||
# Early Stopping
|
||||
if (!is.null(early.stop.round)){
|
||||
if (!is.null(feval) && is.null(maximize))
|
||||
stop('Please set maximize to note whether the model is maximizing the evaluation or not.')
|
||||
if (is.null(maximize) && is.null(params$eval_metric))
|
||||
stop('Please set maximize to note whether the model is maximizing the evaluation or not.')
|
||||
if (is.null(maximize))
|
||||
{
|
||||
if (params$eval_metric %in% c('rmse','logloss','error','merror','mlogloss')) {
|
||||
maximize = FALSE
|
||||
} else {
|
||||
maximize = TRUE
|
||||
}
|
||||
}
|
||||
|
||||
if (maximize) {
|
||||
bestScore = 0
|
||||
} else {
|
||||
bestScore = Inf
|
||||
}
|
||||
bestInd = 0
|
||||
earlyStopflag = FALSE
|
||||
|
||||
if (length(metrics)>1)
|
||||
warning('Only the first metric is used for early stopping process.')
|
||||
}
|
||||
|
||||
xgb_folds <- xgb.cv.mknfold(dtrain, nfold, params, stratified, folds)
|
||||
obj_type = params[['objective']]
|
||||
mat_pred = FALSE
|
||||
@@ -124,6 +175,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
|
||||
else
|
||||
predictValues <- rep(0,xgb.numrow(dtrain))
|
||||
history <- c()
|
||||
print.every.n = max(as.integer(print.every.n), 1L)
|
||||
for (i in 1:nrounds) {
|
||||
msg <- list()
|
||||
for (k in 1:nfold) {
|
||||
@@ -148,7 +200,27 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
|
||||
}
|
||||
ret <- xgb.cv.aggcv(msg, showsd)
|
||||
history <- c(history, ret)
|
||||
if(verbose) paste(ret, "\n", sep="") %>% cat
|
||||
if(verbose)
|
||||
if (0==(i-1L)%%print.every.n)
|
||||
cat(ret, "\n", sep="")
|
||||
|
||||
# early_Stopping
|
||||
if (!is.null(early.stop.round)){
|
||||
score = strsplit(ret,'\\s+')[[1]][1+length(metrics)+2]
|
||||
score = strsplit(score,'\\+|:')[[1]][[2]]
|
||||
score = as.numeric(score)
|
||||
if ((maximize && score>bestScore) || (!maximize && score<bestScore)) {
|
||||
bestScore = score
|
||||
bestInd = i
|
||||
} else {
|
||||
if (i-bestInd>=early.stop.round) {
|
||||
earlyStopflag = TRUE
|
||||
cat('Stopping. Best iteration:',bestInd)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
colnames <- str_split(string = history[1], pattern = "\t")[[1]] %>% .[2:length(.)] %>% str_extract(".*:") %>% str_replace(":","") %>% str_replace("-", ".")
|
||||
|
||||
@@ -36,7 +36,7 @@
|
||||
#' 3. Task Parameters
|
||||
#'
|
||||
#' \itemize{
|
||||
#' \item \code{objective} specify the learning task and the corresponding learning objective, and the objective options are below:
|
||||
#' \item \code{objective} specify the learning task and the corresponding learning objective, users can pass a self-defined function to it. The default objective options are below:
|
||||
#' \itemize{
|
||||
#' \item \code{reg:linear} linear regression (Default).
|
||||
#' \item \code{reg:logistic} logistic regression.
|
||||
@@ -48,7 +48,7 @@
|
||||
#' \item \code{rank:pairwise} set xgboost to do ranking task by minimizing the pairwise loss.
|
||||
#' }
|
||||
#' \item \code{base_score} the initial prediction score of all instances, global bias. Default: 0.5
|
||||
#' \item \code{eval_metric} evaluation metrics for validation data. Default: metric will be assigned according to objective(rmse for regression, and error for classification, mean average precision for ranking). List is provided in detail section.
|
||||
#' \item \code{eval_metric} evaluation metrics for validation data. Users can pass a self-defined function to it. Default: metric will be assigned according to objective(rmse for regression, and error for classification, mean average precision for ranking). List is provided in detail section.
|
||||
#' }
|
||||
#'
|
||||
#' @param data takes an \code{xgb.DMatrix} as the input.
|
||||
@@ -66,7 +66,12 @@
|
||||
#' prediction and dtrain,
|
||||
#' @param verbose If 0, xgboost will stay silent. If 1, xgboost will print
|
||||
#' information of performance. If 2, xgboost will print information of both
|
||||
#'
|
||||
#' @param print.every.n Print every N progress messages when \code{verbose>0}. Default is 1 which means all messages are printed.
|
||||
#' @param early.stop.round If \code{NULL}, the early stopping function is not triggered.
|
||||
#' If set to an integer \code{k}, training with a validation set will stop if the performance
|
||||
#' keeps getting worse consecutively for \code{k} rounds.
|
||||
#' @param maximize If \code{feval} and \code{early.stop.round} are set, then \code{maximize} must be set as well.
|
||||
#' \code{maximize=TRUE} means the larger the evaluation score the better.
|
||||
#' @param ... other parameters to pass to \code{params}.
|
||||
#'
|
||||
#' @details
|
||||
@@ -98,7 +103,6 @@
|
||||
#' dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
|
||||
#' dtest <- dtrain
|
||||
#' watchlist <- list(eval = dtest, train = dtrain)
|
||||
#' param <- list(max.depth = 2, eta = 1, silent = 1)
|
||||
#' logregobj <- function(preds, dtrain) {
|
||||
#' labels <- getinfo(dtrain, "label")
|
||||
#' preds <- 1/(1 + exp(-preds))
|
||||
@@ -111,11 +115,13 @@
|
||||
#' err <- as.numeric(sum(labels != (preds > 0)))/length(labels)
|
||||
#' return(list(metric = "error", value = err))
|
||||
#' }
|
||||
#' bst <- xgb.train(param, dtrain, nthread = 2, nround = 2, watchlist, logregobj, evalerror)
|
||||
#' param <- list(max.depth = 2, eta = 1, silent = 1, objective=logregobj,eval_metric=evalerror)
|
||||
#' bst <- xgb.train(param, dtrain, nthread = 2, nround = 2, watchlist)
|
||||
#' @export
|
||||
#'
|
||||
xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
|
||||
obj = NULL, feval = NULL, verbose = 1, ...) {
|
||||
obj = NULL, feval = NULL, verbose = 1, print.every.n=1L,
|
||||
early.stop.round = NULL, maximize = NULL, ...) {
|
||||
dtrain <- data
|
||||
if (typeof(params) != "list") {
|
||||
stop("xgb.train: first argument params must be list")
|
||||
@@ -130,19 +136,85 @@ xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
|
||||
}
|
||||
if (length(watchlist) != 0 && verbose == 0) {
|
||||
warning('watchlist is provided but verbose=0, no evaluation information will be printed')
|
||||
watchlist <- list()
|
||||
}
|
||||
params = append(params, list(...))
|
||||
|
||||
# customized objective and evaluation metric interface
|
||||
if (!is.null(params$objective) && !is.null(obj))
|
||||
stop("xgb.train: cannot assign two different objectives")
|
||||
if (!is.null(params$objective))
|
||||
if (class(params$objective)=='function') {
|
||||
obj = params$objective
|
||||
params$objective = NULL
|
||||
}
|
||||
if (!is.null(params$eval_metric) && !is.null(feval))
|
||||
stop("xgb.train: cannot assign two different evaluation metrics")
|
||||
if (!is.null(params$eval_metric))
|
||||
if (class(params$eval_metric)=='function') {
|
||||
feval = params$eval_metric
|
||||
params$eval_metric = NULL
|
||||
}
|
||||
|
||||
# Early stopping
|
||||
if (!is.null(early.stop.round)){
|
||||
if (!is.null(feval) && is.null(maximize))
|
||||
stop('Please set maximize to note whether the model is maximizing the evaluation or not.')
|
||||
if (length(watchlist) == 0)
|
||||
stop('For early stopping you need at least one set in watchlist.')
|
||||
if (is.null(maximize) && is.null(params$eval_metric))
|
||||
stop('Please set maximize to note whether the model is maximizing the evaluation or not.')
|
||||
if (is.null(maximize))
|
||||
{
|
||||
if (params$eval_metric %in% c('rmse','logloss','error','merror','mlogloss')) {
|
||||
maximize = FALSE
|
||||
} else {
|
||||
maximize = TRUE
|
||||
}
|
||||
}
|
||||
|
||||
if (maximize) {
|
||||
bestScore = 0
|
||||
} else {
|
||||
bestScore = Inf
|
||||
}
|
||||
bestInd = 0
|
||||
earlyStopflag = FALSE
|
||||
|
||||
if (length(watchlist)>1)
|
||||
warning('Only the first data set in watchlist is used for early stopping process.')
|
||||
}
|
||||
|
||||
|
||||
handle <- xgb.Booster(params, append(watchlist, dtrain))
|
||||
bst <- xgb.handleToBooster(handle)
|
||||
print.every.n=max( as.integer(print.every.n), 1L)
|
||||
for (i in 1:nrounds) {
|
||||
succ <- xgb.iter.update(bst$handle, dtrain, i - 1, obj)
|
||||
if (length(watchlist) != 0) {
|
||||
msg <- xgb.iter.eval(bst$handle, watchlist, i - 1, feval)
|
||||
cat(paste(msg, "\n", sep=""))
|
||||
if (0== ( (i-1) %% print.every.n))
|
||||
cat(paste(msg, "\n", sep=""))
|
||||
if (!is.null(early.stop.round))
|
||||
{
|
||||
score = strsplit(msg,':|\\s+')[[1]][3]
|
||||
score = as.numeric(score)
|
||||
if ((maximize && score>bestScore) || (!maximize && score<bestScore)) {
|
||||
bestScore = score
|
||||
bestInd = i
|
||||
} else {
|
||||
if (i-bestInd>=early.stop.round) {
|
||||
earlyStopflag = TRUE
|
||||
cat('Stopping. Best iteration:',bestInd)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
bst <- xgb.Booster.check(bst)
|
||||
if (!is.null(early.stop.round)) {
|
||||
bst$bestScore = bestScore
|
||||
bst$bestInd = bestInd
|
||||
}
|
||||
return(bst)
|
||||
}
|
||||
|
||||
@@ -28,8 +28,14 @@
|
||||
#' @param verbose If 0, xgboost will stay silent. If 1, xgboost will print
|
||||
#' information of performance. If 2, xgboost will print information of both
|
||||
#' performance and construction progress information
|
||||
#' @param print.every.n Print every N progress messages when \code{verbose>0}. Default is 1 which means all messages are printed.
|
||||
#' @param missing Missing is only used when input is dense matrix, pick a float
|
||||
#' value that represents missing value. Sometimes a data use 0 or other extreme value to represents missing values.
|
||||
#' @param early.stop.round If \code{NULL}, the early stopping function is not triggered.
|
||||
#' If set to an integer \code{k}, training with a validation set will stop if the performance
|
||||
#' keeps getting worse consecutively for \code{k} rounds.
|
||||
#' @param maximize If \code{feval} and \code{early.stop.round} are set, then \code{maximize} must be set as well.
|
||||
#' \code{maximize=TRUE} means the larger the evaluation score the better.
|
||||
#' @param ... other parameters to pass to \code{params}.
|
||||
#'
|
||||
#' @details
|
||||
@@ -51,7 +57,8 @@
|
||||
#' @export
|
||||
#'
|
||||
xgboost <- function(data = NULL, label = NULL, missing = NULL, params = list(), nrounds,
|
||||
verbose = 1, ...) {
|
||||
verbose = 1, print.every.n = 1L, early.stop.round = NULL,
|
||||
maximize = NULL, ...) {
|
||||
if (is.null(missing)) {
|
||||
dtrain <- xgb.get.DMatrix(data, label)
|
||||
} else {
|
||||
@@ -66,7 +73,8 @@ xgboost <- function(data = NULL, label = NULL, missing = NULL, params = list(),
|
||||
watchlist <- list()
|
||||
}
|
||||
|
||||
bst <- xgb.train(params, dtrain, nrounds, watchlist, verbose=verbose)
|
||||
bst <- xgb.train(params, dtrain, nrounds, watchlist, verbose = verbose, print.every.n=print.every.n,
|
||||
early.stop.round = early.stop.round)
|
||||
|
||||
return(bst)
|
||||
}
|
||||
|
||||
@@ -8,11 +8,6 @@ For up-to-date version (which is recommended), please install from github. Windo
|
||||
devtools::install_github('dmlc/xgboost',subdir='R-package')
|
||||
```
|
||||
|
||||
For stable version on CRAN, please run
|
||||
|
||||
```r
|
||||
install.packages('xgboost')
|
||||
```
|
||||
|
||||
## Examples
|
||||
|
||||
|
||||
@@ -6,3 +6,5 @@ generalized_linear_model Generalized Linear Model
|
||||
cross_validation Cross validation
|
||||
create_sparse_matrix Create Sparse Matrix
|
||||
predict_leaf_indices Predicting the corresponding leaves
|
||||
early_stopping Early Stop in training
|
||||
poisson_regression Poisson Regression on count data
|
||||
|
||||
@@ -40,10 +40,10 @@ evalerror <- function(preds, dtrain) {
|
||||
return(list(metric = "error", value = err))
|
||||
}
|
||||
|
||||
param <- list(max.depth=2,eta=1,silent=1)
|
||||
param <- list(max.depth=2,eta=1,silent=1,
|
||||
objective = logregobj, eval_metric = evalerror)
|
||||
# train with customized objective
|
||||
xgb.cv(param, dtrain, nround, nfold = 5,
|
||||
obj = logregobj, feval=evalerror)
|
||||
xgb.cv(param, dtrain, nround, nfold = 5)
|
||||
|
||||
# do cross validation with prediction values for each fold
|
||||
res <- xgb.cv(param, dtrain, nround, nfold=5, prediction = TRUE)
|
||||
|
||||
@@ -8,7 +8,6 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
|
||||
# note: for customized objective function, we leave objective as default
|
||||
# note: what we are getting is margin value in prediction
|
||||
# you must know what you are doing
|
||||
param <- list(max.depth=2,eta=1,nthread = 2, silent=1)
|
||||
watchlist <- list(eval = dtest, train = dtrain)
|
||||
num_round <- 2
|
||||
|
||||
@@ -33,10 +32,13 @@ evalerror <- function(preds, dtrain) {
|
||||
err <- as.numeric(sum(labels != (preds > 0)))/length(labels)
|
||||
return(list(metric = "error", value = err))
|
||||
}
|
||||
|
||||
param <- list(max.depth=2,eta=1,nthread = 2, silent=1,
|
||||
objective=logregobj, eval_metric=evalerror)
|
||||
print ('start training with user customized objective')
|
||||
# training with customized objective, we can also do step by step training
|
||||
# simply look at xgboost.py's implementation of train
|
||||
bst <- xgb.train(param, dtrain, num_round, watchlist, logregobj, evalerror)
|
||||
bst <- xgb.train(param, dtrain, num_round, watchlist)
|
||||
|
||||
#
|
||||
# there can be cases where you want additional information
|
||||
@@ -59,4 +61,5 @@ logregobjattr <- function(preds, dtrain) {
|
||||
print ('start training with user customized objective, with additional attributes in DMatrix')
|
||||
# training with customized objective, we can also do step by step training
|
||||
# simply look at xgboost.py's implementation of train
|
||||
bst <- xgb.train(param, dtrain, num_round, watchlist, logregobjattr, evalerror)
|
||||
bst <- xgb.train(param, dtrain, num_round, watchlist,
|
||||
objective=logregobj, eval_metric=evalerror)
|
||||
|
||||
40
R-package/demo/early_stopping.R
Normal file
40
R-package/demo/early_stopping.R
Normal file
@@ -0,0 +1,40 @@
|
||||
require(xgboost)
|
||||
# load in the agaricus dataset
|
||||
data(agaricus.train, package='xgboost')
|
||||
data(agaricus.test, package='xgboost')
|
||||
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
|
||||
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
|
||||
# note: for customized objective function, we leave objective as default
|
||||
# note: what we are getting is margin value in prediction
|
||||
# you must know what you are doing
|
||||
param <- list(max.depth=2,eta=1,nthread = 2, silent=1)
|
||||
watchlist <- list(eval = dtest)
|
||||
num_round <- 20
|
||||
# user define objective function, given prediction, return gradient and second order gradient
|
||||
# this is loglikelihood loss
|
||||
logregobj <- function(preds, dtrain) {
|
||||
labels <- getinfo(dtrain, "label")
|
||||
preds <- 1/(1 + exp(-preds))
|
||||
grad <- preds - labels
|
||||
hess <- preds * (1 - preds)
|
||||
return(list(grad = grad, hess = hess))
|
||||
}
|
||||
# user defined evaluation function, return a pair metric_name, result
|
||||
# NOTE: when you do customized loss function, the default prediction value is margin
|
||||
# this may make buildin evalution metric not function properly
|
||||
# for example, we are doing logistic loss, the prediction is score before logistic transformation
|
||||
# the buildin evaluation error assumes input is after logistic transformation
|
||||
# Take this in mind when you use the customization, and maybe you need write customized evaluation function
|
||||
evalerror <- function(preds, dtrain) {
|
||||
labels <- getinfo(dtrain, "label")
|
||||
err <- as.numeric(sum(labels != (preds > 0)))/length(labels)
|
||||
return(list(metric = "error", value = err))
|
||||
}
|
||||
print ('start training with early Stopping setting')
|
||||
|
||||
bst <- xgb.train(param, dtrain, num_round, watchlist,
|
||||
objective = logregobj, eval_metric = evalerror, maximize = FALSE,
|
||||
early.stop.round = 3)
|
||||
bst <- xgb.cv(param, dtrain, num_round, nfold = 5,
|
||||
objective = logregobj, eval_metric = evalerror,
|
||||
maximize = FALSE, early.stop.round = 3)
|
||||
7
R-package/demo/poisson_regression.R
Normal file
7
R-package/demo/poisson_regression.R
Normal file
@@ -0,0 +1,7 @@
|
||||
data(mtcars)
|
||||
head(mtcars)
|
||||
bst = xgboost(data=as.matrix(mtcars[,-11]),label=mtcars[,11],
|
||||
objective='count:poisson',nrounds=5)
|
||||
pred = predict(bst,as.matrix(mtcars[,-11]))
|
||||
sqrt(mean((pred-mtcars[,11])^2))
|
||||
|
||||
@@ -7,3 +7,5 @@ demo(generalized_linear_model)
|
||||
demo(cross_validation)
|
||||
demo(create_sparse_matrix)
|
||||
demo(predict_leaf_indices)
|
||||
demo(early_stopping)
|
||||
demo(poisson_regression)
|
||||
|
||||
@@ -7,7 +7,8 @@
|
||||
xgb.cv(params = list(), data, nrounds, nfold, label = NULL,
|
||||
missing = NULL, prediction = FALSE, showsd = TRUE, metrics = list(),
|
||||
obj = NULL, feval = NULL, stratified = TRUE, folds = NULL,
|
||||
verbose = T, ...)
|
||||
verbose = T, print.every.n = 1L, early.stop.round = NULL,
|
||||
maximize = NULL, ...)
|
||||
}
|
||||
\arguments{
|
||||
\item{params}{the list of parameters. Commonly used ones are:
|
||||
@@ -65,6 +66,15 @@ If folds are supplied, the nfold and stratified parameters would be ignored.}
|
||||
|
||||
\item{verbose}{\code{boolean}, print the statistics during the process}
|
||||
|
||||
\item{print.every.n}{Print every N progress messages when \code{verbose>0}. Default is 1 which means all messages are printed.}
|
||||
|
||||
\item{early.stop.round}{If \code{NULL}, the early stopping function is not triggered.
|
||||
If set to an integer \code{k}, training with a validation set will stop if the performance
|
||||
keeps getting worse consecutively for \code{k} rounds.}
|
||||
|
||||
\item{maximize}{If \code{feval} and \code{early.stop.round} are set, then \code{maximize} must be set as well.
|
||||
\code{maximize=TRUE} means the larger the evaluation score the better.}
|
||||
|
||||
\item{...}{other parameters to pass to \code{params}.}
|
||||
}
|
||||
\value{
|
||||
|
||||
@@ -5,7 +5,8 @@
|
||||
\title{eXtreme Gradient Boosting Training}
|
||||
\usage{
|
||||
xgb.train(params = list(), data, nrounds, watchlist = list(), obj = NULL,
|
||||
feval = NULL, verbose = 1, ...)
|
||||
feval = NULL, verbose = 1, print.every.n = 1L,
|
||||
early.stop.round = NULL, maximize = NULL, ...)
|
||||
}
|
||||
\arguments{
|
||||
\item{params}{the list of parameters.
|
||||
@@ -42,7 +43,7 @@ xgb.train(params = list(), data, nrounds, watchlist = list(), obj = NULL,
|
||||
3. Task Parameters
|
||||
|
||||
\itemize{
|
||||
\item \code{objective} specify the learning task and the corresponding learning objective, and the objective options are below:
|
||||
\item \code{objective} specify the learning task and the corresponding learning objective, users can pass a self-defined function to it. The default objective options are below:
|
||||
\itemize{
|
||||
\item \code{reg:linear} linear regression (Default).
|
||||
\item \code{reg:logistic} logistic regression.
|
||||
@@ -54,7 +55,7 @@ xgb.train(params = list(), data, nrounds, watchlist = list(), obj = NULL,
|
||||
\item \code{rank:pairwise} set xgboost to do ranking task by minimizing the pairwise loss.
|
||||
}
|
||||
\item \code{base_score} the initial prediction score of all instances, global bias. Default: 0.5
|
||||
\item \code{eval_metric} evaluation metrics for validation data. Default: metric will be assigned according to objective(rmse for regression, and error for classification, mean average precision for ranking). List is provided in detail section.
|
||||
\item \code{eval_metric} evaluation metrics for validation data. Users can pass a self-defined function to it. Default: metric will be assigned according to objective(rmse for regression, and error for classification, mean average precision for ranking). List is provided in detail section.
|
||||
}}
|
||||
|
||||
\item{data}{takes an \code{xgb.DMatrix} as the input.}
|
||||
@@ -75,7 +76,16 @@ gradient with given prediction and dtrain,}
|
||||
prediction and dtrain,}
|
||||
|
||||
\item{verbose}{If 0, xgboost will stay silent. If 1, xgboost will print
|
||||
information of performance. If 2, xgboost will print information of both}
|
||||
information of performance. If 2, xgboost will print information of both}
|
||||
|
||||
\item{print.every.n}{Print every N progress messages when \code{verbose>0}. Default is 1 which means all messages are printed.}
|
||||
|
||||
\item{early.stop.round}{If \code{NULL}, the early stopping function is not triggered.
|
||||
If set to an integer \code{k}, training with a validation set will stop if the performance
|
||||
keeps getting worse consecutively for \code{k} rounds.}
|
||||
|
||||
\item{maximize}{If \code{feval} and \code{early.stop.round} are set, then \code{maximize} must be set as well.
|
||||
\code{maximize=TRUE} means the larger the evaluation score the better.}
|
||||
|
||||
\item{...}{other parameters to pass to \code{params}.}
|
||||
}
|
||||
@@ -110,7 +120,6 @@ data(agaricus.train, package='xgboost')
|
||||
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
|
||||
dtest <- dtrain
|
||||
watchlist <- list(eval = dtest, train = dtrain)
|
||||
param <- list(max.depth = 2, eta = 1, silent = 1)
|
||||
logregobj <- function(preds, dtrain) {
|
||||
labels <- getinfo(dtrain, "label")
|
||||
preds <- 1/(1 + exp(-preds))
|
||||
@@ -123,6 +132,7 @@ evalerror <- function(preds, dtrain) {
|
||||
err <- as.numeric(sum(labels != (preds > 0)))/length(labels)
|
||||
return(list(metric = "error", value = err))
|
||||
}
|
||||
bst <- xgb.train(param, dtrain, nthread = 2, nround = 2, watchlist, logregobj, evalerror)
|
||||
param <- list(max.depth = 2, eta = 1, silent = 1, objective=logregobj,eval_metric=evalerror)
|
||||
bst <- xgb.train(param, dtrain, nthread = 2, nround = 2, watchlist)
|
||||
}
|
||||
|
||||
|
||||
@@ -5,7 +5,8 @@
|
||||
\title{eXtreme Gradient Boosting (Tree) library}
|
||||
\usage{
|
||||
xgboost(data = NULL, label = NULL, missing = NULL, params = list(),
|
||||
nrounds, verbose = 1, ...)
|
||||
nrounds, verbose = 1, print.every.n = 1L, early.stop.round = NULL,
|
||||
maximize = NULL, ...)
|
||||
}
|
||||
\arguments{
|
||||
\item{data}{takes \code{matrix}, \code{dgCMatrix}, local data file or
|
||||
@@ -41,6 +42,15 @@ Commonly used ones are:
|
||||
information of performance. If 2, xgboost will print information of both
|
||||
performance and construction progress information}
|
||||
|
||||
\item{print.every.n}{Print every N progress messages when \code{verbose>0}. Default is 1 which means all messages are printed.}
|
||||
|
||||
\item{early.stop.round}{If \code{NULL}, the early stopping function is not triggered.
|
||||
If set to an integer \code{k}, training with a validation set will stop if the performance
|
||||
keeps getting worse consecutively for \code{k} rounds.}
|
||||
|
||||
\item{maximize}{If \code{feval} and \code{early.stop.round} are set, then \code{maximize} must be set as well.
|
||||
\code{maximize=TRUE} means the larger the evaluation score the better.}
|
||||
|
||||
\item{...}{other parameters to pass to \code{params}.}
|
||||
}
|
||||
\description{
|
||||
|
||||
@@ -70,10 +70,10 @@ extern "C" {
|
||||
SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) {
|
||||
_WrapperBegin();
|
||||
void *handle = XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent));
|
||||
_WrapperEnd();
|
||||
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
|
||||
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
|
||||
UNPROTECT(1);
|
||||
_WrapperEnd();
|
||||
return ret;
|
||||
}
|
||||
SEXP XGDMatrixCreateFromMat_R(SEXP mat,
|
||||
@@ -91,10 +91,10 @@ extern "C" {
|
||||
}
|
||||
}
|
||||
void *handle = XGDMatrixCreateFromMat(BeginPtr(data), nrow, ncol, asReal(missing));
|
||||
_WrapperEnd();
|
||||
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
|
||||
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
|
||||
UNPROTECT(1);
|
||||
_WrapperEnd();
|
||||
return ret;
|
||||
}
|
||||
SEXP XGDMatrixCreateFromCSC_R(SEXP indptr,
|
||||
@@ -120,10 +120,10 @@ extern "C" {
|
||||
}
|
||||
void *handle = XGDMatrixCreateFromCSC(BeginPtr(col_ptr_), BeginPtr(indices_),
|
||||
BeginPtr(data_), nindptr, ndata);
|
||||
_WrapperEnd();
|
||||
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
|
||||
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
|
||||
UNPROTECT(1);
|
||||
_WrapperEnd();
|
||||
return ret;
|
||||
}
|
||||
SEXP XGDMatrixSliceDMatrix_R(SEXP handle, SEXP idxset) {
|
||||
@@ -134,10 +134,10 @@ extern "C" {
|
||||
idxvec[i] = INTEGER(idxset)[i] - 1;
|
||||
}
|
||||
void *res = XGDMatrixSliceDMatrix(R_ExternalPtrAddr(handle), BeginPtr(idxvec), len);
|
||||
_WrapperEnd();
|
||||
SEXP ret = PROTECT(R_MakeExternalPtr(res, R_NilValue, R_NilValue));
|
||||
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
|
||||
UNPROTECT(1);
|
||||
_WrapperEnd();
|
||||
return ret;
|
||||
}
|
||||
void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent) {
|
||||
@@ -157,10 +157,7 @@ extern "C" {
|
||||
vec[i] = static_cast<unsigned>(INTEGER(array)[i]);
|
||||
}
|
||||
XGDMatrixSetGroup(R_ExternalPtrAddr(handle), BeginPtr(vec), len);
|
||||
_WrapperEnd();
|
||||
return;
|
||||
}
|
||||
{
|
||||
} else {
|
||||
std::vector<float> vec(len);
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (int i = 0; i < len; ++i) {
|
||||
@@ -177,12 +174,12 @@ extern "C" {
|
||||
bst_ulong olen;
|
||||
const float *res = XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle),
|
||||
CHAR(asChar(field)), &olen);
|
||||
_WrapperEnd();
|
||||
SEXP ret = PROTECT(allocVector(REALSXP, olen));
|
||||
for (size_t i = 0; i < olen; ++i) {
|
||||
REAL(ret)[i] = res[i];
|
||||
}
|
||||
UNPROTECT(1);
|
||||
_WrapperEnd();
|
||||
return ret;
|
||||
}
|
||||
SEXP XGDMatrixNumRow_R(SEXP handle) {
|
||||
@@ -203,10 +200,10 @@ extern "C" {
|
||||
dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i)));
|
||||
}
|
||||
void *handle = XGBoosterCreate(BeginPtr(dvec), dvec.size());
|
||||
_WrapperEnd();
|
||||
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
|
||||
R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
|
||||
UNPROTECT(1);
|
||||
_WrapperEnd();
|
||||
return ret;
|
||||
}
|
||||
void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val) {
|
||||
@@ -252,10 +249,12 @@ extern "C" {
|
||||
for (int i = 0; i < len; ++i) {
|
||||
vec_sptr.push_back(vec_names[i].c_str());
|
||||
}
|
||||
const char *ret =
|
||||
XGBoosterEvalOneIter(R_ExternalPtrAddr(handle),
|
||||
asInteger(iter),
|
||||
BeginPtr(vec_dmats), BeginPtr(vec_sptr), len);
|
||||
_WrapperEnd();
|
||||
return mkString(XGBoosterEvalOneIter(R_ExternalPtrAddr(handle),
|
||||
asInteger(iter),
|
||||
BeginPtr(vec_dmats), BeginPtr(vec_sptr), len));
|
||||
return mkString(ret);
|
||||
}
|
||||
SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP option_mask, SEXP ntree_limit) {
|
||||
_WrapperBegin();
|
||||
@@ -265,12 +264,12 @@ extern "C" {
|
||||
asInteger(option_mask),
|
||||
asInteger(ntree_limit),
|
||||
&olen);
|
||||
_WrapperEnd();
|
||||
SEXP ret = PROTECT(allocVector(REALSXP, olen));
|
||||
for (size_t i = 0; i < olen; ++i) {
|
||||
REAL(ret)[i] = res[i];
|
||||
}
|
||||
UNPROTECT(1);
|
||||
_WrapperEnd();
|
||||
return ret;
|
||||
}
|
||||
void XGBoosterLoadModel_R(SEXP handle, SEXP fname) {
|
||||
@@ -305,17 +304,18 @@ extern "C" {
|
||||
SEXP XGBoosterDumpModel_R(SEXP handle, SEXP fmap, SEXP with_stats) {
|
||||
_WrapperBegin();
|
||||
bst_ulong olen;
|
||||
const char **res = XGBoosterDumpModel(R_ExternalPtrAddr(handle),
|
||||
CHAR(asChar(fmap)),
|
||||
asInteger(with_stats),
|
||||
&olen);
|
||||
const char **res =
|
||||
XGBoosterDumpModel(R_ExternalPtrAddr(handle),
|
||||
CHAR(asChar(fmap)),
|
||||
asInteger(with_stats),
|
||||
&olen);
|
||||
_WrapperEnd();
|
||||
SEXP out = PROTECT(allocVector(STRSXP, olen));
|
||||
for (size_t i = 0; i < olen; ++i) {
|
||||
stringstream stream;
|
||||
stream << "booster["<<i<<"]\n" << res[i];
|
||||
SET_STRING_ELT(out, i, mkChar(stream.str().c_str()));
|
||||
}
|
||||
_WrapperEnd();
|
||||
UNPROTECT(1);
|
||||
return out;
|
||||
}
|
||||
|
||||
@@ -57,11 +57,9 @@ devtools::install_github('dmlc/xgboost', subdir='R-package')
|
||||
Cran version
|
||||
------------
|
||||
|
||||
For stable version on *CRAN*, run:
|
||||
As of 2015-03-13, ‘xgboost’ was removed from the CRAN repository.
|
||||
|
||||
```{r installCran, eval=FALSE}
|
||||
install.packages('xgboost')
|
||||
```
|
||||
Formerly available versions can be obtained from the CRAN [archive](http://cran.r-project.org/src/contrib/Archive/xgboost)
|
||||
|
||||
Learning
|
||||
========
|
||||
|
||||
Reference in New Issue
Block a user