add early stopping to R
This commit is contained in:
parent
3b4697786e
commit
54fb49ee5c
@ -1,4 +1,4 @@
|
||||
# Generated by roxygen2 (4.1.1): do not edit by hand
|
||||
# Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
|
||||
export(getinfo)
|
||||
export(setinfo)
|
||||
|
||||
@ -66,7 +66,11 @@
|
||||
#' prediction and dtrain,
|
||||
#' @param verbose If 0, xgboost will stay silent. If 1, xgboost will print
|
||||
#' information of performance. If 2, xgboost will print information of both
|
||||
#'
|
||||
#' @param earlyStopRound If \code{NULL}, the early stopping function is not triggered.
|
||||
#' If set to an integer \code{k}, training with a validation set will stop if the performance
|
||||
#' keeps getting worse consecutively for \code{k} rounds.
|
||||
#' @param maximize If \code{feval} and \code{earlyStopRound} are set, then \code{maximize} must be set as well.
|
||||
#' \code{maximize=TRUE} means the larger the evaluation score the better.
|
||||
#' @param ... other parameters to pass to \code{params}.
|
||||
#'
|
||||
#' @details
|
||||
@ -114,7 +118,8 @@
|
||||
#' @export
|
||||
#'
|
||||
xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
|
||||
obj = NULL, feval = NULL, verbose = 1, ...) {
|
||||
obj = NULL, feval = NULL, verbose = 1,
|
||||
earlyStopRound = NULL, maximize = NULL, ...) {
|
||||
dtrain <- data
|
||||
if (typeof(params) != "list") {
|
||||
stop("xgb.train: first argument params must be list")
|
||||
@ -133,6 +138,33 @@ xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
|
||||
}
|
||||
params = append(params, list(...))
|
||||
|
||||
# Early stopping
|
||||
if (!is.null(feval) && is.null(maximize))
|
||||
stop('Please set maximize to note whether the model is maximizing the evaluation or not.')
|
||||
if (length(watchlist) == 0 && !is.null(earlyStopRound))
|
||||
stop('For early stopping you need at least one set in watchlist.')
|
||||
if (is.null(maximize) && is.null(params$eval_metric))
|
||||
stop('Please set maximize to note whether the model is maximizing the evaluation or not.')
|
||||
if (is.null(maximize))
|
||||
{
|
||||
if (params$eval_metric %in% c('rmse','logloss','error','merror','mlogloss')) {
|
||||
maximize = FALSE
|
||||
} else {
|
||||
maximize = TRUE
|
||||
}
|
||||
}
|
||||
|
||||
if (maximize) {
|
||||
bestScore = 0
|
||||
} else {
|
||||
bestScore = Inf
|
||||
}
|
||||
bestInd = 0
|
||||
earlyStopflag = FALSE
|
||||
|
||||
if (length(watchlist)>1 && !is.null(earlyStopRound))
|
||||
warning('Only the first data set in watchlist is used for early stopping process.')
|
||||
|
||||
handle <- xgb.Booster(params, append(watchlist, dtrain))
|
||||
bst <- xgb.handleToBooster(handle)
|
||||
for (i in 1:nrounds) {
|
||||
@ -140,8 +172,30 @@ xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
|
||||
if (length(watchlist) != 0) {
|
||||
msg <- xgb.iter.eval(bst$handle, watchlist, i - 1, feval)
|
||||
cat(paste(msg, "\n", sep=""))
|
||||
if (!is.null(earlyStopRound))
|
||||
{
|
||||
score = strsplit(msg,'\\s+')[[1]][1]
|
||||
score = strsplit(score,':')[[1]][2]
|
||||
score = as.numeric(score)
|
||||
if ((maximize && score>bestScore) || (!maximize && score<bestScore)) {
|
||||
bestScore = score
|
||||
bestInd = i
|
||||
} else {
|
||||
if (i-bestInd>earlyStopRound) {
|
||||
earlyStopflag = TRUE
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (earlyStopflag) {
|
||||
cat('Stopping. Best iteration:',bestInd)
|
||||
break
|
||||
}
|
||||
}
|
||||
bst <- xgb.Booster.check(bst)
|
||||
if (!is.null(earlyStopRound)) {
|
||||
bst$bestScore = bestScore
|
||||
bst$bestInd = bestInd
|
||||
}
|
||||
return(bst)
|
||||
}
|
||||
|
||||
@ -30,6 +30,11 @@
|
||||
#' performance and construction progress information
|
||||
#' @param missing Missing is only used when input is dense matrix, pick a float
|
||||
#' value that represents missing value. Sometimes a dataset uses 0 or another extreme value to represent missing values.
|
||||
#' @param earlyStopRound If \code{NULL}, the early stopping function is not triggered.
|
||||
#' If set to an integer \code{k}, training with a validation set will stop if the performance
|
||||
#' keeps getting worse consecutively for \code{k} rounds.
|
||||
#' @param maximize If \code{feval} and \code{earlyStopRound} are set, then \code{maximize} must be set as well.
|
||||
#' \code{maximize=TRUE} means the larger the evaluation score the better.
|
||||
#' @param ... other parameters to pass to \code{params}.
|
||||
#'
|
||||
#' @details
|
||||
@ -51,7 +56,7 @@
|
||||
#' @export
|
||||
#'
|
||||
xgboost <- function(data = NULL, label = NULL, missing = NULL, params = list(), nrounds,
|
||||
verbose = 1, ...) {
|
||||
verbose = 1, earlyStopRound = NULL, maximize = NULL, ...) {
|
||||
if (is.null(missing)) {
|
||||
dtrain <- xgb.get.DMatrix(data, label)
|
||||
} else {
|
||||
@ -66,7 +71,8 @@ xgboost <- function(data = NULL, label = NULL, missing = NULL, params = list(),
|
||||
watchlist <- list()
|
||||
}
|
||||
|
||||
bst <- xgb.train(params, dtrain, nrounds, watchlist, verbose=verbose)
|
||||
bst <- xgb.train(params, dtrain, nrounds, watchlist, verbose = verbose,
|
||||
earlyStopRound = earlyStopRound)
|
||||
|
||||
return(bst)
|
||||
}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
||||
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
% Please edit documentation in R/xgboost.R
|
||||
\docType{data}
|
||||
\name{agaricus.test}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
||||
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
% Please edit documentation in R/xgboost.R
|
||||
\docType{data}
|
||||
\name{agaricus.train}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
||||
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
% Please edit documentation in R/getinfo.xgb.DMatrix.R
|
||||
\docType{methods}
|
||||
\name{getinfo}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
||||
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
% Please edit documentation in R/nrow.xgb.DMatrix.R
|
||||
\docType{methods}
|
||||
\name{nrow,xgb.DMatrix-method}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
||||
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
% Please edit documentation in R/predict.xgb.Booster.R
|
||||
\docType{methods}
|
||||
\name{predict,xgb.Booster-method}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
||||
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
% Please edit documentation in R/predict.xgb.Booster.handle.R
|
||||
\docType{methods}
|
||||
\name{predict,xgb.Booster.handle-method}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
||||
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
% Please edit documentation in R/setinfo.xgb.DMatrix.R
|
||||
\docType{methods}
|
||||
\name{setinfo}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
||||
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
% Please edit documentation in R/slice.xgb.DMatrix.R
|
||||
\docType{methods}
|
||||
\name{slice}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
||||
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
% Please edit documentation in R/xgb.DMatrix.R
|
||||
\name{xgb.DMatrix}
|
||||
\alias{xgb.DMatrix}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
||||
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
% Please edit documentation in R/xgb.DMatrix.save.R
|
||||
\name{xgb.DMatrix.save}
|
||||
\alias{xgb.DMatrix.save}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
||||
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
% Please edit documentation in R/xgb.cv.R
|
||||
\name{xgb.cv}
|
||||
\alias{xgb.cv}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
||||
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
% Please edit documentation in R/xgb.dump.R
|
||||
\name{xgb.dump}
|
||||
\alias{xgb.dump}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
||||
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
% Please edit documentation in R/xgb.importance.R
|
||||
\name{xgb.importance}
|
||||
\alias{xgb.importance}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
||||
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
% Please edit documentation in R/xgb.load.R
|
||||
\name{xgb.load}
|
||||
\alias{xgb.load}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
||||
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
% Please edit documentation in R/xgb.model.dt.tree.R
|
||||
\name{xgb.model.dt.tree}
|
||||
\alias{xgb.model.dt.tree}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
||||
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
% Please edit documentation in R/xgb.plot.importance.R
|
||||
\name{xgb.plot.importance}
|
||||
\alias{xgb.plot.importance}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
||||
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
% Please edit documentation in R/xgb.plot.tree.R
|
||||
\name{xgb.plot.tree}
|
||||
\alias{xgb.plot.tree}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
||||
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
% Please edit documentation in R/xgb.save.R
|
||||
\name{xgb.save}
|
||||
\alias{xgb.save}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
||||
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
% Please edit documentation in R/xgb.save.raw.R
|
||||
\name{xgb.save.raw}
|
||||
\alias{xgb.save.raw}
|
||||
|
||||
@ -1,11 +1,12 @@
|
||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
||||
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
% Please edit documentation in R/xgb.train.R
|
||||
\name{xgb.train}
|
||||
\alias{xgb.train}
|
||||
\title{eXtreme Gradient Boosting Training}
|
||||
\usage{
|
||||
xgb.train(params = list(), data, nrounds, watchlist = list(), obj = NULL,
|
||||
feval = NULL, verbose = 1, ...)
|
||||
feval = NULL, verbose = 1, earlyStopRound = NULL, maximize = NULL,
|
||||
...)
|
||||
}
|
||||
\arguments{
|
||||
\item{params}{the list of parameters.
|
||||
@ -49,7 +50,7 @@ xgb.train(params = list(), data, nrounds, watchlist = list(), obj = NULL,
|
||||
\item \code{binary:logistic} logistic regression for binary classification. Output probability.
|
||||
\item \code{binary:logitraw} logistic regression for binary classification, output score before logistic transformation.
|
||||
\item \code{num_class} set the number of classes. To use only with multiclass objectives.
|
||||
\item \code{multi:softmax} set xgboost to do multiclass classification using the softmax objective. Class is a number and should be from 0 \code{tonum_class}
|
||||
\item \code{multi:softmax} set xgboost to do multiclass classification using the softmax objective. Class is represented by a number and should be from 0 to \code{num_class - 1}.
|
||||
\item \code{multi:softprob} same as softmax, but output a vector of ndata * nclass, which can be further reshaped to ndata, nclass matrix. The result contains predicted probabilities of each data point belonging to each class.
|
||||
\item \code{rank:pairwise} set xgboost to do ranking task by minimizing the pairwise loss.
|
||||
}
|
||||
@ -77,6 +78,13 @@ prediction and dtrain,}
|
||||
\item{verbose}{If 0, xgboost will stay silent. If 1, xgboost will print
|
||||
information of performance. If 2, xgboost will print information of both}
|
||||
|
||||
\item{earlyStopRound}{If \code{NULL}, the early stopping function is not triggered.
|
||||
If set to an integer \code{k}, training with a validation set will stop if the performance
|
||||
keeps getting worse consecutively for \code{k} rounds.}
|
||||
|
||||
\item{maximize}{If \code{feval} and \code{earlyStopRound} are set, then \code{maximize} must be set as well.
|
||||
\code{maximize=TRUE} means the larger the evaluation score the better.}
|
||||
|
||||
\item{...}{other parameters to pass to \code{params}.}
|
||||
}
|
||||
\description{
|
||||
@ -98,7 +106,7 @@ Number of threads can also be manually specified via \code{nthread} parameter.
|
||||
\item \code{error} Binary classification error rate. It is calculated as \code{(wrong cases) / (all cases)}. For the predictions, the evaluation will regard the instances with prediction value larger than 0.5 as positive instances, and the others as negative instances.
|
||||
\item \code{merror} Multiclass classification error rate. It is calculated as \code{(wrong cases) / (all cases)}.
|
||||
\item \code{auc} Area under the curve. \url{http://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_curve} for ranking evaluation.
|
||||
\item \code{ndcg} Normalized Discounted Cumulative Gain. \url{http://en.wikipedia.org/wiki/NDCG}
|
||||
\item \code{ndcg} Normalized Discounted Cumulative Gain (for ranking task). \url{http://en.wikipedia.org/wiki/NDCG}
|
||||
}
|
||||
|
||||
Full list of parameters is available in the Wiki \url{https://github.com/dmlc/xgboost/wiki/Parameters}.
|
||||
|
||||
@ -1,11 +1,11 @@
|
||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
||||
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
% Please edit documentation in R/xgboost.R
|
||||
\name{xgboost}
|
||||
\alias{xgboost}
|
||||
\title{eXtreme Gradient Boosting (Tree) library}
|
||||
\usage{
|
||||
xgboost(data = NULL, label = NULL, missing = NULL, params = list(),
|
||||
nrounds, verbose = 1, ...)
|
||||
nrounds, verbose = 1, earlyStopRound = NULL, maximize = NULL, ...)
|
||||
}
|
||||
\arguments{
|
||||
\item{data}{takes \code{matrix}, \code{dgCMatrix}, local data file or
|
||||
@ -41,6 +41,13 @@ Commonly used ones are:
|
||||
information of performance. If 2, xgboost will print information of both
|
||||
performance and construction progress information}
|
||||
|
||||
\item{earlyStopRound}{If \code{NULL}, the early stopping function is not triggered.
|
||||
If set to an integer \code{k}, training with a validation set will stop if the performance
|
||||
keeps getting worse consecutively for \code{k} rounds.}
|
||||
|
||||
\item{maximize}{If \code{feval} and \code{earlyStopRound} are set, then \code{maximize} must be set as well.
|
||||
\code{maximize=TRUE} means the larger the evaluation score the better.}
|
||||
|
||||
\item{...}{other parameters to pass to \code{params}.}
|
||||
}
|
||||
\description{
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user