86 lines
3.0 KiB
R
86 lines
3.0 KiB
R
% Generated by roxygen2 (4.1.0): do not edit by hand
|
|
% Please edit documentation in R/xgb.cv.R
|
|
\name{xgb.cv}
|
|
\alias{xgb.cv}
|
|
\title{Cross Validation}
|
|
\usage{
|
|
xgb.cv(params = list(), data, nrounds, nfold, label = NULL,
|
|
missing = NULL, prediction = FALSE, showsd = TRUE, metrics = list(),
|
|
obj = NULL, feval = NULL, verbose = T, ...)
|
|
}
|
|
\arguments{
|
|
\item{params}{the list of parameters. Commonly used ones are:
|
|
\itemize{
|
|
\item \code{objective} objective function, common ones are
|
|
\itemize{
|
|
\item \code{reg:linear} linear regression
|
|
\item \code{binary:logistic} logistic regression for classification
|
|
}
|
|
\item \code{eta} step size of each boosting step
|
|
\item \code{max.depth} maximum depth of the tree
|
|
\item \code{nthread} number of threads used in training; if not set, all threads are used
|
|
}
|
|
|
|
See \url{https://github.com/tqchen/xgboost/wiki/Parameters} for
|
|
further details. See also demo/ for a walkthrough example in R.}
|
|
|
|
\item{data}{takes an \code{xgb.DMatrix} as the input.}
|
|
|
|
\item{nrounds}{the max number of iterations}
|
|
|
|
\item{nfold}{number of folds used}
|
|
|
|
\item{label}{optional field, when data is a Matrix}
|
|
|
|
\item{missing}{Missing is only used when input is a dense matrix; pick a float
|
|
value that represents a missing value. Sometimes data use 0 or another extreme value to represent missing values.}
|
|
|
|
\item{prediction}{A logical value indicating whether to return the prediction vector.}
|
|
|
|
\item{showsd}{\code{boolean}, whether to show the standard deviation of cross validation}
|
|
|
|
\item{metrics}{list of evaluation metrics to be used in cross validation,
|
|
when it is not specified, the evaluation metric is chosen according to objective function.
|
|
Possible options are:
|
|
\itemize{
|
|
\item \code{error} binary classification error rate
|
|
\item \code{rmse} Root mean square error
|
|
\item \code{logloss} negative log-likelihood function
|
|
\item \code{auc} Area under curve
|
|
\item \code{merror} Exact matching error, used to evaluate multi-class classification
|
|
}}
|
|
|
|
\item{obj}{customized objective function. Returns gradient and second order
|
|
gradient with given prediction and dtrain.}
|
|
|
|
\item{feval}{customized evaluation function. Returns
|
|
\code{list(metric='metric-name', value='metric-value')} with given
|
|
prediction and dtrain.}
|
|
|
|
\item{verbose}{\code{boolean}, print the statistics during the process.}
|
|
|
|
\item{...}{other parameters to pass to \code{params}.}
|
|
}
|
|
\value{
|
|
A \code{data.table} with the mean and standard deviation of each statistic for the training set and test set.
|
|
}
|
|
\description{
|
|
The cross validation function of xgboost
|
|
}
|
|
\details{
|
|
This is the cross validation function for xgboost
|
|
|
|
Parallelization is automatically enabled if OpenMP is present.
|
|
Number of threads can also be manually specified via "nthread" parameter.
|
|
|
|
This function only accepts an \code{xgb.DMatrix} object as the input.
|
|
}
|
|
\examples{
|
|
data(agaricus.train, package='xgboost')
|
|
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
|
|
history <- xgb.cv(data = dtrain, nround=3, nfold = 5, metrics=list("rmse","auc"),
|
|
"max.depth"=3, "eta"=1, "objective"="binary:logistic")
|
|
print(history)
|
|
}
|
|
|