Update library version dependencies (mainly for DiagrammeR)

Fix the @export tag in each R file (required for roxygen2 5.0.0; the old form no longer works)
Regenerate the roxygen documentation
unknown 2015-11-07 21:01:28 +01:00
parent 635645c650
commit 0052b193cf
36 changed files with 123 additions and 126 deletions
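Every R source file in this commit gets the same one-line fix: the trailing empty `#'` line after the `#' @export` tag is removed, since (per the commit message) roxygen2 5.0.0 no longer handles that form. A minimal before/after sketch of the pattern, using one of the real functions from the diffs below:

```r
## Before: a trailing empty roxygen line follows @export;
## per the commit message, roxygen2 5.0.0 no longer parses this correctly.
#' @rdname getinfo
#' @export
#'
getinfo <- function(object, ...) {
  UseMethod("getinfo")
}

## After: @export is the last line of the roxygen block,
## so the NAMESPACE export is generated as expected.
#' @rdname getinfo
#' @export
getinfo <- function(object, ...) {
  UseMethod("getinfo")
}
```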

View File

@@ -3,16 +3,16 @@ Type: Package
 Title: Extreme Gradient Boosting
 Version: 0.4-2
 Date: 2015-08-01
-Author: Tianqi Chen <tianqi.tchen@gmail.com>, Tong He <hetong007@gmail.com>, Michael Benesty <michael@benesty.fr>
+Author: Tianqi Chen <tianqi.tchen@gmail.com>, Tong He <hetong007@gmail.com>,
+    Michael Benesty <michael@benesty.fr>
 Maintainer: Tong He <hetong007@gmail.com>
-Description: Extreme Gradient Boosting, which is an
-    efficient implementation of gradient boosting framework.
-    This package is its R interface. The package includes efficient
-    linear model solver and tree learning algorithms. The package can automatically
-    do parallel computation on a single machine which could be more than 10 times faster
-    than existing gradient boosting packages. It supports various
-    objective functions, including regression, classification and ranking. The
-    package is made to be extensible, so that users are also allowed to define
+Description: Extreme Gradient Boosting, which is an efficient implementation
+    of gradient boosting framework. This package is its R interface. The package
+    includes efficient linear model solver and tree learning algorithms. The package
+    can automatically do parallel computation on a single machine which could be
+    more than 10 times faster than existing gradient boosting packages. It supports
+    various objective functions, including regression, classification and ranking.
+    The package is made to be extensible, so that users are also allowed to define
     their own objectives easily.
 License: Apache License (== 2.0) | file LICENSE
 URL: https://github.com/dmlc/xgboost
@@ -21,7 +21,7 @@ VignetteBuilder: knitr
 Suggests:
     knitr,
     ggplot2 (>= 1.0.0),
-    DiagrammeR (>= 0.6),
+    DiagrammeR (>= 0.8.1),
     Ckmeans.1d.dp (>= 3.3.1),
     vcd (>= 1.3),
     testthat
@@ -30,6 +30,7 @@ Depends:
 Imports:
     Matrix (>= 1.1-0),
    methods,
-    data.table (>= 1.9.4),
+    data.table (>= 1.9.6),
     magrittr (>= 1.5),
     stringr (>= 0.6.2)
+RoxygenNote: 5.0.0

View File

@@ -1,4 +1,4 @@
-# Generated by roxygen2 (4.1.1): do not edit by hand
+# Generated by roxygen2: do not edit by hand
 
 export(getinfo)
 export(setinfo)
@@ -21,6 +21,10 @@ exportMethods(predict)
 import(methods)
 importClassesFrom(Matrix,dgCMatrix)
 importClassesFrom(Matrix,dgeMatrix)
+importFrom(DiagrammeR,create_edges)
+importFrom(DiagrammeR,create_graph)
+importFrom(DiagrammeR,create_nodes)
+importFrom(DiagrammeR,render_graph)
 importFrom(Matrix,cBind)
 importFrom(Matrix,colSums)
 importFrom(Matrix,sparseVector)
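The four new `importFrom(DiagrammeR, ...)` entries are what drive the `DiagrammeR (>= 0.8.1)` bump in DESCRIPTION: `create_nodes()`, `create_edges()`, `create_graph()` and `render_graph()` are the 0.8-era graph-building API. A hedged sketch of how they typically fit together; the node and edge values are illustrative, not taken from `xgb.plot.tree`, and the argument names assume the 0.8.x signatures:

```r
library(DiagrammeR)  # >= 0.8.1, per the updated DESCRIPTION

# A toy two-leaf split, built the way a tree plot could be:
# a node data frame, an edge data frame, a graph, then rendering.
nodes <- create_nodes(nodes = c("0", "1", "2"),
                      label = c("f1 < 0.5", "leaf: 0.4", "leaf: -0.4"))
edges <- create_edges(from = c("0", "0"), to = c("1", "2"))
graph <- create_graph(nodes_df = nodes, edges_df = edges)
render_graph(graph)  # rendered through GraphViz rather than Mermaid
```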

View File

@@ -23,7 +23,6 @@ setClass('xgb.DMatrix')
 #' stopifnot(all(labels2 == 1-labels))
 #' @rdname getinfo
 #' @export
-#'
 getinfo <- function(object, ...){
   UseMethod("getinfo")
 }

View File

@@ -29,7 +29,6 @@ setClass("xgb.Booster",
 #'                eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 #' pred <- predict(bst, test$data)
 #' @export
-#'
 setMethod("predict", signature = "xgb.Booster",
           definition = function(object, newdata, missing = NA,
                                 outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE) {

View File

@@ -21,7 +21,6 @@
 #' stopifnot(all(labels2 == 1-labels))
 #' @rdname setinfo
 #' @export
-#'
 setinfo <- function(object, ...){
   UseMethod("setinfo")
 }

View File

@@ -13,7 +13,6 @@ setClass('xgb.DMatrix')
 #' dsub <- slice(dtrain, 1:3)
 #' @rdname slice
 #' @export
-#'
 slice <- function(object, ...){
   UseMethod("slice")
 }

View File

@@ -17,7 +17,6 @@
 #' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
 #' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
 #' @export
-#'
 xgb.DMatrix <- function(data, info = list(), missing = NA, ...) {
   if (typeof(data) == "character") {
     handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE),

View File

@@ -12,7 +12,6 @@
 #' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
 #' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
 #' @export
-#'
 xgb.DMatrix.save <- function(DMatrix, fname) {
   if (typeof(fname) != "character") {
     stop("xgb.save: fname must be character")

View File

@@ -90,7 +90,6 @@
 #'              max.depth =3, eta = 1, objective = "binary:logistic")
 #' print(history)
 #' @export
-#'
 xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing = NA,
                    prediction = FALSE, showsd = TRUE, metrics=list(),
                    obj = NULL, feval = NULL, stratified = TRUE, folds = NULL, verbose = T, print.every.n=1L,

View File

@@ -36,7 +36,6 @@
 #' # print the model without saving it to a file
 #' print(xgb.dump(bst))
 #' @export
-#'
 xgb.dump <- function(model = NULL, fname = NULL, fmap = "", with.stats=FALSE) {
   if (class(model) != "xgb.Booster") {
     stop("model: argument must be type xgb.Booster")

View File

@@ -15,7 +15,6 @@
 #' bst <- xgb.load('xgb.model')
 #' pred <- predict(bst, test$data)
 #' @export
-#'
 xgb.load <- function(modelfile) {
   if (is.null(modelfile))
     stop("xgb.load: modelfile cannot be NULL")

View File

@@ -16,7 +16,6 @@
 #' bst <- xgb.load('xgb.model')
 #' pred <- predict(bst, test$data)
 #' @export
-#'
 xgb.save <- function(model, fname) {
   if (typeof(fname) != "character") {
     stop("xgb.save: fname must be character")

View File

@@ -16,7 +16,6 @@
 #' bst <- xgb.load(raw)
 #' pred <- predict(bst, test$data)
 #' @export
-#'
 xgb.save.raw <- function(model) {
   if (class(model) == "xgb.Booster"){
     model <- model$handle

View File

@@ -120,7 +120,6 @@
 #' param <- list(max.depth = 2, eta = 1, silent = 1, objective=logregobj,eval_metric=evalerror)
 #' bst <- xgb.train(param, dtrain, nthread = 2, nround = 2, watchlist)
 #' @export
-#'
 xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
                       obj = NULL, feval = NULL, verbose = 1, print.every.n=1L,
                       early.stop.round = NULL, maximize = NULL,

View File

@@ -58,7 +58,6 @@
 #' pred <- predict(bst, test$data)
 #'
 #' @export
-#'
 xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
                     params = list(), nrounds,
                     verbose = 1, print.every.n = 1L, early.stop.round = NULL,

View File

@@ -1,10 +1,10 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/xgboost.R
 \docType{data}
 \name{agaricus.test}
 \alias{agaricus.test}
 \title{Test part from Mushroom Data Set}
 \format{A list containing a label vector, and a dgCMatrix object with 1611
 rows and 126 variables}
 \usage{
 data(agaricus.test)
@@ -24,8 +24,8 @@ This data set includes the following fields:
 \references{
 https://archive.ics.uci.edu/ml/datasets/Mushroom
 
 Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository
 [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California,
 School of Information and Computer Science.
 }
 \keyword{datasets}

View File

@@ -1,10 +1,10 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/xgboost.R
 \docType{data}
 \name{agaricus.train}
 \alias{agaricus.train}
 \title{Training part from Mushroom Data Set}
 \format{A list containing a label vector, and a dgCMatrix object with 6513
 rows and 127 variables}
 \usage{
 data(agaricus.train)
@@ -24,8 +24,8 @@ This data set includes the following fields:
 \references{
 https://archive.ics.uci.edu/ml/datasets/Mushroom
 
 Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository
 [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California,
 School of Information and Computer Science.
 }
 \keyword{datasets}

View File

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/getinfo.xgb.DMatrix.R
 \docType{methods}
 \name{getinfo}

View File

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/nrow.xgb.DMatrix.R
 \docType{methods}
 \name{nrow,xgb.DMatrix-method}
@@ -18,5 +18,6 @@ data(agaricus.train, package='xgboost')
 train <- agaricus.train
 dtrain <- xgb.DMatrix(train$data, label=train$label)
 stopifnot(nrow(dtrain) == nrow(train$data))
+
 }

View File

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/predict.xgb.Booster.R
 \docType{methods}
 \name{predict,xgb.Booster-method}
@@ -11,19 +11,19 @@
 \arguments{
 \item{object}{Object of class "xgb.Boost"}
 
 \item{newdata}{takes \code{matrix}, \code{dgCMatrix}, local data file or
 \code{xgb.DMatrix}.}
 
 \item{missing}{Missing is only used when input is dense matrix, pick a float
 value that represents missing value. Sometime a data use 0 or other extreme value to represents missing values.}
 
 \item{outputmargin}{whether the prediction should be shown in the original
 value of sum of functions, when outputmargin=TRUE, the prediction is
 untransformed margin value. In logistic regression, outputmargin=T will
 output value before logistic transformation.}
 
 \item{ntreelimit}{limit number of trees used in prediction, this parameter is
 only valid for gbtree, but not for gblinear. set it to be value bigger
 than 0. It will use all trees by default.}
 
 \item{predleaf}{whether predict leaf index instead. If set to TRUE, the output will be a matrix object.}
@@ -36,7 +36,7 @@ data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
 train <- agaricus.train
 test <- agaricus.test
 bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
                eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 pred <- predict(bst, test$data)
 }

View File

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/predict.xgb.Booster.handle.R
 \docType{methods}
 \name{predict,xgb.Booster.handle-method}

View File

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/setinfo.xgb.DMatrix.R
 \docType{methods}
 \name{setinfo}

View File

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/slice.xgb.DMatrix.R
 \docType{methods}
 \name{slice}

View File

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/xgb.DMatrix.R
 \name{xgb.DMatrix}
 \alias{xgb.DMatrix}
@@ -7,7 +7,7 @@
 xgb.DMatrix(data, info = list(), missing = NA, ...)
 }
 \arguments{
 \item{data}{a \code{matrix} object, a \code{dgCMatrix} object or a character
 indicating the data file.}
 
 \item{info}{a list of information of the xgb.DMatrix object}

View File

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/xgb.DMatrix.save.R
 \name{xgb.DMatrix.save}
 \alias{xgb.DMatrix.save}

View File

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/xgb.cv.R
 \name{xgb.cv}
 \alias{xgb.cv}
@@ -40,7 +40,7 @@ value that represents missing value. Sometime a data use 0 or other extreme valu
 \item{showsd}{\code{boolean}, whether show standard deviation of cross validation}
 
-\item{metrics,}{list of evaluation metrics to be used in corss validation,
+\item{metrics, }{list of evaluation metrics to be used in corss validation,
 when it is not specified, the evaluation metric is chosen according to objective function.
 Possible options are:
 \itemize{
@@ -51,11 +51,11 @@ value that represents missing value. Sometime a data use 0 or other extreme valu
 \item \code{merror} Exact matching error, used to evaluate multi-class classification
 }}
 
 \item{obj}{customized objective function. Returns gradient and second order
 gradient with given prediction and dtrain.}
 
 \item{feval}{custimized evaluation function. Returns
 \code{list(metric='metric-name', value='metric-value')} with given
 prediction and dtrain.}
 
 \item{stratified}{\code{boolean} whether sampling of folds should be stratified by the values of labels in \code{data}}
@@ -67,12 +67,12 @@ If folds are supplied, the nfold and stratified parameters would be ignored.}
 \item{print.every.n}{Print every N progress messages when \code{verbose>0}. Default is 1 which means all messages are printed.}
 
 \item{early.stop.round}{If \code{NULL}, the early stopping function is not triggered.
 If set to an integer \code{k}, training with a validation set will stop if the performance
 keeps getting worse consecutively for \code{k} rounds.}
 
 \item{maximize}{If \code{feval} and \code{early.stop.round} are set, then \code{maximize} must be set as well.
 \code{maximize=TRUE} means the larger the evaluation score the better.}
 
 \item{...}{other parameters to pass to \code{params}.}
 }
@@ -89,9 +89,9 @@ If \code{prediction = FALSE}, just a \code{data.table} with each mean and standa
 The cross valudation function of xgboost
 }
 \details{
 The original sample is randomly partitioned into \code{nfold} equal size subsamples.
 
 Of the \code{nfold} subsamples, a single subsample is retained as the validation data for testing the model, and the remaining \code{nfold - 1} subsamples are used as training data.
 
 The cross-validation process is then repeated \code{nrounds} times, with each of the \code{nfold} subsamples used exactly once as the validation data.
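The details section above describes plain k-fold cross-validation. A hedged usage sketch against the documented signature, assuming `dtrain` is built as in the package's other examples and picking metric names from the options listed above:

```r
library(xgboost)
data(agaricus.train, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)

# 5-fold stratified CV, reporting error and AUC with standard deviations.
history <- xgb.cv(params = list(max.depth = 3, eta = 1,
                                objective = "binary:logistic"),
                  data = dtrain, nrounds = 10, nfold = 5,
                  metrics = list("error", "auc"),
                  stratified = TRUE, showsd = TRUE)
print(history)
```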

View File

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/xgb.dump.R
 \name{xgb.dump}
 \alias{xgb.dump}
@@ -11,17 +11,17 @@ xgb.dump(model = NULL, fname = NULL, fmap = "", with.stats = FALSE)
 \item{fname}{the name of the text file where to save the model text dump. If not provided or set to \code{NULL} the function will return the model as a \code{character} vector.}
 
 \item{fmap}{feature map file representing the type of feature.
 Detailed description could be found at
 \url{https://github.com/dmlc/xgboost/wiki/Binary-Classification#dump-model}.
 See demo/ for walkthrough example in R, and
 \url{https://github.com/dmlc/xgboost/blob/master/demo/data/featmap.txt}
 for example Format.}
 
 \item{with.stats}{whether dump statistics of splits
 When this option is on, the model dump comes with two additional statistics:
 gain is the approximate loss function gain we get in each split;
 cover is the sum of second order gradient in each node.}
 }
 \value{
 if fname is not provided or set to \code{NULL} the function will return the model as a \code{character} vector. Otherwise it will return \code{TRUE}.
@@ -34,7 +34,7 @@ data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
 train <- agaricus.train
 test <- agaricus.test
 bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
                eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 # save the model in file 'xgb.model.dump'
 xgb.dump(bst, 'xgb.model.dump', with.stats = TRUE)

View File

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/xgb.importance.R
 \name{xgb.importance}
 \alias{xgb.importance}
@@ -24,7 +24,7 @@ xgb.importance(feature_names = NULL, filename_dump = NULL, model = NULL,
 A \code{data.table} of the features used in the model with their average gain (and their weight for boosted tree model) in the model.
 }
 \description{
 Read a xgboost model text dump.
 Can be tree or linear model (text dump of linear model are only supported in dev version of \code{Xgboost} for now).
 }
 \details{
@@ -32,7 +32,7 @@ This is the function to understand the model trained (and through your model, yo
 Results are returned for both linear and tree models.
 
 \code{data.table} is returned by the function.
 
 There are 3 columns :
 \itemize{
 \item \code{Features} name of the features as provided in \code{feature_names} or already present in the model dump.
@@ -53,12 +53,12 @@ If you need to remember one thing only: until you want to leave us early, don't
 \examples{
 data(agaricus.train, package='xgboost')
 
 # Both dataset are list with two items, a sparse matrix and labels
 # (labels = outcome column which will be learned).
 # Each column of the sparse Matrix is a feature in one hot encoding format.
 train <- agaricus.train
 
 bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
                eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 
 # train$data@Dimnames[[2]] represents the column names of the sparse matrix.
@@ -66,5 +66,6 @@ xgb.importance(train$data@Dimnames[[2]], model = bst)
 # Same thing with co-occurence computation this time
 xgb.importance(train$data@Dimnames[[2]], model = bst, data = train$data, label = train$label)
+
 }

View File

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/xgb.load.R
 \name{xgb.load}
 \alias{xgb.load}
@@ -17,7 +17,7 @@ data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
 train <- agaricus.train
 test <- agaricus.test
 bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
                eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 xgb.save(bst, 'xgb.model')
 bst <- xgb.load('xgb.model')

View File

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/xgb.model.dt.tree.R
 \name{xgb.model.dt.tree}
 \alias{xgb.model.dt.tree}
@@ -45,15 +45,16 @@ The content of the \code{data.table} is organised that way:
 \examples{
 data(agaricus.train, package='xgboost')
 
 #Both dataset are list with two items, a sparse matrix and labels
 #(labels = outcome column which will be learned).
 #Each column of the sparse Matrix is a feature in one hot encoding format.
 
 train <- agaricus.train
 
 bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
                eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 
 #agaricus.test$data@Dimnames[[2]] represents the column names of the sparse matrix.
 xgb.model.dt.tree(agaricus.train$data@Dimnames[[2]], model = bst)
+
 }

View File

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/xgb.plot.importance.R
 \name{xgb.plot.importance}
 \alias{xgb.plot.importance}
@@ -25,16 +25,17 @@ In particular you may want to override the title of the graph. To do so, add \co
 \examples{
 data(agaricus.train, package='xgboost')
 
 #Both dataset are list with two items, a sparse matrix and labels
 #(labels = outcome column which will be learned).
 #Each column of the sparse Matrix is a feature in one hot encoding format.
 
 train <- agaricus.train
 
 bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
                eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 
 #train$data@Dimnames[[2]] represents the column names of the sparse matrix.
 importance_matrix <- xgb.importance(train$data@Dimnames[[2]], model = bst)
 xgb.plot.importance(importance_matrix)
+
 }

View File

@@ -1,11 +1,11 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/xgb.plot.tree.R
 \name{xgb.plot.tree}
 \alias{xgb.plot.tree}
 \title{Plot a boosted tree model}
 \usage{
 xgb.plot.tree(feature_names = NULL, filename_dump = NULL, model = NULL,
-  n_first_tree = NULL, CSSstyle = NULL, width = NULL, height = NULL)
+  n_first_tree = NULL, width = NULL, height = NULL)
 }
 \arguments{
 \item{feature_names}{names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.}
@@ -16,8 +16,6 @@ xgb.plot.tree(feature_names = NULL, filename_dump = NULL, model = NULL,
 \item{n_first_tree}{limit the plot to the n first trees. If \code{NULL}, all trees of the model are plotted. Performance can be low for huge models.}
 
-\item{CSSstyle}{a \code{character} vector storing a css style to customize the appearance of nodes. Look at the \href{https://github.com/knsv/mermaid/wiki}{Mermaid wiki} for more information.}
-
 \item{width}{the width of the diagram in pixels.}
 
 \item{height}{the height of the diagram in pixels.}
@@ -26,7 +24,7 @@ xgb.plot.tree(feature_names = NULL, filename_dump = NULL, model = NULL,
 A \code{DiagrammeR} of the model.
 }
 \description{
 Read a tree model text dump.
 Plotting only works for boosted tree model (not linear model).
 }
 \details{
@@ -36,23 +34,24 @@ The content of each node is organised that way:
 \item \code{feature} value ;
 \item \code{cover}: the sum of second order gradient of training data classified to the leaf, if it is square loss, this simply corresponds to the number of instances in that branch. Deeper in the tree a node is, lower this metric will be ;
 \item \code{gain}: metric the importance of the node in the model.
 }
 
 Each branch finishes with a leaf. For each leaf, only the \code{cover} is indicated.
-It uses \href{https://github.com/knsv/mermaid/}{Mermaid} library for that purpose.
+It uses \href{http://www.graphviz.org/}{GraphViz} library for that purpose.
 }
 \examples{
 data(agaricus.train, package='xgboost')
 
 #Both dataset are list with two items, a sparse matrix and labels
 #(labels = outcome column which will be learned).
 #Each column of the sparse Matrix is a feature in one hot encoding format.
 train <- agaricus.train
 
 bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
                eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 
 #agaricus.test$data@Dimnames[[2]] represents the column names of the sparse matrix.
 xgb.plot.tree(agaricus.train$data@Dimnames[[2]], model = bst)
+
 }
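With `CSSstyle` gone and rendering moved from Mermaid to GraphViz via DiagrammeR, the call reduces to the remaining documented arguments. A hedged sketch mirroring the example above, with the optional size arguments added for illustration:

```r
library(xgboost)
data(agaricus.train, package = 'xgboost')
train <- agaricus.train
bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
               eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")

# Updated signature: no CSSstyle; width/height size the DiagrammeR output in pixels.
xgb.plot.tree(train$data@Dimnames[[2]], model = bst, width = 600, height = 400)
```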

View File

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/xgb.save.R
 \name{xgb.save}
 \alias{xgb.save}
@@ -19,7 +19,7 @@ data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
 train <- agaricus.train
 test <- agaricus.test
 bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
                eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 xgb.save(bst, 'xgb.model')
 bst <- xgb.load('xgb.model')

View File

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/xgb.save.raw.R
 \name{xgb.save.raw}
 \alias{xgb.save.raw}
@@ -18,7 +18,7 @@ data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
 train <- agaricus.train
 test <- agaricus.test
 bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
                eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 raw <- xgb.save.raw(bst)
 bst <- xgb.load(raw)

View File

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/xgb.train.R
 \name{xgb.train}
 \alias{xgb.train}
@@ -10,7 +10,7 @@ xgb.train(params = list(), data, nrounds, watchlist = list(), obj = NULL,
   save_name = "xgboost.model", ...)
 }
 \arguments{
 \item{params}{the list of parameters.
 
 1. General Parameters
@@ -18,30 +18,30 @@ xgb.train(params = list(), data, nrounds, watchlist = list(), obj = NULL,
 \item \code{booster} which booster to use, can be \code{gbtree} or \code{gblinear}. Default: \code{gbtree}
 \item \code{silent} 0 means printing running messages, 1 means silent mode. Default: 0
 }
 
 2. Booster Parameters
 
 2.1. Parameter for Tree Booster
 
 \itemize{
 \item \code{eta} control the learning rate: scale the contribution of each tree by a factor of \code{0 < eta < 1} when it is added to the current approximation. Used to prevent overfitting by making the boosting process more conservative. Lower value for \code{eta} implies larger value for \code{nrounds}: low \code{eta} value means model more robust to overfitting but slower to compute. Default: 0.3
 \item \code{gamma} minimum loss reduction required to make a further partition on a leaf node of the tree. the larger, the more conservative the algorithm will be.
 \item \code{max_depth} maximum depth of a tree. Default: 6
 \item \code{min_child_weight} minimum sum of instance weight(hessian) needed in a child. If the tree partition step results in a leaf node with the sum of instance weight less than min_child_weight, then the building process will give up further partitioning. In linear regression mode, this simply corresponds to minimum number of instances needed to be in each node. The larger, the more conservative the algorithm will be. Default: 1
 \item \code{subsample} subsample ratio of the training instance. Setting it to 0.5 means that xgboost randomly collected half of the data instances to grow trees and this will prevent overfitting. It makes computation shorter (because less data to analyse). It is advised to use this parameter with \code{eta} and increase \code{nround}. Default: 1
 \item \code{colsample_bytree} subsample ratio of columns when constructing each tree. Default: 1
 \item \code{num_parallel_tree} Experimental parameter. number of trees to grow per round. Useful to test Random Forest through Xgboost (set \code{colsample_bytree < 1}, \code{subsample < 1} and \code{round = 1}) accordingly. Default: 1
 }
 
 2.2. Parameter for Linear Booster
 
 \itemize{
 \item \code{lambda} L2 regularization term on weights. Default: 0
 \item \code{lambda_bias} L2 regularization term on bias. Default: 0
 \item \code{alpha} L1 regularization term on weights. (there is no L1 reg on bias because it is not important). Default: 0
 }
 
 3. Task Parameters
 
 \itemize{
 \item \code{objective} specify the learning task and the corresponding learning objective, users can pass a self-defined function to it. The default objective options are below:
@@ -51,7 +51,7 @@ xgb.train(params = list(), data, nrounds, watchlist = list(), obj = NULL,
 \item \code{binary:logistic} logistic regression for binary classification. Output probability.
 \item \code{binary:logitraw} logistic regression for binary classification, output score before logistic transformation.
 \item \code{num_class} set the number of classes. To use only with multiclass objectives.
-\item \code{multi:softmax} set xgboost to do multiclass classification using the softmax objective. Class is represented by a number and should be from 0 to \code{tonum_class}.
+\item \code{multi:softmax} set xgboost to do multiclass classification using the softmax objective. Class is represented by a number and should be from 0 to \code{num_class}.
 \item \code{multi:softprob} same as softmax, but output a vector of ndata * nclass, which can be further reshaped to ndata, nclass matrix. The result contains predicted probabilities of each data point belonging to each class.
 \item \code{rank:pairwise} set xgboost to do ranking task by minimizing the pairwise loss.
 }
@@ -64,25 +64,25 @@ xgb.train(params = list(), data, nrounds, watchlist = list(), obj = NULL,
 \item{nrounds}{the max number of iterations}
 
 \item{watchlist}{what information should be printed when \code{verbose=1} or
 \code{verbose=2}. Watchlist is used to specify validation set monitoring
 during training. For example user can specify
 watchlist=list(validation1=mat1, validation2=mat2) to watch
 the performance of each round's model on mat1 and mat2}
 
 \item{obj}{customized objective function. Returns gradient and second order
 gradient with given prediction and dtrain,}
 
 \item{feval}{custimized evaluation function. Returns
 \code{list(metric='metric-name', value='metric-value')} with given
 prediction and dtrain,}
 
 \item{verbose}{If 0, xgboost will stay silent. If 1, xgboost will print
 information of performance. If 2, xgboost will print information of both}
 
 \item{print.every.n}{Print every N progress messages when \code{verbose>0}. Default is 1 which means all messages are printed.}
 
 \item{early.stop.round}{If \code{NULL}, the early stopping function is not triggered.
 If set to an integer \code{k}, training with a validation set will stop if the performance
 keeps getting worse consecutively for \code{k} rounds.}
 
 \item{maximize}{If \code{feval} and \code{early.stop.round} are set, then \code{maximize} must be set as well.
@@ -98,24 +98,25 @@ keeps getting worse consecutively for \code{k} rounds.}
 An advanced interface for training xgboost model. Look at \code{\link{xgboost}} function for a simpler interface.
 }
 \details{
 This is the training function for \code{xgboost}.
 
 It supports advanced features such as \code{watchlist}, customized objective function (\code{feval}),
 therefore it is more flexible than \code{\link{xgboost}} function.
 
 Parallelization is automatically enabled if \code{OpenMP} is present.
 Number of threads can also be manually specified via \code{nthread} parameter.
 
 \code{eval_metric} parameter (not listed above) is set automatically by Xgboost but can be overriden by parameter. Below is provided the list of different metric optimized by Xgboost to help you to understand how it works inside or to use them with the \code{watchlist} parameter.
 \itemize{
 \item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error}
 \item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood}
+\item \code{mlogloss} multiclass logloss. \url{https://www.kaggle.com/wiki/MultiClassLogLoss}
 \item \code{error} Binary classification error rate. It is calculated as \code{(wrong cases) / (all cases)}. For the predictions, the evaluation will regard the instances with prediction value larger than 0.5 as positive instances, and the others as negative instances.
 \item \code{merror} Multiclass classification error rate. It is calculated as \code{(wrong cases) / (all cases)}.
 \item \code{auc} Area under the curve. \url{http://en.wikipedia.org/wiki/Receiver_operating_characteristic#'Area_under_curve} for ranking evaluation.
 \item \code{ndcg} Normalized Discounted Cumulative Gain (for ranking task). \url{http://en.wikipedia.org/wiki/NDCG}
 }
 
 Full list of parameters is available in the Wiki \url{https://github.com/dmlc/xgboost/wiki/Parameters}.
 
 This function only accepts an \code{\link{xgb.DMatrix}} object as the input.
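The `feval` and `early.stop.round` parameters documented above combine as in the following hedged sketch; `evalerror` follows the `list(metric=, value=)` contract quoted in the hunk, and `dtrain` is assumed built as in the other examples:

```r
library(xgboost)
data(agaricus.train, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)

# Customized evaluation: must return list(metric = ..., value = ...).
evalerror <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  err <- mean(as.numeric(preds > 0) != labels)
  list(metric = "error", value = err)
}

# Stop when the watched metric worsens for 3 consecutive rounds;
# maximize = FALSE because a lower error is better.
bst <- xgb.train(params = list(max.depth = 2, eta = 1,
                               objective = "binary:logitraw"),
                 data = dtrain, nrounds = 50,
                 watchlist = list(train = dtrain),
                 feval = evalerror,
                 early.stop.round = 3, maximize = FALSE)
```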

View File

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/xgboost.R
 \name{xgboost}
 \alias{xgboost}
@@ -10,13 +10,13 @@ xgboost(data = NULL, label = NULL, missing = NA, weight = NULL,
   save_name = "xgboost.model", ...)
 }
 \arguments{
 \item{data}{takes \code{matrix}, \code{dgCMatrix}, local data file or
 \code{xgb.DMatrix}.}
 
 \item{label}{the response variable. User should not set this field,
 if data is local data file or \code{xgb.DMatrix}.}
 
 \item{missing}{Missing is only used when input is dense matrix, pick a float
 value that represents missing value. Sometimes a data use 0 or other extreme value to represents missing values.}
 
 \item{weight}{a vector indicating the weight for each row of the input.}
@@ -34,21 +34,21 @@ Commonly used ones are:
 \item \code{max.depth} maximum depth of the tree
 \item \code{nthread} number of thread used in training, if not set, all threads are used
 }
 
 Look at \code{\link{xgb.train}} for a more complete list of parameters or \url{https://github.com/dmlc/xgboost/wiki/Parameters} for the full list.
 
 See also \code{demo/} for walkthrough example in R.}
 
 \item{nrounds}{the max number of iterations}
 
 \item{verbose}{If 0, xgboost will stay silent. If 1, xgboost will print
 information of performance. If 2, xgboost will print information of both
 performance and construction progress information}
 
 \item{print.every.n}{Print every N progress messages when \code{verbose>0}. Default is 1 which means all messages are printed.}
 
 \item{early.stop.round}{If \code{NULL}, the early stopping function is not triggered.
 If set to an integer \code{k}, training with a validation set will stop if the performance
 keeps getting worse consecutively for \code{k} rounds.}
 
 \item{maximize}{If \code{feval} and \code{early.stop.round} are set, then \code{maximize} must be set as well.
@@ -75,8 +75,9 @@ data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
 train <- agaricus.train
 test <- agaricus.test
 bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
                eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
 pred <- predict(bst, test$data)
+
 }