Merge pull request #605 from pommedeterresautee/master

Rewrite Viz function
Michaël Benesty 2015-11-08 08:40:22 +01:00
commit f5659e17d5
38 changed files with 154 additions and 160 deletions

View File

@ -3,16 +3,16 @@ Type: Package
Title: Extreme Gradient Boosting
Version: 0.4-2
Date: 2015-08-01
Author: Tianqi Chen <tianqi.tchen@gmail.com>, Tong He <hetong007@gmail.com>, Michael Benesty <michael@benesty.fr>
Author: Tianqi Chen <tianqi.tchen@gmail.com>, Tong He <hetong007@gmail.com>,
Michael Benesty <michael@benesty.fr>
Maintainer: Tong He <hetong007@gmail.com>
Description: Extreme Gradient Boosting, which is an
efficient implementation of gradient boosting framework.
This package is its R interface. The package includes efficient
linear model solver and tree learning algorithms. The package can automatically
do parallel computation on a single machine which could be more than 10 times faster
than existing gradient boosting packages. It supports various
objective functions, including regression, classification and ranking. The
package is made to be extensible, so that users are also allowed to define
Description: Extreme Gradient Boosting, which is an efficient implementation
of gradient boosting framework. This package is its R interface. The package
includes efficient linear model solver and tree learning algorithms. The package
can automatically do parallel computation on a single machine which could be
more than 10 times faster than existing gradient boosting packages. It supports
various objective functions, including regression, classification and ranking.
The package is made to be extensible, so that users are also allowed to define
their own objectives easily.
License: Apache License (== 2.0) | file LICENSE
URL: https://github.com/dmlc/xgboost
@ -21,7 +21,7 @@ VignetteBuilder: knitr
Suggests:
knitr,
ggplot2 (>= 1.0.0),
DiagrammeR (>= 0.6),
DiagrammeR (>= 0.8.1),
Ckmeans.1d.dp (>= 3.3.1),
vcd (>= 1.3),
testthat
@ -30,6 +30,7 @@ Depends:
Imports:
Matrix (>= 1.1-0),
methods,
data.table (>= 1.9.4),
data.table (>= 1.9.6),
magrittr (>= 1.5),
stringr (>= 0.6.2)
RoxygenNote: 5.0.0

View File

@ -1,4 +1,4 @@
# Generated by roxygen2 (4.1.1): do not edit by hand
# Generated by roxygen2: do not edit by hand
export(getinfo)
export(setinfo)

View File

@ -23,7 +23,6 @@ setClass('xgb.DMatrix')
#' stopifnot(all(labels2 == 1-labels))
#' @rdname getinfo
#' @export
#'
getinfo <- function(object, ...){
UseMethod("getinfo")
}

View File

@ -29,7 +29,6 @@ setClass("xgb.Booster",
#' eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
#' pred <- predict(bst, test$data)
#' @export
#'
setMethod("predict", signature = "xgb.Booster",
definition = function(object, newdata, missing = NA,
outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE) {
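
A hedged sketch of the extra prediction modes exposed by this signature, on a model like the one in the example above (settings copied from the package examples):

library(xgboost)
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
train <- agaricus.train
test <- agaricus.test
bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
               eta = 1, nrounds = 2, objective = "binary:logistic")
p  <- predict(bst, test$data)                       # probabilities
m  <- predict(bst, test$data, outputmargin = TRUE)  # raw scores before the logistic transform
lf <- predict(bst, test$data, predleaf = TRUE)      # matrix of leaf indices, one column per tree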

View File

@ -21,7 +21,6 @@
#' stopifnot(all(labels2 == 1-labels))
#' @rdname setinfo
#' @export
#'
setinfo <- function(object, ...){
UseMethod("setinfo")
}

View File

@ -13,7 +13,6 @@ setClass('xgb.DMatrix')
#' dsub <- slice(dtrain, 1:3)
#' @rdname slice
#' @export
#'
slice <- function(object, ...){
UseMethod("slice")
}

View File

@ -17,7 +17,6 @@
#' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
#' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
#' @export
#'
xgb.DMatrix <- function(data, info = list(), missing = NA, ...) {
if (typeof(data) == "character") {
handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE),

View File

@ -12,7 +12,6 @@
#' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
#' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
#' @export
#'
xgb.DMatrix.save <- function(DMatrix, fname) {
if (typeof(fname) != "character") {
stop("xgb.save: fname must be character")

View File

@ -90,7 +90,6 @@
#' max.depth = 3, eta = 1, objective = "binary:logistic")
#' print(history)
#' @export
#'
xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing = NA,
prediction = FALSE, showsd = TRUE, metrics=list(),
obj = NULL, feval = NULL, stratified = TRUE, folds = NULL, verbose = T, print.every.n=1L,

View File

@ -36,7 +36,6 @@
#' # print the model without saving it to a file
#' print(xgb.dump(bst))
#' @export
#'
xgb.dump <- function(model = NULL, fname = NULL, fmap = "", with.stats=FALSE) {
if (class(model) != "xgb.Booster") {
stop("model: argument must be type xgb.Booster")

View File

@ -15,7 +15,6 @@
#' bst <- xgb.load('xgb.model')
#' pred <- predict(bst, test$data)
#' @export
#'
xgb.load <- function(modelfile) {
if (is.null(modelfile))
stop("xgb.load: modelfile cannot be NULL")

View File

@ -4,23 +4,13 @@
#' Plotting only works for boosted tree models (not linear models).
#'
#' @importFrom data.table data.table
#' @importFrom data.table set
#' @importFrom data.table rbindlist
#' @importFrom data.table :=
#' @importFrom data.table copy
#' @importFrom magrittr %>%
#' @importFrom magrittr not
#' @importFrom magrittr add
#' @importFrom stringr str_extract
#' @importFrom stringr str_split
#' @importFrom stringr str_extract
#' @importFrom stringr str_trim
#' @param feature_names names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.
#' @param filename_dump the path to the text file storing the model. Model dump must include the gain per feature and per tree (parameter \code{with.stats = T} in function \code{xgb.dump}). It is also possible to provide a model directly (see the \code{model} argument).
#' @param model generated by the \code{xgb.train} function. Avoids the creation of a dump file.
#' @param n_first_tree limit the plot to the first n trees. If \code{NULL}, all trees of the model are plotted. Performance can be low for huge models.
#' @param CSSstyle a \code{character} vector storing a css style to customize the appearance of nodes. Look at the \href{https://github.com/knsv/mermaid/wiki}{Mermaid wiki} for more information.
#' @param width the width of the diagram in pixels.
#' @param width the width of the diagram in pixels.
#' @param height the height of the diagram in pixels.
#'
#' @return A \code{DiagrammeR} of the model.
@ -36,7 +26,7 @@
#' }
#'
#' Each branch finishes with a leaf. For each leaf, only the \code{cover} is indicated.
#' It uses \href{https://github.com/knsv/mermaid/}{Mermaid} library for that purpose.
#' It uses \href{http://www.graphviz.org/}{GraphViz} library for that purpose.
#'
#' @examples
#' data(agaricus.train, package='xgboost')
@ -53,12 +43,7 @@
#' xgb.plot.tree(agaricus.train$data@@Dimnames[[2]], model = bst)
#'
#' @export
#'
xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NULL, n_first_tree = NULL, CSSstyle = NULL, width = NULL, height = NULL){
if (!(class(CSSstyle) %in% c("character", "NULL") && length(CSSstyle) <= 1)) {
stop("style: Has to be a character vector of size 1.")
}
xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NULL, n_first_tree = NULL, width = NULL, height = NULL){
if (!class(model) %in% c("xgb.Booster", "NULL")) {
stop("model: Has to be an object of class xgb.Booster model generaged by the xgb.train function.")
@ -74,23 +59,38 @@ xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NU
allTrees <- xgb.model.dt.tree(feature_names = feature_names, model = model, n_first_tree = n_first_tree)
}
allTrees[Feature != "Leaf" ,yesPath := paste(ID,"(", Feature, "<br/>Cover: ", Cover, "<br/>Gain: ", Quality, ")-->|< ", Split, "|", Yes, ">", Yes.Feature, "]", sep = "")]
allTrees[, label:= paste0(Feature, "\nCover: ", Cover, "\nGain: ", Quality)]
allTrees[, shape:= "rectangle"][Feature == "Leaf", shape:= "oval"]
allTrees[, filledcolor:= "Beige"][Feature == "Leaf", filledcolor:= "Khaki"]
allTrees[Feature != "Leaf" ,noPath := paste(ID,"(", Feature, ")-->|>= ", Split, "|", No, ">", No.Feature, "]", sep = "")]
# rev is used to put the first tree on top.
nodes <- DiagrammeR::create_nodes(nodes = allTrees[,ID] %>% rev,
label = allTrees[,label] %>% rev,
style = "filled",
color = "DimGray",
fillcolor= allTrees[,filledcolor] %>% rev,
shape = allTrees[,shape] %>% rev,
data = allTrees[,Feature] %>% rev,
fontname = "Helvetica"
)
if(is.null(CSSstyle)){
CSSstyle <- "classDef greenNode fill:#A2EB86, stroke:#04C4AB, stroke-width:2px;classDef redNode fill:#FFA070, stroke:#FF5E5E, stroke-width:2px"
}
edges <- DiagrammeR::create_edges(from = allTrees[Feature != "Leaf", c(ID)] %>% rep(2),
to = allTrees[Feature != "Leaf", c(Yes, No)],
label = allTrees[Feature != "Leaf", paste("<",Split)] %>% c(rep("",nrow(allTrees[Feature != "Leaf"]))),
color = "DimGray",
arrowsize = "1.5",
arrowhead = "vee",
fontname = "Helvetica",
rel = "leading_to")
yes <- allTrees[Feature != "Leaf", c(Yes)] %>% paste(collapse = ",") %>% paste("class ", ., " greenNode", sep = "")
graph <- DiagrammeR::create_graph(nodes_df = nodes,
edges_df = edges,
graph_attrs = "rankdir = LR")
no <- allTrees[Feature != "Leaf", c(No)] %>% paste(collapse = ",") %>% paste("class ", ., " redNode", sep = "")
path <- allTrees[Feature != "Leaf", c(yesPath, noPath)] %>% .[order(.)] %>% paste(sep = "", collapse = ";") %>% paste("graph LR", .,collapse = "", sep = ";") %>% paste(CSSstyle, yes, no, sep = ";")
DiagrammeR::mermaid(path, width, height)
DiagrammeR::render_graph(graph, width = width, height = height)
}
# Avoid error messages during CRAN check:
# these variables are never declared explicitly;
# they are mainly column names inferred by data.table.
globalVariables(c("Feature", "yesPath", "ID", "Cover", "Quality", "Split", "Yes", "Yes.Feature", "noPath", "No", "No.Feature", "."))
globalVariables(c("Feature", "ID", "Cover", "Quality", "Split", "Yes", "No", ".", "shape", "filledcolor", "label"))

View File

@ -16,7 +16,6 @@
#' bst <- xgb.load('xgb.model')
#' pred <- predict(bst, test$data)
#' @export
#'
xgb.save <- function(model, fname) {
if (typeof(fname) != "character") {
stop("xgb.save: fname must be character")

View File

@ -16,7 +16,6 @@
#' bst <- xgb.load(raw)
#' pred <- predict(bst, test$data)
#' @export
#'
xgb.save.raw <- function(model) {
if (class(model) == "xgb.Booster"){
model <- model$handle

View File

@ -43,7 +43,7 @@
#' \item \code{binary:logistic} logistic regression for binary classification. Output probability.
#' \item \code{binary:logitraw} logistic regression for binary classification, output score before logistic transformation.
#' \item \code{num_class} set the number of classes. To use only with multiclass objectives.
#' \item \code{multi:softmax} set xgboost to do multiclass classification using the softmax objective. Class is represented by a number and should be from 0 to \code{tonum_class}.
#' \item \code{multi:softmax} set xgboost to do multiclass classification using the softmax objective. Class is represented by a number and should be from 0 to \code{num_class}.
#' \item \code{multi:softprob} same as softmax, but output a vector of ndata * nclass, which can be further reshaped to ndata, nclass matrix. The result contains predicted probabilities of each data point belonging to each class.
#' \item \code{rank:pairwise} set xgboost to do ranking task by minimizing the pairwise loss.
#' }
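
A hedged sketch of a multiclass configuration, tying multi:softmax to num_class; the data here is random and purely illustrative:

library(xgboost)
set.seed(1)
# 100 observations, 3 features, labels in 0..2
X <- matrix(rnorm(300), ncol = 3)
y <- sample(0:2, 100, replace = TRUE)  # labels run from 0 to num_class - 1
bst <- xgboost(data = X, label = y, nrounds = 2,
               params = list(objective = "multi:softmax", num_class = 3))
pred <- predict(bst, X)                # one predicted class id per row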
@ -89,6 +89,7 @@
#' \itemize{
#' \item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error}
#' \item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood}
#' \item \code{mlogloss} multiclass logloss. \url{https://www.kaggle.com/wiki/MultiClassLogLoss}
#' \item \code{error} Binary classification error rate. It is calculated as \code{(wrong cases) / (all cases)}. For the predictions, the evaluation will regard the instances with prediction value larger than 0.5 as positive instances, and the others as negative instances.
#' \item \code{merror} Multiclass classification error rate. It is calculated as \code{(wrong cases) / (all cases)}.
#' \item \code{auc} Area under the curve. \url{http://en.wikipedia.org/wiki/Receiver_operating_characteristic#'Area_under_curve} for ranking evaluation.
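
A hedged sketch of requesting one of these metrics explicitly, reusing the agaricus data from the package examples:

library(xgboost)
data(agaricus.train, package = 'xgboost')
train <- agaricus.train
dtrain <- xgb.DMatrix(train$data, label = train$label)
# eval_metric overrides the default metric implied by the objective
param <- list(objective = "binary:logistic", max.depth = 2, eta = 1,
              eval_metric = "auc")
bst <- xgb.train(params = param, data = dtrain, nrounds = 2,
                 watchlist = list(train = dtrain))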
@ -119,7 +120,6 @@
#' param <- list(max.depth = 2, eta = 1, silent = 1, objective = logregobj, eval_metric = evalerror)
#' bst <- xgb.train(param, dtrain, nthread = 2, nround = 2, watchlist)
#' @export
#'
xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
obj = NULL, feval = NULL, verbose = 1, print.every.n=1L,
early.stop.round = NULL, maximize = NULL,

View File

@ -58,7 +58,6 @@
#' pred <- predict(bst, test$data)
#'
#' @export
#'
xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
params = list(), nrounds,
verbose = 1, print.every.n = 1L, early.stop.round = NULL,

View File

@ -1,4 +1,4 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xgboost.R
\docType{data}
\name{agaricus.test}

View File

@ -1,4 +1,4 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xgboost.R
\docType{data}
\name{agaricus.train}

View File

@ -1,4 +1,4 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/getinfo.xgb.DMatrix.R
\docType{methods}
\name{getinfo}

View File

@ -1,4 +1,4 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/nrow.xgb.DMatrix.R
\docType{methods}
\name{nrow,xgb.DMatrix-method}
@ -18,5 +18,6 @@ data(agaricus.train, package='xgboost')
train <- agaricus.train
dtrain <- xgb.DMatrix(train$data, label=train$label)
stopifnot(nrow(dtrain) == nrow(train$data))
}

View File

@ -1,4 +1,4 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/predict.xgb.Booster.R
\docType{methods}
\name{predict,xgb.Booster-method}

View File

@ -1,4 +1,4 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/predict.xgb.Booster.handle.R
\docType{methods}
\name{predict,xgb.Booster.handle-method}

View File

@ -1,4 +1,4 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/setinfo.xgb.DMatrix.R
\docType{methods}
\name{setinfo}

View File

@ -1,4 +1,4 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/slice.xgb.DMatrix.R
\docType{methods}
\name{slice}

View File

@ -1,4 +1,4 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xgb.DMatrix.R
\name{xgb.DMatrix}
\alias{xgb.DMatrix}

View File

@ -1,4 +1,4 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xgb.DMatrix.save.R
\name{xgb.DMatrix.save}
\alias{xgb.DMatrix.save}

View File

@ -1,4 +1,4 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xgb.cv.R
\name{xgb.cv}
\alias{xgb.cv}
@ -40,7 +40,7 @@ value that represents missing value. Sometimes a dataset uses 0 or other extreme valu
\item{showsd}{\code{boolean}, whether to show the standard deviation of cross validation}
\item{metrics,}{list of evaluation metrics to be used in cross validation,
\item{metrics, }{list of evaluation metrics to be used in cross validation,
when it is not specified, the evaluation metric is chosen according to objective function.
Possible options are:
\itemize{
@ -72,7 +72,7 @@ If set to an integer \code{k}, training with a validation set will stop if the p
keeps getting worse consecutively for \code{k} rounds.}
\item{maximize}{If \code{feval} and \code{early.stop.round} are set, then \code{maximize} must be set as well.
\code{maximize=TRUE} means the larger the evaluation score the better.}
\code{maximize=TRUE} means the larger the evaluation score the better.}
\item{...}{other parameters to pass to \code{params}.}
}
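
A hedged sketch of early stopping in cross validation, again on the agaricus data; the fold and round counts are arbitrary:

library(xgboost)
data(agaricus.train, package = 'xgboost')
train <- agaricus.train
history <- xgb.cv(params = list(objective = "binary:logistic",
                                max.depth = 2, eta = 1),
                  data = train$data, label = train$label,
                  nrounds = 50, nfold = 5, metrics = list("error"),
                  early.stop.round = 3,  # stop after 3 consecutively worse rounds
                  maximize = FALSE)      # for error, smaller is better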

View File

@ -1,4 +1,4 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xgb.dump.R
\name{xgb.dump}
\alias{xgb.dump}
@ -19,9 +19,9 @@ See demo/ for walkthrough example in R, and
for example Format.}
\item{with.stats}{whether to dump statistics of splits
When this option is on, the model dump comes with two additional statistics:
gain is the approximate loss function gain we get in each split;
cover is the sum of second order gradient in each node.}
When this option is on, the model dump comes with two additional statistics:
gain is the approximate loss function gain we get in each split;
cover is the sum of second order gradient in each node.}
}
\value{
if fname is not provided or set to \code{NULL} the function will return the model as a \code{character} vector. Otherwise it will return \code{TRUE}.
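
A hedged sketch of a dump with statistics enabled, with model settings copied from the package examples:

library(xgboost)
data(agaricus.train, package = 'xgboost')
train <- agaricus.train
bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
               eta = 1, nrounds = 2, objective = "binary:logistic")
# with.stats = TRUE adds the per-split gain and per-node cover to each line;
# with fname = NULL the dump comes back as a character vector
dump <- xgb.dump(bst, with.stats = TRUE)
head(dump)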

View File

@ -1,4 +1,4 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xgb.importance.R
\name{xgb.importance}
\alias{xgb.importance}
@ -66,5 +66,6 @@ xgb.importance(train$data@Dimnames[[2]], model = bst)
# Same thing with co-occurrence computation this time
xgb.importance(train$data@Dimnames[[2]], model = bst, data = train$data, label = train$label)
}

View File

@ -1,4 +1,4 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xgb.load.R
\name{xgb.load}
\alias{xgb.load}

View File

@ -1,4 +1,4 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xgb.model.dt.tree.R
\name{xgb.model.dt.tree}
\alias{xgb.model.dt.tree}
@ -55,5 +55,6 @@ bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
#agaricus.test$data@Dimnames[[2]] represents the column names of the sparse matrix.
xgb.model.dt.tree(agaricus.train$data@Dimnames[[2]], model = bst)
}

View File

@ -1,4 +1,4 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xgb.plot.importance.R
\name{xgb.plot.importance}
\alias{xgb.plot.importance}
@ -36,5 +36,6 @@ bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
#train$data@Dimnames[[2]] represents the column names of the sparse matrix.
importance_matrix <- xgb.importance(train$data@Dimnames[[2]], model = bst)
xgb.plot.importance(importance_matrix)
}

View File

@ -1,11 +1,11 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xgb.plot.tree.R
\name{xgb.plot.tree}
\alias{xgb.plot.tree}
\title{Plot a boosted tree model}
\usage{
xgb.plot.tree(feature_names = NULL, filename_dump = NULL, model = NULL,
n_first_tree = NULL, CSSstyle = NULL, width = NULL, height = NULL)
n_first_tree = NULL, width = NULL, height = NULL)
}
\arguments{
\item{feature_names}{names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.}
@ -16,8 +16,6 @@ xgb.plot.tree(feature_names = NULL, filename_dump = NULL, model = NULL,
\item{n_first_tree}{limit the plot to the first n trees. If \code{NULL}, all trees of the model are plotted. Performance can be low for huge models.}
\item{CSSstyle}{a \code{character} vector storing a css style to customize the appearance of nodes. Look at the \href{https://github.com/knsv/mermaid/wiki}{Mermaid wiki} for more information.}
\item{width}{the width of the diagram in pixels.}
\item{height}{the height of the diagram in pixels.}
@ -39,7 +37,7 @@ The content of each node is organised that way:
}
Each branch finishes with a leaf. For each leaf, only the \code{cover} is indicated.
It uses \href{https://github.com/knsv/mermaid/}{Mermaid} library for that purpose.
It uses \href{http://www.graphviz.org/}{GraphViz} library for that purpose.
}
\examples{
data(agaricus.train, package='xgboost')
@ -54,5 +52,6 @@ bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
#agaricus.test$data@Dimnames[[2]] represents the column names of the sparse matrix.
xgb.plot.tree(agaricus.train$data@Dimnames[[2]], model = bst)
}

View File

@ -1,4 +1,4 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xgb.save.R
\name{xgb.save}
\alias{xgb.save}

View File

@ -1,4 +1,4 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xgb.save.raw.R
\name{xgb.save.raw}
\alias{xgb.save.raw}

View File

@ -1,4 +1,4 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xgb.train.R
\name{xgb.train}
\alias{xgb.train}
@ -51,7 +51,7 @@ xgb.train(params = list(), data, nrounds, watchlist = list(), obj = NULL,
\item \code{binary:logistic} logistic regression for binary classification. Output probability.
\item \code{binary:logitraw} logistic regression for binary classification, output score before logistic transformation.
\item \code{num_class} set the number of classes. To use only with multiclass objectives.
\item \code{multi:softmax} set xgboost to do multiclass classification using the softmax objective. Class is represented by a number and should be from 0 to \code{tonum_class}.
\item \code{multi:softmax} set xgboost to do multiclass classification using the softmax objective. Class is represented by a number and should be from 0 to \code{num_class}.
\item \code{multi:softprob} same as softmax, but output a vector of ndata * nclass, which can be further reshaped to ndata, nclass matrix. The result contains predicted probabilities of each data point belonging to each class.
\item \code{rank:pairwise} set xgboost to do ranking task by minimizing the pairwise loss.
}
@ -64,10 +64,10 @@ xgb.train(params = list(), data, nrounds, watchlist = list(), obj = NULL,
\item{nrounds}{the max number of iterations}
\item{watchlist}{what information should be printed when \code{verbose=1} or
\code{verbose=2}. Watchlist is used to specify validation set monitoring
during training. For example user can specify
watchlist=list(validation1=mat1, validation2=mat2) to watch
the performance of each round's model on mat1 and mat2}
\code{verbose=2}. Watchlist is used to specify validation set monitoring
during training. For example user can specify
watchlist=list(validation1=mat1, validation2=mat2) to watch
the performance of each round's model on mat1 and mat2}
\item{obj}{customized objective function. Returns gradient and second order
gradient with given prediction and dtrain,}
@ -110,6 +110,7 @@ Number of threads can also be manually specified via \code{nthread} parameter.
\itemize{
\item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error}
\item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood}
\item \code{mlogloss} multiclass logloss. \url{https://www.kaggle.com/wiki/MultiClassLogLoss}
\item \code{error} Binary classification error rate. It is calculated as \code{(wrong cases) / (all cases)}. For the predictions, the evaluation will regard the instances with prediction value larger than 0.5 as positive instances, and the others as negative instances.
\item \code{merror} Multiclass classification error rate. It is calculated as \code{(wrong cases) / (all cases)}.
\item \code{auc} Area under the curve. \url{http://en.wikipedia.org/wiki/Receiver_operating_characteristic#'Area_under_curve} for ranking evaluation.

View File

@ -1,4 +1,4 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xgboost.R
\name{xgboost}
\alias{xgboost}
@ -78,5 +78,6 @@ test <- agaricus.test
bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
pred <- predict(bst, test$data)
}