Merge pull request #605 from pommedeterresautee/master

Rewrite Viz function
Michaël Benesty 2015-11-08 08:40:22 +01:00
commit f5659e17d5
38 changed files with 154 additions and 160 deletions

R-package/DESCRIPTION

@@ -3,16 +3,16 @@ Type: Package
 Title: Extreme Gradient Boosting
 Version: 0.4-2
 Date: 2015-08-01
-Author: Tianqi Chen <tianqi.tchen@gmail.com>, Tong He <hetong007@gmail.com>, Michael Benesty <michael@benesty.fr>
+Author: Tianqi Chen <tianqi.tchen@gmail.com>, Tong He <hetong007@gmail.com>,
+    Michael Benesty <michael@benesty.fr>
 Maintainer: Tong He <hetong007@gmail.com>
-Description: Extreme Gradient Boosting, which is an
-    efficient implementation of gradient boosting framework.
-    This package is its R interface. The package includes efficient
-    linear model solver and tree learning algorithms. The package can automatically
-    do parallel computation on a single machine which could be more than 10 times faster
-    than existing gradient boosting packages. It supports various
-    objective functions, including regression, classification and ranking. The
-    package is made to be extensible, so that users are also allowed to define
+Description: Extreme Gradient Boosting, which is an efficient implementation
+    of gradient boosting framework. This package is its R interface. The package
+    includes efficient linear model solver and tree learning algorithms. The package
+    can automatically do parallel computation on a single machine which could be
+    more than 10 times faster than existing gradient boosting packages. It supports
+    various objective functions, including regression, classification and ranking.
+    The package is made to be extensible, so that users are also allowed to define
     their own objectives easily.
 License: Apache License (== 2.0) | file LICENSE
 URL: https://github.com/dmlc/xgboost
@@ -21,7 +21,7 @@ VignetteBuilder: knitr
 Suggests:
     knitr,
     ggplot2 (>= 1.0.0),
-    DiagrammeR (>= 0.6),
+    DiagrammeR (>= 0.8.1),
     Ckmeans.1d.dp (>= 3.3.1),
     vcd (>= 1.3),
     testthat
@@ -30,6 +30,7 @@ Depends:
 Imports:
     Matrix (>= 1.1-0),
     methods,
-    data.table (>= 1.9.4),
+    data.table (>= 1.9.6),
    magrittr (>= 1.5),
     stringr (>= 0.6.2)
+RoxygenNote: 5.0.0
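
The Description's last claim, that users can define their own objectives, corresponds to the obj argument of xgb.train. A minimal sketch of such an objective (this mirrors the logregobj function referenced in the xgb.train example further down; the body here is illustrative):

# Custom objective for xgb.train's obj argument: given raw predictions and
# the training xgb.DMatrix, return the gradient and hessian of the loss.
logregobj <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  preds <- 1 / (1 + exp(-preds))  # raw margin -> probability
  grad <- preds - labels          # first-order derivative of the loss
  hess <- preds * (1 - preds)     # second-order derivative of the loss
  list(grad = grad, hess = hess)
}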

R-package/NAMESPACE

@@ -1,4 +1,4 @@
-# Generated by roxygen2 (4.1.1): do not edit by hand
+# Generated by roxygen2: do not edit by hand
 
 export(getinfo)
 export(setinfo)

R-package/R/getinfo.xgb.DMatrix.R

@@ -23,7 +23,6 @@ setClass('xgb.DMatrix')
 #' stopifnot(all(labels2 == 1-labels))
 #' @rdname getinfo
 #' @export
-#'
 getinfo <- function(object, ...){
   UseMethod("getinfo")
 }

R-package/R/predict.xgb.Booster.R

@@ -29,7 +29,6 @@ setClass("xgb.Booster",
 #'                eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 #' pred <- predict(bst, test$data)
 #' @export
-#'
 setMethod("predict", signature = "xgb.Booster",
           definition = function(object, newdata, missing = NA,
                                 outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE) {

R-package/R/setinfo.xgb.DMatrix.R

@@ -21,7 +21,6 @@
 #' stopifnot(all(labels2 == 1-labels))
 #' @rdname setinfo
 #' @export
-#'
 setinfo <- function(object, ...){
   UseMethod("setinfo")
 }

R-package/R/slice.xgb.DMatrix.R

@@ -13,7 +13,6 @@ setClass('xgb.DMatrix')
 #' dsub <- slice(dtrain, 1:3)
 #' @rdname slice
 #' @export
-#'
 slice <- function(object, ...){
   UseMethod("slice")
 }

R-package/R/xgb.DMatrix.R

@@ -17,7 +17,6 @@
 #' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
 #' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
 #' @export
-#'
 xgb.DMatrix <- function(data, info = list(), missing = NA, ...) {
   if (typeof(data) == "character") {
     handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE),
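
The typeof(data) == "character" branch above is what makes both construction styles in the roxygen example work: a matrix builds the DMatrix in memory, while a character path loads one from disk. A short sketch using the same objects:

# Build from an in-memory sparse matrix, then round-trip through a file.
data(agaricus.train, package = 'xgboost')
train <- agaricus.train
dtrain <- xgb.DMatrix(train$data, label = train$label)
xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
dtrain2 <- xgb.DMatrix('xgb.DMatrix.data')  # character input -> load from file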

R-package/R/xgb.DMatrix.save.R

@@ -12,7 +12,6 @@
 #' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
 #' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
 #' @export
-#'
 xgb.DMatrix.save <- function(DMatrix, fname) {
   if (typeof(fname) != "character") {
     stop("xgb.save: fname must be character")

R-package/R/xgb.cv.R

@@ -90,7 +90,6 @@
 #'                  max.depth =3, eta = 1, objective = "binary:logistic")
 #' print(history)
 #' @export
-#'
 xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing = NA,
                    prediction = FALSE, showsd = TRUE, metrics=list(),
                    obj = NULL, feval = NULL, stratified = TRUE, folds = NULL, verbose = T, print.every.n=1L,
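
For reference, a hedged usage sketch of this signature; parameter values are illustrative and mirror the roxygen example above:

# 5-fold cross-validation on the agaricus data; "error" requests the
# binary classification error rate as the evaluation metric.
data(agaricus.train, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
history <- xgb.cv(params = list(max.depth = 3, eta = 1,
                                objective = "binary:logistic"),
                  data = dtrain, nrounds = 3, nfold = 5,
                  metrics = list("error"))
print(history)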

R-package/R/xgb.dump.R

@@ -36,7 +36,6 @@
 #' # print the model without saving it to a file
 #' print(xgb.dump(bst))
 #' @export
-#'
 xgb.dump <- function(model = NULL, fname = NULL, fmap = "", with.stats=FALSE) {
   if (class(model) != "xgb.Booster") {
     stop("model: argument must be type xgb.Booster")

R-package/R/xgb.load.R

@@ -15,7 +15,6 @@
 #' bst <- xgb.load('xgb.model')
 #' pred <- predict(bst, test$data)
 #' @export
-#'
 xgb.load <- function(modelfile) {
   if (is.null(modelfile))
     stop("xgb.load: modelfile cannot be NULL")

R-package/R/xgb.plot.tree.R

@@ -4,22 +4,12 @@
 #' Plotting only works for boosted tree model (not linear model).
 #'
 #' @importFrom data.table data.table
-#' @importFrom data.table set
-#' @importFrom data.table rbindlist
 #' @importFrom data.table :=
-#' @importFrom data.table copy
 #' @importFrom magrittr %>%
-#' @importFrom magrittr not
-#' @importFrom magrittr add
-#' @importFrom stringr str_extract
-#' @importFrom stringr str_split
-#' @importFrom stringr str_extract
-#' @importFrom stringr str_trim
 #' @param feature_names names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.
 #' @param filename_dump the path to the text file storing the model. Model dump must include the gain per feature and per tree (parameter \code{with.stats = T} in function \code{xgb.dump}). Possible to provide a model directly (see \code{model} argument).
 #' @param model generated by the \code{xgb.train} function. Avoid the creation of a dump file.
 #' @param n_first_tree limit the plot to the n first trees. If \code{NULL}, all trees of the model are plotted. Performance can be low for huge models.
-#' @param CSSstyle a \code{character} vector storing a css style to customize the appearance of nodes. Look at the \href{https://github.com/knsv/mermaid/wiki}{Mermaid wiki} for more information.
 #' @param width the width of the diagram in pixels.
 #' @param height the height of the diagram in pixels.
 #'
@@ -36,7 +26,7 @@
 #' }
 #'
 #' Each branch finishes with a leaf. For each leaf, only the \code{cover} is indicated.
-#' It uses \href{https://github.com/knsv/mermaid/}{Mermaid} library for that purpose.
+#' It uses \href{http://www.graphviz.org/}{GraphViz} library for that purpose.
 #'
 #' @examples
 #' data(agaricus.train, package='xgboost')
@@ -53,12 +43,7 @@
 #' xgb.plot.tree(agaricus.train$data@@Dimnames[[2]], model = bst)
 #'
 #' @export
-#'
-xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NULL, n_first_tree = NULL, CSSstyle = NULL, width = NULL, height = NULL){
+xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NULL, n_first_tree = NULL, width = NULL, height = NULL){
 
-  if (!(class(CSSstyle) %in% c("character", "NULL") && length(CSSstyle) <= 1)) {
-    stop("style: Has to be a character vector of size 1.")
-  }
-
   if (!class(model) %in% c("xgb.Booster", "NULL")) {
     stop("model: Has to be an object of class xgb.Booster model generaged by the xgb.train function.")
@@ -74,23 +59,38 @@ xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NULL, n_first_tree = NULL, width = NULL, height = NULL){
     allTrees <- xgb.model.dt.tree(feature_names = feature_names, model = model, n_first_tree = n_first_tree)
   }
 
-  allTrees[Feature != "Leaf" ,yesPath := paste(ID,"(", Feature, "<br/>Cover: ", Cover, "<br/>Gain: ", Quality, ")-->|< ", Split, "|", Yes, ">", Yes.Feature, "]", sep = "")]
-
-  allTrees[Feature != "Leaf" ,noPath := paste(ID,"(", Feature, ")-->|>= ", Split, "|", No, ">", No.Feature, "]", sep = "")]
-
-  if(is.null(CSSstyle)){
-    CSSstyle <- "classDef greenNode fill:#A2EB86, stroke:#04C4AB, stroke-width:2px;classDef redNode fill:#FFA070, stroke:#FF5E5E, stroke-width:2px"
-  }
-
-  yes <- allTrees[Feature != "Leaf", c(Yes)] %>% paste(collapse = ",") %>% paste("class ", ., " greenNode", sep = "")
-  no <- allTrees[Feature != "Leaf", c(No)] %>% paste(collapse = ",") %>% paste("class ", ., " redNode", sep = "")
-
-  path <- allTrees[Feature != "Leaf", c(yesPath, noPath)] %>% .[order(.)] %>% paste(sep = "", collapse = ";") %>% paste("graph LR", .,collapse = "", sep = ";") %>% paste(CSSstyle, yes, no, sep = ";")
-
-  DiagrammeR::mermaid(path, width, height)
+  allTrees[, label:= paste0(Feature, "\nCover: ", Cover, "\nGain: ", Quality)]
+  allTrees[, shape:= "rectangle"][Feature == "Leaf", shape:= "oval"]
+  allTrees[, filledcolor:= "Beige"][Feature == "Leaf", filledcolor:= "Khaki"]
+
+  # rev is used to put the first tree on top.
+  nodes <- DiagrammeR::create_nodes(nodes = allTrees[,ID] %>% rev,
+                                    label = allTrees[,label] %>% rev,
+                                    style = "filled",
+                                    color = "DimGray",
+                                    fillcolor= allTrees[,filledcolor] %>% rev,
+                                    shape = allTrees[,shape] %>% rev,
+                                    data = allTrees[,Feature] %>% rev,
+                                    fontname = "Helvetica"
+                                    )
+
+  edges <- DiagrammeR::create_edges(from = allTrees[Feature != "Leaf", c(ID)] %>% rep(2),
+                                    to = allTrees[Feature != "Leaf", c(Yes, No)],
+                                    label = allTrees[Feature != "Leaf", paste("<",Split)] %>% c(rep("",nrow(allTrees[Feature != "Leaf"]))),
+                                    color = "DimGray",
+                                    arrowsize = "1.5",
+                                    arrowhead = "vee",
+                                    fontname = "Helvetica",
+                                    rel = "leading_to")
+
+  graph <- DiagrammeR::create_graph(nodes_df = nodes,
+                                    edges_df = edges,
+                                    graph_attrs = "rankdir = LR")
+
+  DiagrammeR::render_graph(graph, width = width, height = height)
 }
 
 # Avoid error messages during CRAN check.
 # The reason is that these variables are never declared
 # They are mainly column names inferred by Data.table...
-globalVariables(c("Feature", "yesPath", "ID", "Cover", "Quality", "Split", "Yes", "Yes.Feature", "noPath", "No", "No.Feature", "."))
+globalVariables(c("Feature", "ID", "Cover", "Quality", "Split", "Yes", "No", ".", "shape", "filledcolor", "label"))

R-package/R/xgb.save.R

@@ -16,7 +16,6 @@
 #' bst <- xgb.load('xgb.model')
 #' pred <- predict(bst, test$data)
 #' @export
-#'
 xgb.save <- function(model, fname) {
   if (typeof(fname) != "character") {
     stop("xgb.save: fname must be character")

R-package/R/xgb.save.raw.R

@@ -16,7 +16,6 @@
 #' bst <- xgb.load(raw)
 #' pred <- predict(bst, test$data)
 #' @export
-#'
 xgb.save.raw <- function(model) {
   if (class(model) == "xgb.Booster"){
     model <- model$handle
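
Together with xgb.load, this enables an in-memory round trip with no file involved, as in the roxygen example above (bst and test$data are the fitted booster and test matrix from that example):

# Serialize a fitted booster to a raw vector and reload it directly.
raw <- xgb.save.raw(bst)
bst2 <- xgb.load(raw)
pred <- predict(bst2, test$data)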

R-package/R/xgb.train.R

@@ -43,7 +43,7 @@
 #' \item \code{binary:logistic} logistic regression for binary classification. Output probability.
 #' \item \code{binary:logitraw} logistic regression for binary classification, output score before logistic transformation.
 #' \item \code{num_class} set the number of classes. To use only with multiclass objectives.
-#' \item \code{multi:softmax} set xgboost to do multiclass classification using the softmax objective. Class is represented by a number and should be from 0 to \code{tonum_class}.
+#' \item \code{multi:softmax} set xgboost to do multiclass classification using the softmax objective. Class is represented by a number and should be from 0 to \code{num_class}.
 #' \item \code{multi:softprob} same as softmax, but output a vector of ndata * nclass, which can be further reshaped to ndata, nclass matrix. The result contains predicted probabilities of each data point belonging to each class.
 #' \item \code{rank:pairwise} set xgboost to do ranking task by minimizing the pairwise loss.
 #' }
@@ -89,6 +89,7 @@
 #' \itemize{
 #' \item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error}
 #' \item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood}
+#' \item \code{mlogloss} multiclass logloss. \url{https://www.kaggle.com/wiki/MultiClassLogLoss}
 #' \item \code{error} Binary classification error rate. It is calculated as \code{(wrong cases) / (all cases)}. For the predictions, the evaluation will regard the instances with prediction value larger than 0.5 as positive instances, and the others as negative instances.
 #' \item \code{merror} Multiclass classification error rate. It is calculated as \code{(wrong cases) / (all cases)}.
 #' \item \code{auc} Area under the curve. \url{http://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_curve} for ranking evaluation.
@@ -119,7 +120,6 @@
 #' param <- list(max.depth = 2, eta = 1, silent = 1, objective=logregobj,eval_metric=evalerror)
 #' bst <- xgb.train(param, dtrain, nthread = 2, nround = 2, watchlist)
 #' @export
-#'
 xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
                       obj = NULL, feval = NULL, verbose = 1, print.every.n=1L,
                       early.stop.round = NULL, maximize = NULL,
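
A sketch tying together the two documentation fixes above: class labels running from 0 to num_class - 1, and the newly documented mlogloss metric. Dataset and parameter values are illustrative; dtrain is assumed to be an xgb.DMatrix whose labels here are 0, 1 or 2:

# Multiclass training: multi:softprob outputs per-class probabilities,
# and mlogloss evaluates them on the watchlist each round.
param <- list(objective = "multi:softprob", num_class = 3,
              eval_metric = "mlogloss", max.depth = 2, eta = 1)
bst <- xgb.train(params = param, data = dtrain, nrounds = 2,
                 watchlist = list(train = dtrain))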

R-package/R/xgboost.R

@@ -58,7 +58,6 @@
 #' pred <- predict(bst, test$data)
 #'
 #' @export
-#'
 xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
                     params = list(), nrounds,
                     verbose = 1, print.every.n = 1L, early.stop.round = NULL,

R-package/man/agaricus.test.Rd

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/xgboost.R
 \docType{data}
 \name{agaricus.test}

R-package/man/agaricus.train.Rd

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/xgboost.R
 \docType{data}
 \name{agaricus.train}

R-package/man/getinfo.Rd

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/getinfo.xgb.DMatrix.R
 \docType{methods}
 \name{getinfo}

R-package/man/nrow-xgb.DMatrix-method.Rd

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/nrow.xgb.DMatrix.R
 \docType{methods}
 \name{nrow,xgb.DMatrix-method}
@@ -18,5 +18,6 @@ data(agaricus.train, package='xgboost')
 train <- agaricus.train
 dtrain <- xgb.DMatrix(train$data, label=train$label)
 stopifnot(nrow(dtrain) == nrow(train$data))
+
 }

R-package/man/predict-xgb.Booster-method.Rd

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/predict.xgb.Booster.R
 \docType{methods}
 \name{predict,xgb.Booster-method}

R-package/man/predict-xgb.Booster.handle-method.Rd

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/predict.xgb.Booster.handle.R
 \docType{methods}
 \name{predict,xgb.Booster.handle-method}

R-package/man/setinfo.Rd

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/setinfo.xgb.DMatrix.R
 \docType{methods}
 \name{setinfo}

R-package/man/slice.Rd

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/slice.xgb.DMatrix.R
 \docType{methods}
 \name{slice}

R-package/man/xgb.DMatrix.Rd

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/xgb.DMatrix.R
 \name{xgb.DMatrix}
 \alias{xgb.DMatrix}

R-package/man/xgb.DMatrix.save.Rd

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/xgb.DMatrix.save.R
 \name{xgb.DMatrix.save}
 \alias{xgb.DMatrix.save}

R-package/man/xgb.cv.Rd

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/xgb.cv.R
 \name{xgb.cv}
 \alias{xgb.cv}

R-package/man/xgb.dump.Rd

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/xgb.dump.R
 \name{xgb.dump}
 \alias{xgb.dump}

R-package/man/xgb.importance.Rd

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/xgb.importance.R
 \name{xgb.importance}
 \alias{xgb.importance}
@@ -66,5 +66,6 @@ xgb.importance(train$data@Dimnames[[2]], model = bst)
 # Same thing with co-occurence computation this time
 xgb.importance(train$data@Dimnames[[2]], model = bst, data = train$data, label = train$label)
+
 }

R-package/man/xgb.load.Rd

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/xgb.load.R
 \name{xgb.load}
 \alias{xgb.load}

R-package/man/xgb.model.dt.tree.Rd

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/xgb.model.dt.tree.R
 \name{xgb.model.dt.tree}
 \alias{xgb.model.dt.tree}
@@ -55,5 +55,6 @@ bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
 #agaricus.test$data@Dimnames[[2]] represents the column names of the sparse matrix.
 xgb.model.dt.tree(agaricus.train$data@Dimnames[[2]], model = bst)
+
 }

R-package/man/xgb.plot.importance.Rd

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/xgb.plot.importance.R
 \name{xgb.plot.importance}
 \alias{xgb.plot.importance}
@@ -36,5 +36,6 @@ bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
 #train$data@Dimnames[[2]] represents the column names of the sparse matrix.
 importance_matrix <- xgb.importance(train$data@Dimnames[[2]], model = bst)
 xgb.plot.importance(importance_matrix)
+
 }

R-package/man/xgb.plot.tree.Rd

@@ -1,11 +1,11 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/xgb.plot.tree.R
 \name{xgb.plot.tree}
 \alias{xgb.plot.tree}
 \title{Plot a boosted tree model}
 \usage{
 xgb.plot.tree(feature_names = NULL, filename_dump = NULL, model = NULL,
-  n_first_tree = NULL, CSSstyle = NULL, width = NULL, height = NULL)
+  n_first_tree = NULL, width = NULL, height = NULL)
 }
 \arguments{
 \item{feature_names}{names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.}
@@ -16,8 +16,6 @@ xgb.plot.tree(feature_names = NULL, filename_dump = NULL, model = NULL,
 \item{n_first_tree}{limit the plot to the n first trees. If \code{NULL}, all trees of the model are plotted. Performance can be low for huge models.}
 
-\item{CSSstyle}{a \code{character} vector storing a css style to customize the appearance of nodes. Look at the \href{https://github.com/knsv/mermaid/wiki}{Mermaid wiki} for more information.}
-
 \item{width}{the width of the diagram in pixels.}
 
 \item{height}{the height of the diagram in pixels.}
@@ -39,7 +37,7 @@ The content of each node is organised that way:
 }
 
 Each branch finishes with a leaf. For each leaf, only the \code{cover} is indicated.
-It uses \href{https://github.com/knsv/mermaid/}{Mermaid} library for that purpose.
+It uses \href{http://www.graphviz.org/}{GraphViz} library for that purpose.
 }
 \examples{
 data(agaricus.train, package='xgboost')
@@ -54,5 +52,6 @@ bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
 #agaricus.test$data@Dimnames[[2]] represents the column names of the sparse matrix.
 xgb.plot.tree(agaricus.train$data@Dimnames[[2]], model = bst)
+
 }

R-package/man/xgb.save.Rd

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/xgb.save.R
 \name{xgb.save}
 \alias{xgb.save}

R-package/man/xgb.save.raw.Rd

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/xgb.save.raw.R
 \name{xgb.save.raw}
 \alias{xgb.save.raw}

R-package/man/xgb.train.Rd

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/xgb.train.R
 \name{xgb.train}
 \alias{xgb.train}
@@ -51,7 +51,7 @@ xgb.train(params = list(), data, nrounds, watchlist = list(), obj = NULL,
 \item \code{binary:logistic} logistic regression for binary classification. Output probability.
 \item \code{binary:logitraw} logistic regression for binary classification, output score before logistic transformation.
 \item \code{num_class} set the number of classes. To use only with multiclass objectives.
-\item \code{multi:softmax} set xgboost to do multiclass classification using the softmax objective. Class is represented by a number and should be from 0 to \code{tonum_class}.
+\item \code{multi:softmax} set xgboost to do multiclass classification using the softmax objective. Class is represented by a number and should be from 0 to \code{num_class}.
 \item \code{multi:softprob} same as softmax, but output a vector of ndata * nclass, which can be further reshaped to ndata, nclass matrix. The result contains predicted probabilities of each data point belonging to each class.
 \item \code{rank:pairwise} set xgboost to do ranking task by minimizing the pairwise loss.
 }
@@ -110,6 +110,7 @@ Number of threads can also be manually specified via \code{nthread} parameter.
 \itemize{
 \item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error}
 \item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood}
+\item \code{mlogloss} multiclass logloss. \url{https://www.kaggle.com/wiki/MultiClassLogLoss}
 \item \code{error} Binary classification error rate. It is calculated as \code{(wrong cases) / (all cases)}. For the predictions, the evaluation will regard the instances with prediction value larger than 0.5 as positive instances, and the others as negative instances.
 \item \code{merror} Multiclass classification error rate. It is calculated as \code{(wrong cases) / (all cases)}.
 \item \code{auc} Area under the curve. \url{http://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_curve} for ranking evaluation.

R-package/man/xgboost.Rd

@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/xgboost.R
 \name{xgboost}
 \alias{xgboost}
@@ -78,5 +78,6 @@ test <- agaricus.test
 bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
                eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
 pred <- predict(bst, test$data)
+
 }