Merge pull request #605 from pommedeterresautee/master
Rewrite Viz function
This commit is contained in:
commit
f5659e17d5
@ -3,16 +3,16 @@ Type: Package
|
|||||||
Title: Extreme Gradient Boosting
|
Title: Extreme Gradient Boosting
|
||||||
Version: 0.4-2
|
Version: 0.4-2
|
||||||
Date: 2015-08-01
|
Date: 2015-08-01
|
||||||
Author: Tianqi Chen <tianqi.tchen@gmail.com>, Tong He <hetong007@gmail.com>, Michael Benesty <michael@benesty.fr>
|
Author: Tianqi Chen <tianqi.tchen@gmail.com>, Tong He <hetong007@gmail.com>,
|
||||||
|
Michael Benesty <michael@benesty.fr>
|
||||||
Maintainer: Tong He <hetong007@gmail.com>
|
Maintainer: Tong He <hetong007@gmail.com>
|
||||||
Description: Extreme Gradient Boosting, which is an
|
Description: Extreme Gradient Boosting, which is an efficient implementation
|
||||||
efficient implementation of gradient boosting framework.
|
of gradient boosting framework. This package is its R interface. The package
|
||||||
This package is its R interface. The package includes efficient
|
includes efficient linear model solver and tree learning algorithms. The package
|
||||||
linear model solver and tree learning algorithms. The package can automatically
|
can automatically do parallel computation on a single machine which could be
|
||||||
do parallel computation on a single machine which could be more than 10 times faster
|
more than 10 times faster than existing gradient boosting packages. It supports
|
||||||
than existing gradient boosting packages. It supports various
|
various objective functions, including regression, classification and ranking.
|
||||||
objective functions, including regression, classification and ranking. The
|
The package is made to be extensible, so that users are also allowed to define
|
||||||
package is made to be extensible, so that users are also allowed to define
|
|
||||||
their own objectives easily.
|
their own objectives easily.
|
||||||
License: Apache License (== 2.0) | file LICENSE
|
License: Apache License (== 2.0) | file LICENSE
|
||||||
URL: https://github.com/dmlc/xgboost
|
URL: https://github.com/dmlc/xgboost
|
||||||
@ -21,7 +21,7 @@ VignetteBuilder: knitr
|
|||||||
Suggests:
|
Suggests:
|
||||||
knitr,
|
knitr,
|
||||||
ggplot2 (>= 1.0.0),
|
ggplot2 (>= 1.0.0),
|
||||||
DiagrammeR (>= 0.6),
|
DiagrammeR (>= 0.8.1),
|
||||||
Ckmeans.1d.dp (>= 3.3.1),
|
Ckmeans.1d.dp (>= 3.3.1),
|
||||||
vcd (>= 1.3),
|
vcd (>= 1.3),
|
||||||
testthat
|
testthat
|
||||||
@ -30,6 +30,7 @@ Depends:
|
|||||||
Imports:
|
Imports:
|
||||||
Matrix (>= 1.1-0),
|
Matrix (>= 1.1-0),
|
||||||
methods,
|
methods,
|
||||||
data.table (>= 1.9.4),
|
data.table (>= 1.9.6),
|
||||||
magrittr (>= 1.5),
|
magrittr (>= 1.5),
|
||||||
stringr (>= 0.6.2)
|
stringr (>= 0.6.2)
|
||||||
|
RoxygenNote: 5.0.0
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
# Generated by roxygen2 (4.1.1): do not edit by hand
|
# Generated by roxygen2: do not edit by hand
|
||||||
|
|
||||||
export(getinfo)
|
export(getinfo)
|
||||||
export(setinfo)
|
export(setinfo)
|
||||||
|
|||||||
@ -23,7 +23,6 @@ setClass('xgb.DMatrix')
|
|||||||
#' stopifnot(all(labels2 == 1-labels))
|
#' stopifnot(all(labels2 == 1-labels))
|
||||||
#' @rdname getinfo
|
#' @rdname getinfo
|
||||||
#' @export
|
#' @export
|
||||||
#'
|
|
||||||
getinfo <- function(object, ...){
|
getinfo <- function(object, ...){
|
||||||
UseMethod("getinfo")
|
UseMethod("getinfo")
|
||||||
}
|
}
|
||||||
|
|||||||
@ -29,7 +29,6 @@ setClass("xgb.Booster",
|
|||||||
#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
|
#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
|
||||||
#' pred <- predict(bst, test$data)
|
#' pred <- predict(bst, test$data)
|
||||||
#' @export
|
#' @export
|
||||||
#'
|
|
||||||
setMethod("predict", signature = "xgb.Booster",
|
setMethod("predict", signature = "xgb.Booster",
|
||||||
definition = function(object, newdata, missing = NA,
|
definition = function(object, newdata, missing = NA,
|
||||||
outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE) {
|
outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE) {
|
||||||
|
|||||||
@ -21,7 +21,6 @@
|
|||||||
#' stopifnot(all(labels2 == 1-labels))
|
#' stopifnot(all(labels2 == 1-labels))
|
||||||
#' @rdname setinfo
|
#' @rdname setinfo
|
||||||
#' @export
|
#' @export
|
||||||
#'
|
|
||||||
setinfo <- function(object, ...){
|
setinfo <- function(object, ...){
|
||||||
UseMethod("setinfo")
|
UseMethod("setinfo")
|
||||||
}
|
}
|
||||||
|
|||||||
@ -13,7 +13,6 @@ setClass('xgb.DMatrix')
|
|||||||
#' dsub <- slice(dtrain, 1:3)
|
#' dsub <- slice(dtrain, 1:3)
|
||||||
#' @rdname slice
|
#' @rdname slice
|
||||||
#' @export
|
#' @export
|
||||||
#'
|
|
||||||
slice <- function(object, ...){
|
slice <- function(object, ...){
|
||||||
UseMethod("slice")
|
UseMethod("slice")
|
||||||
}
|
}
|
||||||
|
|||||||
@ -17,7 +17,6 @@
|
|||||||
#' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
|
#' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
|
||||||
#' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
|
#' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
|
||||||
#' @export
|
#' @export
|
||||||
#'
|
|
||||||
xgb.DMatrix <- function(data, info = list(), missing = NA, ...) {
|
xgb.DMatrix <- function(data, info = list(), missing = NA, ...) {
|
||||||
if (typeof(data) == "character") {
|
if (typeof(data) == "character") {
|
||||||
handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE),
|
handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE),
|
||||||
|
|||||||
@ -12,7 +12,6 @@
|
|||||||
#' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
|
#' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
|
||||||
#' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
|
#' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
|
||||||
#' @export
|
#' @export
|
||||||
#'
|
|
||||||
xgb.DMatrix.save <- function(DMatrix, fname) {
|
xgb.DMatrix.save <- function(DMatrix, fname) {
|
||||||
if (typeof(fname) != "character") {
|
if (typeof(fname) != "character") {
|
||||||
stop("xgb.save: fname must be character")
|
stop("xgb.save: fname must be character")
|
||||||
|
|||||||
@ -90,7 +90,6 @@
|
|||||||
#' max.depth =3, eta = 1, objective = "binary:logistic")
|
#' max.depth =3, eta = 1, objective = "binary:logistic")
|
||||||
#' print(history)
|
#' print(history)
|
||||||
#' @export
|
#' @export
|
||||||
#'
|
|
||||||
xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing = NA,
|
xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing = NA,
|
||||||
prediction = FALSE, showsd = TRUE, metrics=list(),
|
prediction = FALSE, showsd = TRUE, metrics=list(),
|
||||||
obj = NULL, feval = NULL, stratified = TRUE, folds = NULL, verbose = T, print.every.n=1L,
|
obj = NULL, feval = NULL, stratified = TRUE, folds = NULL, verbose = T, print.every.n=1L,
|
||||||
|
|||||||
@ -36,7 +36,6 @@
|
|||||||
#' # print the model without saving it to a file
|
#' # print the model without saving it to a file
|
||||||
#' print(xgb.dump(bst))
|
#' print(xgb.dump(bst))
|
||||||
#' @export
|
#' @export
|
||||||
#'
|
|
||||||
xgb.dump <- function(model = NULL, fname = NULL, fmap = "", with.stats=FALSE) {
|
xgb.dump <- function(model = NULL, fname = NULL, fmap = "", with.stats=FALSE) {
|
||||||
if (class(model) != "xgb.Booster") {
|
if (class(model) != "xgb.Booster") {
|
||||||
stop("model: argument must be type xgb.Booster")
|
stop("model: argument must be type xgb.Booster")
|
||||||
|
|||||||
@ -15,7 +15,6 @@
|
|||||||
#' bst <- xgb.load('xgb.model')
|
#' bst <- xgb.load('xgb.model')
|
||||||
#' pred <- predict(bst, test$data)
|
#' pred <- predict(bst, test$data)
|
||||||
#' @export
|
#' @export
|
||||||
#'
|
|
||||||
xgb.load <- function(modelfile) {
|
xgb.load <- function(modelfile) {
|
||||||
if (is.null(modelfile))
|
if (is.null(modelfile))
|
||||||
stop("xgb.load: modelfile cannot be NULL")
|
stop("xgb.load: modelfile cannot be NULL")
|
||||||
|
|||||||
@ -4,23 +4,13 @@
|
|||||||
#' Plotting only works for boosted tree model (not linear model).
|
#' Plotting only works for boosted tree model (not linear model).
|
||||||
#'
|
#'
|
||||||
#' @importFrom data.table data.table
|
#' @importFrom data.table data.table
|
||||||
#' @importFrom data.table set
|
|
||||||
#' @importFrom data.table rbindlist
|
|
||||||
#' @importFrom data.table :=
|
#' @importFrom data.table :=
|
||||||
#' @importFrom data.table copy
|
|
||||||
#' @importFrom magrittr %>%
|
#' @importFrom magrittr %>%
|
||||||
#' @importFrom magrittr not
|
|
||||||
#' @importFrom magrittr add
|
|
||||||
#' @importFrom stringr str_extract
|
|
||||||
#' @importFrom stringr str_split
|
|
||||||
#' @importFrom stringr str_extract
|
|
||||||
#' @importFrom stringr str_trim
|
|
||||||
#' @param feature_names names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.
|
#' @param feature_names names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.
|
||||||
#' @param filename_dump the path to the text file storing the model. Model dump must include the gain per feature and per tree (parameter \code{with.stats = T} in function \code{xgb.dump}). Possible to provide a model directly (see \code{model} argument).
|
#' @param filename_dump the path to the text file storing the model. Model dump must include the gain per feature and per tree (parameter \code{with.stats = T} in function \code{xgb.dump}). Possible to provide a model directly (see \code{model} argument).
|
||||||
#' @param model generated by the \code{xgb.train} function. Avoid the creation of a dump file.
|
#' @param model generated by the \code{xgb.train} function. Avoid the creation of a dump file.
|
||||||
#' @param n_first_tree limit the plot to the n first trees. If \code{NULL}, all trees of the model are plotted. Performance can be low for huge models.
|
#' @param n_first_tree limit the plot to the n first trees. If \code{NULL}, all trees of the model are plotted. Performance can be low for huge models.
|
||||||
#' @param CSSstyle a \code{character} vector storing a css style to customize the appearance of nodes. Look at the \href{https://github.com/knsv/mermaid/wiki}{Mermaid wiki} for more information.
|
#' @param width the width of the diagram in pixels.
|
||||||
#' @param width the width of the diagram in pixels.
|
|
||||||
#' @param height the height of the diagram in pixels.
|
#' @param height the height of the diagram in pixels.
|
||||||
#'
|
#'
|
||||||
#' @return A \code{DiagrammeR} of the model.
|
#' @return A \code{DiagrammeR} of the model.
|
||||||
@ -36,7 +26,7 @@
|
|||||||
#' }
|
#' }
|
||||||
#'
|
#'
|
||||||
#' Each branch finishes with a leaf. For each leaf, only the \code{cover} is indicated.
|
#' Each branch finishes with a leaf. For each leaf, only the \code{cover} is indicated.
|
||||||
#' It uses \href{https://github.com/knsv/mermaid/}{Mermaid} library for that purpose.
|
#' It uses \href{http://www.graphviz.org/}{GraphViz} library for that purpose.
|
||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
#' data(agaricus.train, package='xgboost')
|
#' data(agaricus.train, package='xgboost')
|
||||||
@ -53,12 +43,7 @@
|
|||||||
#' xgb.plot.tree(agaricus.train$data@@Dimnames[[2]], model = bst)
|
#' xgb.plot.tree(agaricus.train$data@@Dimnames[[2]], model = bst)
|
||||||
#'
|
#'
|
||||||
#' @export
|
#' @export
|
||||||
#'
|
xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NULL, n_first_tree = NULL, width = NULL, height = NULL){
|
||||||
xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NULL, n_first_tree = NULL, CSSstyle = NULL, width = NULL, height = NULL){
|
|
||||||
|
|
||||||
if (!(class(CSSstyle) %in% c("character", "NULL") && length(CSSstyle) <= 1)) {
|
|
||||||
stop("style: Has to be a character vector of size 1.")
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!class(model) %in% c("xgb.Booster", "NULL")) {
|
if (!class(model) %in% c("xgb.Booster", "NULL")) {
|
||||||
stop("model: Has to be an object of class xgb.Booster model generaged by the xgb.train function.")
|
stop("model: Has to be an object of class xgb.Booster model generaged by the xgb.train function.")
|
||||||
@ -74,23 +59,38 @@ xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NU
|
|||||||
allTrees <- xgb.model.dt.tree(feature_names = feature_names, model = model, n_first_tree = n_first_tree)
|
allTrees <- xgb.model.dt.tree(feature_names = feature_names, model = model, n_first_tree = n_first_tree)
|
||||||
}
|
}
|
||||||
|
|
||||||
allTrees[Feature != "Leaf" ,yesPath := paste(ID,"(", Feature, "<br/>Cover: ", Cover, "<br/>Gain: ", Quality, ")-->|< ", Split, "|", Yes, ">", Yes.Feature, "]", sep = "")]
|
allTrees[, label:= paste0(Feature, "\nCover: ", Cover, "\nGain: ", Quality)]
|
||||||
|
allTrees[, shape:= "rectangle"][Feature == "Leaf", shape:= "oval"]
|
||||||
|
allTrees[, filledcolor:= "Beige"][Feature == "Leaf", filledcolor:= "Khaki"]
|
||||||
|
|
||||||
allTrees[Feature != "Leaf" ,noPath := paste(ID,"(", Feature, ")-->|>= ", Split, "|", No, ">", No.Feature, "]", sep = "")]
|
# rev is used to put the first tree on top.
|
||||||
|
nodes <- DiagrammeR::create_nodes(nodes = allTrees[,ID] %>% rev,
|
||||||
|
label = allTrees[,label] %>% rev,
|
||||||
|
style = "filled",
|
||||||
|
color = "DimGray",
|
||||||
|
fillcolor= allTrees[,filledcolor] %>% rev,
|
||||||
|
shape = allTrees[,shape] %>% rev,
|
||||||
|
data = allTrees[,Feature] %>% rev,
|
||||||
|
fontname = "Helvetica"
|
||||||
|
)
|
||||||
|
|
||||||
if(is.null(CSSstyle)){
|
edges <- DiagrammeR::create_edges(from = allTrees[Feature != "Leaf", c(ID)] %>% rep(2),
|
||||||
CSSstyle <- "classDef greenNode fill:#A2EB86, stroke:#04C4AB, stroke-width:2px;classDef redNode fill:#FFA070, stroke:#FF5E5E, stroke-width:2px"
|
to = allTrees[Feature != "Leaf", c(Yes, No)],
|
||||||
}
|
label = allTrees[Feature != "Leaf", paste("<",Split)] %>% c(rep("",nrow(allTrees[Feature != "Leaf"]))),
|
||||||
|
color = "DimGray",
|
||||||
|
arrowsize = "1.5",
|
||||||
|
arrowhead = "vee",
|
||||||
|
fontname = "Helvetica",
|
||||||
|
rel = "leading_to")
|
||||||
|
|
||||||
yes <- allTrees[Feature != "Leaf", c(Yes)] %>% paste(collapse = ",") %>% paste("class ", ., " greenNode", sep = "")
|
graph <- DiagrammeR::create_graph(nodes_df = nodes,
|
||||||
|
edges_df = edges,
|
||||||
|
graph_attrs = "rankdir = LR")
|
||||||
|
|
||||||
no <- allTrees[Feature != "Leaf", c(No)] %>% paste(collapse = ",") %>% paste("class ", ., " redNode", sep = "")
|
DiagrammeR::render_graph(graph, width = width, height = height)
|
||||||
|
|
||||||
path <- allTrees[Feature != "Leaf", c(yesPath, noPath)] %>% .[order(.)] %>% paste(sep = "", collapse = ";") %>% paste("graph LR", .,collapse = "", sep = ";") %>% paste(CSSstyle, yes, no, sep = ";")
|
|
||||||
DiagrammeR::mermaid(path, width, height)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Avoid error messages during CRAN check.
|
# Avoid error messages during CRAN check.
|
||||||
# The reason is that these variables are never declared
|
# The reason is that these variables are never declared
|
||||||
# They are mainly column names inferred by data.table...
|
# They are mainly column names inferred by data.table...
|
||||||
globalVariables(c("Feature", "yesPath", "ID", "Cover", "Quality", "Split", "Yes", "Yes.Feature", "noPath", "No", "No.Feature", "."))
|
globalVariables(c("Feature", "ID", "Cover", "Quality", "Split", "Yes", "No", ".", "shape", "filledcolor", "label"))
|
||||||
|
|||||||
@ -16,7 +16,6 @@
|
|||||||
#' bst <- xgb.load('xgb.model')
|
#' bst <- xgb.load('xgb.model')
|
||||||
#' pred <- predict(bst, test$data)
|
#' pred <- predict(bst, test$data)
|
||||||
#' @export
|
#' @export
|
||||||
#'
|
|
||||||
xgb.save <- function(model, fname) {
|
xgb.save <- function(model, fname) {
|
||||||
if (typeof(fname) != "character") {
|
if (typeof(fname) != "character") {
|
||||||
stop("xgb.save: fname must be character")
|
stop("xgb.save: fname must be character")
|
||||||
|
|||||||
@ -16,7 +16,6 @@
|
|||||||
#' bst <- xgb.load(raw)
|
#' bst <- xgb.load(raw)
|
||||||
#' pred <- predict(bst, test$data)
|
#' pred <- predict(bst, test$data)
|
||||||
#' @export
|
#' @export
|
||||||
#'
|
|
||||||
xgb.save.raw <- function(model) {
|
xgb.save.raw <- function(model) {
|
||||||
if (class(model) == "xgb.Booster"){
|
if (class(model) == "xgb.Booster"){
|
||||||
model <- model$handle
|
model <- model$handle
|
||||||
|
|||||||
@ -43,7 +43,7 @@
|
|||||||
#' \item \code{binary:logistic} logistic regression for binary classification. Output probability.
|
#' \item \code{binary:logistic} logistic regression for binary classification. Output probability.
|
||||||
#' \item \code{binary:logitraw} logistic regression for binary classification, output score before logistic transformation.
|
#' \item \code{binary:logitraw} logistic regression for binary classification, output score before logistic transformation.
|
||||||
#' \item \code{num_class} set the number of classes. To use only with multiclass objectives.
|
#' \item \code{num_class} set the number of classes. To use only with multiclass objectives.
|
||||||
#' \item \code{multi:softmax} set xgboost to do multiclass classification using the softmax objective. Class is represented by a number and should be from 0 to \code{tonum_class}.
|
#' \item \code{multi:softmax} set xgboost to do multiclass classification using the softmax objective. Class is represented by a number and should be from 0 to \code{num_class}.
|
||||||
#' \item \code{multi:softprob} same as softmax, but output a vector of ndata * nclass, which can be further reshaped to ndata, nclass matrix. The result contains predicted probabilities of each data point belonging to each class.
|
#' \item \code{multi:softprob} same as softmax, but output a vector of ndata * nclass, which can be further reshaped to ndata, nclass matrix. The result contains predicted probabilities of each data point belonging to each class.
|
||||||
#' \item \code{rank:pairwise} set xgboost to do ranking task by minimizing the pairwise loss.
|
#' \item \code{rank:pairwise} set xgboost to do ranking task by minimizing the pairwise loss.
|
||||||
#' }
|
#' }
|
||||||
@ -89,6 +89,7 @@
|
|||||||
#' \itemize{
|
#' \itemize{
|
||||||
#' \item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error}
|
#' \item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error}
|
||||||
#' \item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood}
|
#' \item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood}
|
||||||
|
#' \item \code{mlogloss} multiclass logloss. \url{https://www.kaggle.com/wiki/MultiClassLogLoss}
|
||||||
#' \item \code{error} Binary classification error rate. It is calculated as \code{(wrong cases) / (all cases)}. For the predictions, the evaluation will regard the instances with prediction value larger than 0.5 as positive instances, and the others as negative instances.
|
#' \item \code{error} Binary classification error rate. It is calculated as \code{(wrong cases) / (all cases)}. For the predictions, the evaluation will regard the instances with prediction value larger than 0.5 as positive instances, and the others as negative instances.
|
||||||
#' \item \code{merror} Multiclass classification error rate. It is calculated as \code{(wrong cases) / (all cases)}.
|
#' \item \code{merror} Multiclass classification error rate. It is calculated as \code{(wrong cases) / (all cases)}.
|
||||||
#' \item \code{auc} Area under the curve. \url{http://en.wikipedia.org/wiki/Receiver_operating_characteristic#'Area_under_curve} for ranking evaluation.
|
#' \item \code{auc} Area under the curve. \url{http://en.wikipedia.org/wiki/Receiver_operating_characteristic#'Area_under_curve} for ranking evaluation.
|
||||||
@ -119,7 +120,6 @@
|
|||||||
#' param <- list(max.depth = 2, eta = 1, silent = 1, objective=logregobj,eval_metric=evalerror)
|
#' param <- list(max.depth = 2, eta = 1, silent = 1, objective=logregobj,eval_metric=evalerror)
|
||||||
#' bst <- xgb.train(param, dtrain, nthread = 2, nround = 2, watchlist)
|
#' bst <- xgb.train(param, dtrain, nthread = 2, nround = 2, watchlist)
|
||||||
#' @export
|
#' @export
|
||||||
#'
|
|
||||||
xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
|
xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
|
||||||
obj = NULL, feval = NULL, verbose = 1, print.every.n=1L,
|
obj = NULL, feval = NULL, verbose = 1, print.every.n=1L,
|
||||||
early.stop.round = NULL, maximize = NULL,
|
early.stop.round = NULL, maximize = NULL,
|
||||||
|
|||||||
@ -58,7 +58,6 @@
|
|||||||
#' pred <- predict(bst, test$data)
|
#' pred <- predict(bst, test$data)
|
||||||
#'
|
#'
|
||||||
#' @export
|
#' @export
|
||||||
#'
|
|
||||||
xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
|
xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
|
||||||
params = list(), nrounds,
|
params = list(), nrounds,
|
||||||
verbose = 1, print.every.n = 1L, early.stop.round = NULL,
|
verbose = 1, print.every.n = 1L, early.stop.round = NULL,
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
% Generated by roxygen2: do not edit by hand
|
||||||
% Please edit documentation in R/xgboost.R
|
% Please edit documentation in R/xgboost.R
|
||||||
\docType{data}
|
\docType{data}
|
||||||
\name{agaricus.test}
|
\name{agaricus.test}
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
% Generated by roxygen2: do not edit by hand
|
||||||
% Please edit documentation in R/xgboost.R
|
% Please edit documentation in R/xgboost.R
|
||||||
\docType{data}
|
\docType{data}
|
||||||
\name{agaricus.train}
|
\name{agaricus.train}
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
% Generated by roxygen2: do not edit by hand
|
||||||
% Please edit documentation in R/getinfo.xgb.DMatrix.R
|
% Please edit documentation in R/getinfo.xgb.DMatrix.R
|
||||||
\docType{methods}
|
\docType{methods}
|
||||||
\name{getinfo}
|
\name{getinfo}
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
% Generated by roxygen2: do not edit by hand
|
||||||
% Please edit documentation in R/nrow.xgb.DMatrix.R
|
% Please edit documentation in R/nrow.xgb.DMatrix.R
|
||||||
\docType{methods}
|
\docType{methods}
|
||||||
\name{nrow,xgb.DMatrix-method}
|
\name{nrow,xgb.DMatrix-method}
|
||||||
@ -18,5 +18,6 @@ data(agaricus.train, package='xgboost')
|
|||||||
train <- agaricus.train
|
train <- agaricus.train
|
||||||
dtrain <- xgb.DMatrix(train$data, label=train$label)
|
dtrain <- xgb.DMatrix(train$data, label=train$label)
|
||||||
stopifnot(nrow(dtrain) == nrow(train$data))
|
stopifnot(nrow(dtrain) == nrow(train$data))
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
% Generated by roxygen2: do not edit by hand
|
||||||
% Please edit documentation in R/predict.xgb.Booster.R
|
% Please edit documentation in R/predict.xgb.Booster.R
|
||||||
\docType{methods}
|
\docType{methods}
|
||||||
\name{predict,xgb.Booster-method}
|
\name{predict,xgb.Booster-method}
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
% Generated by roxygen2: do not edit by hand
|
||||||
% Please edit documentation in R/predict.xgb.Booster.handle.R
|
% Please edit documentation in R/predict.xgb.Booster.handle.R
|
||||||
\docType{methods}
|
\docType{methods}
|
||||||
\name{predict,xgb.Booster.handle-method}
|
\name{predict,xgb.Booster.handle-method}
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
% Generated by roxygen2: do not edit by hand
|
||||||
% Please edit documentation in R/setinfo.xgb.DMatrix.R
|
% Please edit documentation in R/setinfo.xgb.DMatrix.R
|
||||||
\docType{methods}
|
\docType{methods}
|
||||||
\name{setinfo}
|
\name{setinfo}
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
% Generated by roxygen2: do not edit by hand
|
||||||
% Please edit documentation in R/slice.xgb.DMatrix.R
|
% Please edit documentation in R/slice.xgb.DMatrix.R
|
||||||
\docType{methods}
|
\docType{methods}
|
||||||
\name{slice}
|
\name{slice}
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
% Generated by roxygen2: do not edit by hand
|
||||||
% Please edit documentation in R/xgb.DMatrix.R
|
% Please edit documentation in R/xgb.DMatrix.R
|
||||||
\name{xgb.DMatrix}
|
\name{xgb.DMatrix}
|
||||||
\alias{xgb.DMatrix}
|
\alias{xgb.DMatrix}
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
% Generated by roxygen2: do not edit by hand
|
||||||
% Please edit documentation in R/xgb.DMatrix.save.R
|
% Please edit documentation in R/xgb.DMatrix.save.R
|
||||||
\name{xgb.DMatrix.save}
|
\name{xgb.DMatrix.save}
|
||||||
\alias{xgb.DMatrix.save}
|
\alias{xgb.DMatrix.save}
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
% Generated by roxygen2: do not edit by hand
|
||||||
% Please edit documentation in R/xgb.cv.R
|
% Please edit documentation in R/xgb.cv.R
|
||||||
\name{xgb.cv}
|
\name{xgb.cv}
|
||||||
\alias{xgb.cv}
|
\alias{xgb.cv}
|
||||||
@ -40,7 +40,7 @@ value that represents missing value. Sometime a data use 0 or other extreme valu
|
|||||||
|
|
||||||
\item{showsd}{\code{boolean}, whether show standard deviation of cross validation}
|
\item{showsd}{\code{boolean}, whether show standard deviation of cross validation}
|
||||||
|
|
||||||
\item{metrics,}{list of evaluation metrics to be used in cross validation,
|
\item{metrics, }{list of evaluation metrics to be used in cross validation,
|
||||||
when it is not specified, the evaluation metric is chosen according to objective function.
|
when it is not specified, the evaluation metric is chosen according to objective function.
|
||||||
Possible options are:
|
Possible options are:
|
||||||
\itemize{
|
\itemize{
|
||||||
@ -72,7 +72,7 @@ If set to an integer \code{k}, training with a validation set will stop if the p
|
|||||||
keeps getting worse consecutively for \code{k} rounds.}
|
keeps getting worse consecutively for \code{k} rounds.}
|
||||||
|
|
||||||
\item{maximize}{If \code{feval} and \code{early.stop.round} are set, then \code{maximize} must be set as well.
|
\item{maximize}{If \code{feval} and \code{early.stop.round} are set, then \code{maximize} must be set as well.
|
||||||
\code{maximize=TRUE} means the larger the evaluation score the better.}
|
\code{maximize=TRUE} means the larger the evaluation score the better.}
|
||||||
|
|
||||||
\item{...}{other parameters to pass to \code{params}.}
|
\item{...}{other parameters to pass to \code{params}.}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
% Generated by roxygen2: do not edit by hand
|
||||||
% Please edit documentation in R/xgb.dump.R
|
% Please edit documentation in R/xgb.dump.R
|
||||||
\name{xgb.dump}
|
\name{xgb.dump}
|
||||||
\alias{xgb.dump}
|
\alias{xgb.dump}
|
||||||
@ -19,9 +19,9 @@ See demo/ for walkthrough example in R, and
|
|||||||
for example Format.}
|
for example Format.}
|
||||||
|
|
||||||
\item{with.stats}{whether dump statistics of splits
|
\item{with.stats}{whether dump statistics of splits
|
||||||
When this option is on, the model dump comes with two additional statistics:
|
When this option is on, the model dump comes with two additional statistics:
|
||||||
gain is the approximate loss function gain we get in each split;
|
gain is the approximate loss function gain we get in each split;
|
||||||
cover is the sum of second order gradient in each node.}
|
cover is the sum of second order gradient in each node.}
|
||||||
}
|
}
|
||||||
\value{
|
\value{
|
||||||
if fname is not provided or set to \code{NULL} the function will return the model as a \code{character} vector. Otherwise it will return \code{TRUE}.
|
if fname is not provided or set to \code{NULL} the function will return the model as a \code{character} vector. Otherwise it will return \code{TRUE}.
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
% Generated by roxygen2: do not edit by hand
|
||||||
% Please edit documentation in R/xgb.importance.R
|
% Please edit documentation in R/xgb.importance.R
|
||||||
\name{xgb.importance}
|
\name{xgb.importance}
|
||||||
\alias{xgb.importance}
|
\alias{xgb.importance}
|
||||||
@ -66,5 +66,6 @@ xgb.importance(train$data@Dimnames[[2]], model = bst)
|
|||||||
|
|
||||||
# Same thing with co-occurrence computation this time
|
# Same thing with co-occurrence computation this time
|
||||||
xgb.importance(train$data@Dimnames[[2]], model = bst, data = train$data, label = train$label)
|
xgb.importance(train$data@Dimnames[[2]], model = bst, data = train$data, label = train$label)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
% Generated by roxygen2: do not edit by hand
|
||||||
% Please edit documentation in R/xgb.load.R
|
% Please edit documentation in R/xgb.load.R
|
||||||
\name{xgb.load}
|
\name{xgb.load}
|
||||||
\alias{xgb.load}
|
\alias{xgb.load}
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
% Generated by roxygen2: do not edit by hand
|
||||||
% Please edit documentation in R/xgb.model.dt.tree.R
|
% Please edit documentation in R/xgb.model.dt.tree.R
|
||||||
\name{xgb.model.dt.tree}
|
\name{xgb.model.dt.tree}
|
||||||
\alias{xgb.model.dt.tree}
|
\alias{xgb.model.dt.tree}
|
||||||
@ -55,5 +55,6 @@ bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
|
|||||||
|
|
||||||
#agaricus.test$data@Dimnames[[2]] represents the column names of the sparse matrix.
|
#agaricus.test$data@Dimnames[[2]] represents the column names of the sparse matrix.
|
||||||
xgb.model.dt.tree(agaricus.train$data@Dimnames[[2]], model = bst)
|
xgb.model.dt.tree(agaricus.train$data@Dimnames[[2]], model = bst)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
% Generated by roxygen2: do not edit by hand
|
||||||
% Please edit documentation in R/xgb.plot.importance.R
|
% Please edit documentation in R/xgb.plot.importance.R
|
||||||
\name{xgb.plot.importance}
|
\name{xgb.plot.importance}
|
||||||
\alias{xgb.plot.importance}
|
\alias{xgb.plot.importance}
|
||||||
@ -36,5 +36,6 @@ bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
|
|||||||
#train$data@Dimnames[[2]] represents the column names of the sparse matrix.
|
#train$data@Dimnames[[2]] represents the column names of the sparse matrix.
|
||||||
importance_matrix <- xgb.importance(train$data@Dimnames[[2]], model = bst)
|
importance_matrix <- xgb.importance(train$data@Dimnames[[2]], model = bst)
|
||||||
xgb.plot.importance(importance_matrix)
|
xgb.plot.importance(importance_matrix)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -1,11 +1,11 @@
|
|||||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
% Generated by roxygen2: do not edit by hand
|
||||||
% Please edit documentation in R/xgb.plot.tree.R
|
% Please edit documentation in R/xgb.plot.tree.R
|
||||||
\name{xgb.plot.tree}
|
\name{xgb.plot.tree}
|
||||||
\alias{xgb.plot.tree}
|
\alias{xgb.plot.tree}
|
||||||
\title{Plot a boosted tree model}
|
\title{Plot a boosted tree model}
|
||||||
\usage{
|
\usage{
|
||||||
xgb.plot.tree(feature_names = NULL, filename_dump = NULL, model = NULL,
|
xgb.plot.tree(feature_names = NULL, filename_dump = NULL, model = NULL,
|
||||||
n_first_tree = NULL, CSSstyle = NULL, width = NULL, height = NULL)
|
n_first_tree = NULL, width = NULL, height = NULL)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{feature_names}{names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.}
|
\item{feature_names}{names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.}
|
||||||
@ -16,8 +16,6 @@ xgb.plot.tree(feature_names = NULL, filename_dump = NULL, model = NULL,
|
|||||||
|
|
||||||
\item{n_first_tree}{limit the plot to the n first trees. If \code{NULL}, all trees of the model are plotted. Performance can be low for huge models.}
|
\item{n_first_tree}{limit the plot to the n first trees. If \code{NULL}, all trees of the model are plotted. Performance can be low for huge models.}
|
||||||
|
|
||||||
\item{CSSstyle}{a \code{character} vector storing a css style to customize the appearance of nodes. Look at the \href{https://github.com/knsv/mermaid/wiki}{Mermaid wiki} for more information.}
|
|
||||||
|
|
||||||
\item{width}{the width of the diagram in pixels.}
|
\item{width}{the width of the diagram in pixels.}
|
||||||
|
|
||||||
\item{height}{the height of the diagram in pixels.}
|
\item{height}{the height of the diagram in pixels.}
|
||||||
@ -39,7 +37,7 @@ The content of each node is organised that way:
|
|||||||
}
|
}
|
||||||
|
|
||||||
Each branch finishes with a leaf. For each leaf, only the \code{cover} is indicated.
|
Each branch finishes with a leaf. For each leaf, only the \code{cover} is indicated.
|
||||||
It uses \href{https://github.com/knsv/mermaid/}{Mermaid} library for that purpose.
|
It uses \href{http://www.graphviz.org/}{GraphViz} library for that purpose.
|
||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
data(agaricus.train, package='xgboost')
|
data(agaricus.train, package='xgboost')
|
||||||
@ -54,5 +52,6 @@ bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
|
|||||||
|
|
||||||
#agaricus.test$data@Dimnames[[2]] represents the column names of the sparse matrix.
|
#agaricus.test$data@Dimnames[[2]] represents the column names of the sparse matrix.
|
||||||
xgb.plot.tree(agaricus.train$data@Dimnames[[2]], model = bst)
|
xgb.plot.tree(agaricus.train$data@Dimnames[[2]], model = bst)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
% Generated by roxygen2: do not edit by hand
|
||||||
% Please edit documentation in R/xgb.save.R
|
% Please edit documentation in R/xgb.save.R
|
||||||
\name{xgb.save}
|
\name{xgb.save}
|
||||||
\alias{xgb.save}
|
\alias{xgb.save}
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
% Generated by roxygen2: do not edit by hand
|
||||||
% Please edit documentation in R/xgb.save.raw.R
|
% Please edit documentation in R/xgb.save.raw.R
|
||||||
\name{xgb.save.raw}
|
\name{xgb.save.raw}
|
||||||
\alias{xgb.save.raw}
|
\alias{xgb.save.raw}
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
% Generated by roxygen2: do not edit by hand
|
||||||
% Please edit documentation in R/xgb.train.R
|
% Please edit documentation in R/xgb.train.R
|
||||||
\name{xgb.train}
|
\name{xgb.train}
|
||||||
\alias{xgb.train}
|
\alias{xgb.train}
|
||||||
@ -51,7 +51,7 @@ xgb.train(params = list(), data, nrounds, watchlist = list(), obj = NULL,
|
|||||||
\item \code{binary:logistic} logistic regression for binary classification. Output probability.
|
\item \code{binary:logistic} logistic regression for binary classification. Output probability.
|
||||||
\item \code{binary:logitraw} logistic regression for binary classification, output score before logistic transformation.
|
\item \code{binary:logitraw} logistic regression for binary classification, output score before logistic transformation.
|
||||||
\item \code{num_class} set the number of classes. To use only with multiclass objectives.
|
\item \code{num_class} set the number of classes. To use only with multiclass objectives.
|
||||||
\item \code{multi:softmax} set xgboost to do multiclass classification using the softmax objective. Class is represented by a number and should be from 0 to \code{tonum_class}.
|
\item \code{multi:softmax} set xgboost to do multiclass classification using the softmax objective. Class is represented by a number and should be from 0 to \code{num_class}.
|
||||||
\item \code{multi:softprob} same as softmax, but output a vector of ndata * nclass, which can be further reshaped to ndata, nclass matrix. The result contains predicted probabilities of each data point belonging to each class.
|
\item \code{multi:softprob} same as softmax, but output a vector of ndata * nclass, which can be further reshaped to ndata, nclass matrix. The result contains predicted probabilities of each data point belonging to each class.
|
||||||
\item \code{rank:pairwise} set xgboost to do ranking task by minimizing the pairwise loss.
|
\item \code{rank:pairwise} set xgboost to do ranking task by minimizing the pairwise loss.
|
||||||
}
|
}
|
||||||
@ -64,10 +64,10 @@ xgb.train(params = list(), data, nrounds, watchlist = list(), obj = NULL,
|
|||||||
\item{nrounds}{the max number of iterations}
|
\item{nrounds}{the max number of iterations}
|
||||||
|
|
||||||
\item{watchlist}{what information should be printed when \code{verbose=1} or
|
\item{watchlist}{what information should be printed when \code{verbose=1} or
|
||||||
\code{verbose=2}. Watchlist is used to specify validation set monitoring
|
\code{verbose=2}. Watchlist is used to specify validation set monitoring
|
||||||
during training. For example user can specify
|
during training. For example user can specify
|
||||||
watchlist=list(validation1=mat1, validation2=mat2) to watch
|
watchlist=list(validation1=mat1, validation2=mat2) to watch
|
||||||
the performance of each round's model on mat1 and mat2}
|
the performance of each round's model on mat1 and mat2}
|
||||||
|
|
||||||
\item{obj}{customized objective function. Returns gradient and second order
|
\item{obj}{customized objective function. Returns gradient and second order
|
||||||
gradient with given prediction and dtrain,}
|
gradient with given prediction and dtrain,}
|
||||||
@ -110,6 +110,7 @@ Number of threads can also be manually specified via \code{nthread} parameter.
|
|||||||
\itemize{
|
\itemize{
|
||||||
\item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error}
|
\item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error}
|
||||||
\item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood}
|
\item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood}
|
||||||
|
\item \code{mlogloss} multiclass logloss. \url{https://www.kaggle.com/wiki/MultiClassLogLoss}
|
||||||
\item \code{error} Binary classification error rate. It is calculated as \code{(wrong cases) / (all cases)}. For the predictions, the evaluation will regard the instances with prediction value larger than 0.5 as positive instances, and the others as negative instances.
|
\item \code{error} Binary classification error rate. It is calculated as \code{(wrong cases) / (all cases)}. For the predictions, the evaluation will regard the instances with prediction value larger than 0.5 as positive instances, and the others as negative instances.
|
||||||
\item \code{merror} Multiclass classification error rate. It is calculated as \code{(wrong cases) / (all cases)}.
|
\item \code{merror} Multiclass classification error rate. It is calculated as \code{(wrong cases) / (all cases)}.
|
||||||
\item \code{auc} Area under the curve. \url{http://en.wikipedia.org/wiki/Receiver_operating_characteristic#'Area_under_curve} for ranking evaluation.
|
\item \code{auc} Area under the curve. \url{http://en.wikipedia.org/wiki/Receiver_operating_characteristic#'Area_under_curve} for ranking evaluation.
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
% Generated by roxygen2 (4.1.1): do not edit by hand
|
% Generated by roxygen2: do not edit by hand
|
||||||
% Please edit documentation in R/xgboost.R
|
% Please edit documentation in R/xgboost.R
|
||||||
\name{xgboost}
|
\name{xgboost}
|
||||||
\alias{xgboost}
|
\alias{xgboost}
|
||||||
@ -78,5 +78,6 @@ test <- agaricus.test
|
|||||||
bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
|
bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
|
||||||
eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
|
eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
|
||||||
pred <- predict(bst, test$data)
|
pred <- predict(bst, test$data)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user