@@ -87,8 +87,8 @@ xgb.Booster.check <- function(bst, saveraw = TRUE) {
|
||||
#' @param ... Parameters passed to \code{predict.xgb.Booster}
|
||||
#'
|
||||
#' @details
|
||||
#' Note that \code{ntreelimit} is not necesserily equal to the number of boosting iterations
|
||||
#' and it is not necesserily equal to the number of trees in a model.
|
||||
#' Note that \code{ntreelimit} is not necessarily equal to the number of boosting iterations
|
||||
#' and it is not necessarily equal to the number of trees in a model.
|
||||
#' E.g., in a random forest-like model, \code{ntreelimit} would limit the number of trees.
|
||||
#' For multiclass classification, on the other hand, there are multiple trees per iteration,
|
||||
#' but \code{ntreelimit} limits the number of boosting iterations.
|
||||
@@ -242,7 +242,7 @@ predict.xgb.Booster.handle <- function(object, ...) {
|
||||
#' (from R or any other interface).
|
||||
#' In contrast, any R-attribute assigned to an R-object of \code{xgb.Booster} class
|
||||
#' would not be saved by \code{xgb.save} because an xgboost model is an external memory object
|
||||
#' and its serialization is handled extrnally.
|
||||
#' and its serialization is handled externally.
|
||||
#' Also, setting an attribute that has the same name as one of xgboost's parameters wouldn't
|
||||
#' change the value of that parameter for a model.
|
||||
#' Use \code{\link{xgb.parameters<-}} to set or change model parameters.
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
#' @param feature_names names of each feature as a \code{character} vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.
|
||||
#' @param model generated by the \code{xgb.train} function.
|
||||
#' @param data the dataset used for the training step. Will be used with \code{label} parameter for co-occurrence computation. More information in \code{Detail} part. This parameter is optional.
|
||||
#' @param label the label vetor used for the training step. Will be used with \code{data} parameter for co-occurence computation. More information in \code{Detail} part. This parameter is optional.
|
||||
#' @param label the label vector used for the training step. Will be used with \code{data} parameter for co-occurrence computation. More information in \code{Detail} part. This parameter is optional.
|
||||
#' @param target a function which returns \code{TRUE} or \code{1} when an observation should be counted as a co-occurrence and \code{FALSE} or \code{0} otherwise. Default function is provided for computing co-occurrences in a binary classification. The \code{target} function should have only one parameter. This parameter will be used to provide each important feature vector after having applied the split condition, therefore these vectors will be made of 0 and 1 only, whatever the information was before. More information in \code{Detail} part. This parameter is optional.
|
||||
#'
|
||||
#' @return A \code{data.table} of the features used in the model with their average gain (and their weight for boosted tree model) in the model.
|
||||
@@ -14,7 +14,7 @@
|
||||
#' This function is for both linear and tree models.
|
||||
#'
|
||||
#' \code{data.table} is returned by the function.
|
||||
#' The columns are :
|
||||
#' The columns are:
|
||||
#' \itemize{
|
||||
#' \item \code{Features} name of the features as provided in \code{feature_names} or already present in the model dump;
|
||||
#' \item \code{Gain} contribution of each feature to the model. For a boosted tree model, the gain of each feature in each tree is taken into account, then averaged per feature to give a view of the entire model. The highest percentage means the most important feature to predict the \code{label} used for the training (only available for tree models);
|
||||
@@ -33,7 +33,7 @@
|
||||
#'
|
||||
#' Co-occurrence computation is here to help in understanding this relation between a predictor and a specific class. It will count how many observations are returned as \code{TRUE} by the \code{target} function (see parameters). When you execute the example below, there are only 92 out of the 3140 observations of the train dataset where a mushroom has no odor and can be eaten safely.
|
||||
#'
|
||||
#' If you need to remember one thing only: until you want to leave us early, don't eat a mushroom which has no odor :-)
|
||||
#' If you need to remember only one thing: unless you want to leave us early, don't eat a mushroom which has no odor :-)
|
||||
#'
|
||||
#' @examples
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
@@ -94,7 +94,7 @@ xgb.importance <- function(feature_names = NULL, model = NULL, data = NULL, labe
|
||||
d <- data[, result[,Feature], drop=FALSE] < as.numeric(result[,Split])
|
||||
apply(c & d, 2, . %>% target %>% sum) -> vec
|
||||
|
||||
result <- result[, "RealCover" := as.numeric(vec), with = F][, "RealCover %" := RealCover / sum(label)][,MissingNo := NULL]
|
||||
result <- result[, "RealCover" := as.numeric(vec), with = F][, "RealCover %" := RealCover / sum(label)][, MissingNo := NULL]
|
||||
}
|
||||
}
|
||||
result
|
||||
|
||||
@@ -20,7 +20,7 @@ xgb.load <- function(modelfile) {
|
||||
stop("xgb.load: modelfile cannot be NULL")
|
||||
|
||||
handle <- xgb.Booster(modelfile = modelfile)
|
||||
# re-use modelfile if it is raw so we donot need to serialize
|
||||
# re-use modelfile if it is raw so we do not need to serialize
|
||||
if (typeof(modelfile) == "raw") {
|
||||
bst <- xgb.handleToBooster(handle, modelfile)
|
||||
} else {
|
||||
|
||||
@@ -86,7 +86,7 @@
|
||||
#' @param save_period when it is non-NULL, model is saved to disk after every \code{save_period} rounds,
|
||||
#' 0 means save at the end. The saving is handled by the \code{\link{cb.save.model}} callback.
|
||||
#' @param save_name the name or path for periodically saved model file.
|
||||
#' @param xgb_model a previously built model to continue the trainig from.
|
||||
#' @param xgb_model a previously built model to continue the training from.
|
||||
#' Could be either an object of class \code{xgb.Booster}, or its raw data, or the name of a
|
||||
#' file with a previously saved model.
|
||||
#' @param callbacks a list of callback functions to perform various tasks during boosting.
|
||||
|
||||
@@ -110,7 +110,7 @@ This parameter is passed to the \code{\link{cb.early.stop}} callback.}
|
||||
|
||||
\item{save_name}{the name or path for periodically saved model file.}
|
||||
|
||||
\item{xgb_model}{a previously built model to continue the trainig from.
|
||||
\item{xgb_model}{a previously built model to continue the training from.
|
||||
Could be either an object of class \code{xgb.Booster}, or its raw data, or the name of a
|
||||
file with a previously saved model.}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user