fixed typos in R package docs (#4345)
* fixed typos in R package docs * updated verbosity parameter in xgb.train docs
This commit is contained in:
@@ -18,7 +18,7 @@ xgboost(data = NULL, label = NULL, missing = NA, weight = NULL,
|
||||
...)
|
||||
}
|
||||
\arguments{
|
||||
\item{params}{the list of parameters.
|
||||
\item{params}{the list of parameters.
|
||||
The complete list of parameters is available at \url{http://xgboost.readthedocs.io/en/latest/parameter.html}.
|
||||
Below is a shorter summary:
|
||||
|
||||
@@ -27,31 +27,32 @@ xgboost(data = NULL, label = NULL, missing = NA, weight = NULL,
|
||||
\itemize{
|
||||
\item \code{booster} which booster to use, can be \code{gbtree} or \code{gblinear}. Default: \code{gbtree}.
|
||||
}
|
||||
|
||||
|
||||
2. Booster Parameters
|
||||
|
||||
2.1. Parameter for Tree Booster
|
||||
|
||||
\itemize{
|
||||
\item \code{eta} control the learning rate: scale the contribution of each tree by a factor of \code{0 < eta < 1} when it is added to the current approximation. Used to prevent overfitting by making the boosting process more conservative. Lower value for \code{eta} implies larger value for \code{nrounds}: a low \code{eta} value makes the model more robust to overfitting but slower to compute. Default: 0.3
|
||||
\item \code{gamma} minimum loss reduction required to make a further partition on a leaf node of the tree. the larger, the more conservative the algorithm will be.
|
||||
\item \code{gamma} minimum loss reduction required to make a further partition on a leaf node of the tree. the larger, the more conservative the algorithm will be.
|
||||
\item \code{max_depth} maximum depth of a tree. Default: 6
|
||||
\item \code{min_child_weight} minimum sum of instance weight (hessian) needed in a child. If the tree partition step results in a leaf node with the sum of instance weight less than min_child_weight, then the building process will give up further partitioning. In linear regression mode, this simply corresponds to minimum number of instances needed to be in each node. The larger, the more conservative the algorithm will be. Default: 1
|
||||
\item \code{subsample} subsample ratio of the training instance. Setting it to 0.5 means that xgboost randomly collected half of the data instances to grow trees and this will prevent overfitting. It makes computation shorter (because less data to analyse). It is advised to use this parameter with \code{eta} and increase \code{nrounds}. Default: 1
|
||||
\item \code{subsample} subsample ratio of the training instance. Setting it to 0.5 means that xgboost randomly collected half of the data instances to grow trees and this will prevent overfitting. It makes computation shorter (because less data to analyse). It is advised to use this parameter with \code{eta} and increase \code{nrounds}. Default: 1
|
||||
\item \code{colsample_bytree} subsample ratio of columns when constructing each tree. Default: 1
|
||||
\item \code{num_parallel_tree} Experimental parameter. Number of trees to grow per round. Useful to test Random Forest through Xgboost (set \code{colsample_bytree < 1}, \code{subsample < 1} and \code{nrounds = 1} accordingly). Default: 1
|
||||
\item \code{monotone_constraints} A numerical vector consisting of \code{1}, \code{0} and \code{-1} with its length equal to the number of features in the training data. \code{1} is increasing, \code{-1} is decreasing and \code{0} is no constraint.
|
||||
\item \code{interaction_constraints} A list of vectors specifying feature indices of permitted interactions. Each item of the list represents one permitted interaction where specified features are allowed to interact with each other. Feature index values should start from \code{0} (\code{0} references the first column). Leave argument unspecified for no interaction constraints.
|
||||
}
|
||||
|
||||
2.2. Parameter for Linear Booster
|
||||
|
||||
|
||||
\itemize{
|
||||
\item \code{lambda} L2 regularization term on weights. Default: 0
|
||||
\item \code{lambda_bias} L2 regularization term on bias. Default: 0
|
||||
\item \code{alpha} L1 regularization term on weights. (there is no L1 reg on bias because it is not important). Default: 0
|
||||
}
|
||||
|
||||
3. Task Parameters
|
||||
3. Task Parameters
|
||||
|
||||
\itemize{
|
||||
\item \code{objective} specify the learning task and the corresponding learning objective, users can pass a self-defined function to it. The default objective options are below:
|
||||
@@ -76,31 +77,31 @@ xgboost(data = NULL, label = NULL, missing = NA, weight = NULL,
|
||||
|
||||
\item{watchlist}{named list of xgb.DMatrix datasets to use for evaluating model performance.
|
||||
Metrics specified in either \code{eval_metric} or \code{feval} will be computed for each
|
||||
of these datasets during each boosting iteration, and stored in the end as a field named
|
||||
\code{evaluation_log} in the resulting object. When either \code{verbose>=1} or
|
||||
of these datasets during each boosting iteration, and stored in the end as a field named
|
||||
\code{evaluation_log} in the resulting object. When either \code{verbose>=1} or
|
||||
\code{\link{cb.print.evaluation}} callback is engaged, the performance results are continuously
|
||||
printed out during the training.
|
||||
printed out during the training.
|
||||
E.g., specifying \code{watchlist=list(validation1=mat1, validation2=mat2)} allows tracking
|
||||
the performance of each round's model on mat1 and mat2.}
|
||||
|
||||
\item{obj}{customized objective function. Returns gradient and second order
|
||||
\item{obj}{customized objective function. Returns gradient and second order
|
||||
gradient with given prediction and dtrain.}
|
||||
|
||||
\item{feval}{custimized evaluation function. Returns
|
||||
\code{list(metric='metric-name', value='metric-value')} with given
|
||||
\item{feval}{customized evaluation function. Returns
|
||||
\code{list(metric='metric-name', value='metric-value')} with given
|
||||
prediction and dtrain.}
|
||||
|
||||
\item{verbose}{If 0, xgboost will stay silent. If 1, it will print information about performance.
|
||||
If 2, some additional information will be printed out.
|
||||
Note that setting \code{verbose > 0} automatically engages the
|
||||
Note that setting \code{verbose > 0} automatically engages the
|
||||
\code{cb.print.evaluation(period=1)} callback function.}
|
||||
|
||||
\item{print_every_n}{Print each n-th iteration evaluation messages when \code{verbose>0}.
|
||||
Default is 1 which means all messages are printed. This parameter is passed to the
|
||||
Default is 1 which means all messages are printed. This parameter is passed to the
|
||||
\code{\link{cb.print.evaluation}} callback.}
|
||||
|
||||
\item{early_stopping_rounds}{If \code{NULL}, the early stopping function is not triggered.
|
||||
If set to an integer \code{k}, training with a validation set will stop if the performance
|
||||
\item{early_stopping_rounds}{If \code{NULL}, the early stopping function is not triggered.
|
||||
If set to an integer \code{k}, training with a validation set will stop if the performance
|
||||
doesn't improve for \code{k} rounds.
|
||||
Setting this parameter engages the \code{\link{cb.early.stop}} callback.}
|
||||
|
||||
@@ -115,17 +116,17 @@ This parameter is passed to the \code{\link{cb.early.stop}} callback.}
|
||||
\item{save_name}{the name or path for periodically saved model file.}
|
||||
|
||||
\item{xgb_model}{a previously built model to continue the training from.
|
||||
Could be either an object of class \code{xgb.Booster}, or its raw data, or the name of a
|
||||
Could be either an object of class \code{xgb.Booster}, or its raw data, or the name of a
|
||||
file with a previously saved model.}
|
||||
|
||||
\item{callbacks}{a list of callback functions to perform various tasks during boosting.
|
||||
See \code{\link{callbacks}}. Some of the callbacks are automatically created depending on the
|
||||
parameters' values. User can provide either existing or their own callback methods in order
|
||||
See \code{\link{callbacks}}. Some of the callbacks are automatically created depending on the
|
||||
parameters' values. User can provide either existing or their own callback methods in order
|
||||
to customize the training process.}
|
||||
|
||||
\item{...}{other parameters to pass to \code{params}.}
|
||||
|
||||
\item{label}{vector of response values. Should not be provided when data is
|
||||
\item{label}{vector of response values. Should not be provided when data is
|
||||
a local data file name or an \code{xgb.DMatrix}.}
|
||||
|
||||
\item{missing}{by default is set to NA, which means that NA values should be considered as 'missing'
|
||||
@@ -140,23 +141,23 @@ An object of class \code{xgb.Booster} with the following elements:
|
||||
\item \code{handle} a handle (pointer) to the xgboost model in memory.
|
||||
\item \code{raw} a cached memory dump of the xgboost model saved as R's \code{raw} type.
|
||||
\item \code{niter} number of boosting iterations.
|
||||
\item \code{evaluation_log} evaluation history storead as a \code{data.table} with the
|
||||
\item \code{evaluation_log} evaluation history stored as a \code{data.table} with the
|
||||
first column corresponding to iteration number and the rest corresponding to evaluation
|
||||
metrics' values. It is created by the \code{\link{cb.evaluation.log}} callback.
|
||||
\item \code{call} a function call.
|
||||
\item \code{params} parameters that were passed to the xgboost library. Note that it does not
|
||||
\item \code{params} parameters that were passed to the xgboost library. Note that it does not
|
||||
capture parameters changed by the \code{\link{cb.reset.parameters}} callback.
|
||||
\item \code{callbacks} callback functions that were either automatically assigned or
|
||||
explicitely passed.
|
||||
\item \code{callbacks} callback functions that were either automatically assigned or
|
||||
explicitly passed.
|
||||
\item \code{best_iteration} iteration number with the best evaluation metric value
|
||||
(only available with early stopping).
|
||||
\item \code{best_ntreelimit} the \code{ntreelimit} value corresponding to the best iteration,
|
||||
\item \code{best_ntreelimit} the \code{ntreelimit} value corresponding to the best iteration,
|
||||
which could further be used in \code{predict} method
|
||||
(only available with early stopping).
|
||||
\item \code{best_score} the best evaluation metric value during early stopping.
|
||||
(only available with early stopping).
|
||||
\item \code{feature_names} names of the training dataset features
|
||||
(only when comun names were defined in training data).
|
||||
(only when column names were defined in training data).
|
||||
\item \code{nfeatures} number of features in training data.
|
||||
}
|
||||
}
|
||||
@@ -165,20 +166,20 @@ An object of class \code{xgb.Booster} with the following elements:
|
||||
The \code{xgboost} function is a simpler wrapper for \code{xgb.train}.
|
||||
}
|
||||
\details{
|
||||
These are the training functions for \code{xgboost}.
|
||||
These are the training functions for \code{xgboost}.
|
||||
|
||||
The \code{xgb.train} interface supports advanced features such as \code{watchlist},
|
||||
customized objective and evaluation metric functions, therefore it is more flexible
|
||||
The \code{xgb.train} interface supports advanced features such as \code{watchlist},
|
||||
customized objective and evaluation metric functions, therefore it is more flexible
|
||||
than the \code{xgboost} interface.
|
||||
|
||||
Parallelization is automatically enabled if \code{OpenMP} is present.
|
||||
Parallelization is automatically enabled if \code{OpenMP} is present.
|
||||
Number of threads can also be manually specified via \code{nthread} parameter.
|
||||
|
||||
The evaluation metric is chosen automatically by Xgboost (according to the objective)
|
||||
when the \code{eval_metric} parameter is not provided.
|
||||
User may set one or several \code{eval_metric} parameters.
|
||||
User may set one or several \code{eval_metric} parameters.
|
||||
Note that when using a customized metric, only this single metric can be used.
|
||||
The folloiwing is the list of built-in metrics for which Xgboost provides optimized implementation:
|
||||
The following is the list of built-in metrics for which Xgboost provides optimized implementation:
|
||||
\itemize{
|
||||
\item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error}
|
||||
\item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood}
|
||||
@@ -210,7 +211,7 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
|
||||
watchlist <- list(train = dtrain, eval = dtest)
|
||||
|
||||
## A simple xgb.train example:
|
||||
param <- list(max_depth = 2, eta = 1, verbosity = 0, nthread = 2,
|
||||
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
|
||||
objective = "binary:logistic", eval_metric = "auc")
|
||||
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
|
||||
|
||||
@@ -231,12 +232,12 @@ evalerror <- function(preds, dtrain) {
|
||||
|
||||
# These functions could be used by passing them either:
|
||||
# as 'objective' and 'eval_metric' parameters in the params list:
|
||||
param <- list(max_depth = 2, eta = 1, verbosity = 0, nthread = 2,
|
||||
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
|
||||
objective = logregobj, eval_metric = evalerror)
|
||||
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
|
||||
|
||||
# or through the ... arguments:
|
||||
param <- list(max_depth = 2, eta = 1, verbosity = 0, nthread = 2)
|
||||
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2)
|
||||
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
|
||||
objective = logregobj, eval_metric = evalerror)
|
||||
|
||||
@@ -246,7 +247,7 @@ bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
|
||||
|
||||
|
||||
## An xgb.train example of using variable learning rates at each iteration:
|
||||
param <- list(max_depth = 2, eta = 1, verbosity = 0, nthread = 2,
|
||||
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
|
||||
objective = "binary:logistic", eval_metric = "auc")
|
||||
my_etas <- list(eta = c(0.5, 0.1))
|
||||
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
|
||||
@@ -257,8 +258,8 @@ bst <- xgb.train(param, dtrain, nrounds = 25, watchlist,
|
||||
early_stopping_rounds = 3)
|
||||
|
||||
## An 'xgboost' interface example:
|
||||
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
|
||||
max_depth = 2, eta = 1, nthread = 2, nrounds = 2,
|
||||
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
|
||||
max_depth = 2, eta = 1, nthread = 2, nrounds = 2,
|
||||
objective = "binary:logistic")
|
||||
pred <- predict(bst, agaricus.test$data)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user