[R] Re-generate Roxygen2 doc (#6915)
commit 5472ef626c
parent 20f34d9776
@@ -8,7 +8,7 @@ during its training.}
 cb.gblinear.history(sparse = FALSE)
 }
 \arguments{
-\item{sparse}{when set to FALSE/TURE, a dense/sparse matrix is used to store the result.
+\item{sparse}{when set to FALSE/TRUE, a dense/sparse matrix is used to store the result.
 Sparse format is useful when one expects only a subset of coefficients to be non-zero,
 when using the "thrifty" feature selector with fairly small number of top features
 selected per iteration.}
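
The argument text above documents the 'sparse' switch of cb.gblinear.history(). For orientation, a minimal sketch (not part of this commit; parameter values are illustrative) of requesting sparse storage together with the "thrifty" selector the text refers to, reusing the dtrain and gblinear setup from the example hunks below:

    # illustrative sketch; assumes dtrain built as in the examples below
    param <- list(booster = "gblinear", objective = "binary:logistic",
                  updater = "coord_descent", feature_selector = "thrifty", top_k = 1)
    bst <- xgb.train(param, dtrain, nrounds = 200, eta = 0.8,
                     callbacks = list(cb.gblinear.history(sparse = TRUE)))
    coef_path <- xgb.gblinear.history(bst)      # sparse matrix when sparse = TRUE
    matplot(as.matrix(coef_path), type = 'l')   # densify just for plotting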
@@ -36,7 +36,6 @@ Callback function expects the following values to be set in its calling frame:
 #
 # In the iris dataset, it is hard to linearly separate Versicolor class from the rest
 # without considering the 2nd order interactions:
-require(magrittr)
 x <- model.matrix(Species ~ .^2, iris)[,-1]
 colnames(x)
 dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"))
@@ -57,7 +56,7 @@ matplot(coef_path, type = 'l')
 bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 200, eta = 0.8,
 updater = 'coord_descent', feature_selector = 'thrifty', top_k = 1,
 callbacks = list(cb.gblinear.history()))
-xgb.gblinear.history(bst) \%>\% matplot(type = 'l')
+matplot(xgb.gblinear.history(bst), type = 'l')
 # Componentwise boosting is known to have similar effect to Lasso regularization.
 # Try experimenting with various values of top_k, eta, nrounds,
 # as well as different feature_selectors.
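
This and the following hunks replace the magrittr pipe with a direct matplot() call, which is why require(magrittr) could be dropped above. The two spellings are equivalent, e.g. (sketch, not from the commit):

    coef_path <- xgb.gblinear.history(bst)
    # magrittr form used previously:  coef_path %>% matplot(type = 'l')
    matplot(coef_path, type = 'l')    # base-R form now used in the examples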
@@ -66,7 +65,7 @@ xgb.gblinear.history(bst) \%>\% matplot(type = 'l')
 bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 100, eta = 0.8,
 callbacks = list(cb.gblinear.history()))
 # coefficients in the CV fold #3
-xgb.gblinear.history(bst)[[3]] \%>\% matplot(type = 'l')
+matplot(xgb.gblinear.history(bst)[[3]], type = 'l')


 #### Multiclass classification:
@@ -79,15 +78,15 @@ param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
 bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 70, eta = 0.5,
 callbacks = list(cb.gblinear.history()))
 # Will plot the coefficient paths separately for each class:
-xgb.gblinear.history(bst, class_index = 0) \%>\% matplot(type = 'l')
-xgb.gblinear.history(bst, class_index = 1) \%>\% matplot(type = 'l')
-xgb.gblinear.history(bst, class_index = 2) \%>\% matplot(type = 'l')
+matplot(xgb.gblinear.history(bst, class_index = 0), type = 'l')
+matplot(xgb.gblinear.history(bst, class_index = 1), type = 'l')
+matplot(xgb.gblinear.history(bst, class_index = 2), type = 'l')

 # CV:
 bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 70, eta = 0.5,
 callbacks = list(cb.gblinear.history(FALSE)))
-# 1st forld of 1st class
-xgb.gblinear.history(bst, class_index = 0)[[1]] \%>\% matplot(type = 'l')
+# 1st fold of 1st class
+matplot(xgb.gblinear.history(bst, class_index = 0)[[1]], type = 'l')

 }
 \seealso{
@@ -34,8 +34,7 @@ The \code{name} field can be one of the following:
 }
 \examples{
 data(agaricus.train, package='xgboost')
-train <- agaricus.train
-dtrain <- xgb.DMatrix(train$data, label=train$label)
+dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))

 labels <- getinfo(dtrain, 'label')
 setinfo(dtrain, 'label', 1-labels)
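
The hunks below apply the same simplification across the xgb.DMatrix-related help pages: the two-step construction through an intermediate train object is collapsed into a single with() call. Both forms produce the same xgb.DMatrix, e.g. (sketch; assumes the agaricus data is loaded as in the examples):

    data(agaricus.train, package = 'xgboost')
    # old form used in the examples:
    train <- agaricus.train
    dtrain_old <- xgb.DMatrix(train$data, label = train$label)
    # new form:
    dtrain_new <- with(agaricus.train, xgb.DMatrix(data, label = label))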
@@ -19,8 +19,7 @@ Currently it displays dimensions and presence of info-fields and colnames.
 }
 \examples{
 data(agaricus.train, package='xgboost')
-train <- agaricus.train
-dtrain <- xgb.DMatrix(train$data, label=train$label)
+dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))

 dtrain
 print(dtrain, verbose=TRUE)
@@ -33,8 +33,7 @@ The \code{name} field can be one of the following:
 }
 \examples{
 data(agaricus.train, package='xgboost')
-train <- agaricus.train
-dtrain <- xgb.DMatrix(train$data, label=train$label)
+dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))

 labels <- getinfo(dtrain, 'label')
 setinfo(dtrain, 'label', 1-labels)
@@ -28,8 +28,7 @@ original xgb.DMatrix object
 }
 \examples{
 data(agaricus.train, package='xgboost')
-train <- agaricus.train
-dtrain <- xgb.DMatrix(train$data, label=train$label)
+dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))

 dsub <- slice(dtrain, 1:42)
 labels1 <- getinfo(dsub, 'label')
@@ -27,8 +27,7 @@ Supported input file formats are either a libsvm text file or a binary file that
 }
 \examples{
 data(agaricus.train, package='xgboost')
-train <- agaricus.train
-dtrain <- xgb.DMatrix(train$data, label=train$label)
+dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
 xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
 dtrain <- xgb.DMatrix('xgb.DMatrix.data')
 if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
@@ -16,8 +16,7 @@ Save xgb.DMatrix object to binary file
 }
 \examples{
 data(agaricus.train, package='xgboost')
-train <- agaricus.train
-dtrain <- xgb.DMatrix(train$data, label=train$label)
+dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
 xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
 dtrain <- xgb.DMatrix('xgb.DMatrix.data')
 if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
@@ -59,8 +59,8 @@ a rule on certain features."
 \examples{
 data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
-dtrain <- xgb.DMatrix(data = agaricus.train$data, label = agaricus.train$label)
-dtest <- xgb.DMatrix(data = agaricus.test$data, label = agaricus.test$label)
+dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
+dtest <- with(agaricus.test, xgb.DMatrix(data, label = label))

 param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
 nrounds = 4
@@ -160,7 +160,7 @@ Adapted from \url{https://en.wikipedia.org/wiki/Cross-validation_\%28statistics\%29}
 }
 \examples{
 data(agaricus.train, package='xgboost')
-dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
+dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
 cv <- xgb.cv(data = dtrain, nrounds = 3, nthread = 2, nfold = 5, metrics = list("rmse","auc"),
 max_depth = 3, eta = 1, objective = "binary:logistic")
 print(cv)
@@ -54,7 +54,7 @@ xgboost(

 2. Booster Parameters

-2.1. Parameter for Tree Booster
+2.1. Parameters for Tree Booster

 \itemize{
 \item \code{eta} control the learning rate: scale the contribution of each tree by a factor of \code{0 < eta < 1} when it is added to the current approximation. Used to prevent overfitting by making the boosting process more conservative. Lower value for \code{eta} implies larger value for \code{nrounds}: low \code{eta} value means model more robust to overfitting but slower to compute. Default: 0.3
@@ -63,12 +63,14 @@ xgboost(
 \item \code{min_child_weight} minimum sum of instance weight (hessian) needed in a child. If the tree partition step results in a leaf node with the sum of instance weight less than min_child_weight, then the building process will give up further partitioning. In linear regression mode, this simply corresponds to minimum number of instances needed to be in each node. The larger, the more conservative the algorithm will be. Default: 1
 \item \code{subsample} subsample ratio of the training instance. Setting it to 0.5 means that xgboost randomly collected half of the data instances to grow trees and this will prevent overfitting. It makes computation shorter (because less data to analyse). It is advised to use this parameter with \code{eta} and increase \code{nrounds}. Default: 1
 \item \code{colsample_bytree} subsample ratio of columns when constructing each tree. Default: 1
+\item \code{lambda} L2 regularization term on weights. Default: 1
+\item \code{alpha} L1 regularization term on weights. (there is no L1 reg on bias because it is not important). Default: 0
 \item \code{num_parallel_tree} Experimental parameter. number of trees to grow per round. Useful to test Random Forest through Xgboost (set \code{colsample_bytree < 1}, \code{subsample < 1} and \code{round = 1}) accordingly. Default: 1
 \item \code{monotone_constraints} A numerical vector consists of \code{1}, \code{0} and \code{-1} with its length equals to the number of features in the training data. \code{1} is increasing, \code{-1} is decreasing and \code{0} is no constraint.
 \item \code{interaction_constraints} A list of vectors specifying feature indices of permitted interactions. Each item of the list represents one permitted interaction where specified features are allowed to interact with each other. Feature index values should start from \code{0} (\code{0} references the first column). Leave argument unspecified for no interaction constraints.
 }

-2.2. Parameter for Linear Booster
+2.2. Parameters for Linear Booster

 \itemize{
 \item \code{lambda} L2 regularization term on weights. Default: 0
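
With lambda and alpha now listed among the tree-booster parameters, a short sketch of passing them through the params list (values are illustrative only, not taken from the commit; assumes dtrain as constructed in the examples elsewhere on this page):

    param <- list(max_depth = 2, eta = 0.3,
                  lambda = 1,    # L2 regularization term on weights
                  alpha = 0.5,   # L1 regularization term on weights
                  objective = "binary:logistic")
    bst <- xgb.train(param, dtrain, nrounds = 10)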
@@ -242,8 +244,8 @@ The following callbacks are automatically created when certain parameters are set:
 data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')

-dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
-dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
+dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label))
+dtest <- with(agaricus.test, xgb.DMatrix(data, label = label))
 watchlist <- list(train = dtrain, eval = dtest)

 ## A simple xgb.train example:
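
The watchlist built above feeds the evaluation log during training. A minimal sketch of the "simple xgb.train example" the comment introduces (hedged; the exact call in the help page is not shown in this hunk):

    param <- list(max_depth = 2, eta = 1, objective = "binary:logistic")
    bst <- xgb.train(param, dtrain, nrounds = 2, watchlist = watchlist)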