replace nround with nrounds to match actual parameter (#3592)
committed by Philip Hyunsu Cho
parent 73bd590a1d
commit 725f4c36f2
@@ -168,7 +168,7 @@ cb.evaluation.log <- function() {
 #' at the beginning of each iteration.
 #'
 #' Note that when training is resumed from some previous model, and a function is used to
-#' reset a parameter value, the \code{nround} argument in this function would be the
+#' reset a parameter value, the \code{nrounds} argument in this function would be the
 #' number of boosting rounds in the current training.
 #'
 #' Callback function expects the following values to be set in its calling frame:
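
To make the corrected note concrete: a minimal sketch (not part of this commit) of resetting a parameter with a function via the package's cb.reset.parameters() callback. The decay schedule below is invented for illustration; the second argument passed to such a function is the nrounds of the current training call, which is exactly what the doc text refers to.

library(xgboost)
data(agaricus.train, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
# Illustrative schedule: eta shrinks over the rounds of *this* call;
# `nrounds` here is the current call's round count, not a cumulative total.
my_etas <- function(iteration, nrounds) 0.5 / (1 + iteration / nrounds)
bst <- xgb.train(list(max_depth = 2, objective = 'binary:logistic'),
                 dtrain, nrounds = 4,
                 callbacks = list(cb.reset.parameters(list(eta = my_etas))))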
@@ -52,9 +52,9 @@
 #' dtest <- xgb.DMatrix(data = agaricus.test$data, label = agaricus.test$label)
 #'
 #' param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
-#' nround = 4
+#' nrounds = 4
 #'
-#' bst = xgb.train(params = param, data = dtrain, nrounds = nround, nthread = 2)
+#' bst = xgb.train(params = param, data = dtrain, nrounds = nrounds, nthread = 2)
 #'
 #' # Model accuracy without new features
 #' accuracy.before <- sum((predict(bst, agaricus.test$data) >= 0.5) == agaricus.test$label) /
@@ -68,7 +68,7 @@
 #' new.dtrain <- xgb.DMatrix(data = new.features.train, label = agaricus.train$label)
 #' new.dtest <- xgb.DMatrix(data = new.features.test, label = agaricus.test$label)
 #' watchlist <- list(train = new.dtrain)
-#' bst <- xgb.train(params = param, data = new.dtrain, nrounds = nround, nthread = 2)
+#' bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
 #'
 #' # Model accuracy with new features
 #' accuracy.after <- sum((predict(bst, new.dtest) >= 0.5) == agaricus.test$label) /
@@ -22,7 +22,7 @@
 #' \item \code{gamma} minimum loss reduction required to make a further partition on a leaf node of the tree. The larger, the more conservative the algorithm will be.
 #' \item \code{max_depth} maximum depth of a tree. Default: 6
 #' \item \code{min_child_weight} minimum sum of instance weight (hessian) needed in a child. If the tree partition step results in a leaf node with the sum of instance weight less than min_child_weight, then the building process will give up further partitioning. In linear regression mode, this simply corresponds to the minimum number of instances needed in each node. The larger, the more conservative the algorithm will be. Default: 1
-#' \item \code{subsample} subsample ratio of the training instances. Setting it to 0.5 means that xgboost randomly collects half of the data instances to grow trees, which prevents overfitting. It also makes computation shorter (less data to analyse). It is advised to use this parameter with \code{eta} and to increase \code{nround}. Default: 1
+#' \item \code{subsample} subsample ratio of the training instances. Setting it to 0.5 means that xgboost randomly collects half of the data instances to grow trees, which prevents overfitting. It also makes computation shorter (less data to analyse). It is advised to use this parameter with \code{eta} and to increase \code{nrounds}. Default: 1
 #' \item \code{colsample_bytree} subsample ratio of columns when constructing each tree. Default: 1
 #' \item \code{num_parallel_tree} Experimental parameter. Number of trees to grow per round. Useful for testing Random Forest through XGBoost (set \code{colsample_bytree < 1}, \code{subsample < 1} and \code{round = 1} accordingly). Default: 1
 #' \item \code{monotone_constraints} A numerical vector consisting of \code{1}, \code{0} and \code{-1}, with length equal to the number of features in the training data. \code{1} is increasing, \code{-1} is decreasing and \code{0} is no constraint.
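
A hedged aside on the subsample note above (not part of this commit; values purely illustrative, and dtrain stands for any xgb.DMatrix): row subsampling adds variance per tree, which is typically compensated by a smaller eta and a larger nrounds.

param <- list(max_depth = 4, eta = 0.1, subsample = 0.5,
              objective = 'binary:logistic')
# Half the rows per tree, a tenth the learning rate, many more rounds.
bst <- xgb.train(params = param, data = dtrain, nrounds = 100)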
@@ -5,20 +5,20 @@ data(agaricus.test, package='xgboost')
 dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
 dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
 
-nround <- 2
+nrounds <- 2
 param <- list(max_depth=2, eta=1, silent=1, nthread=2, objective='binary:logistic')
 
 cat('running cross validation\n')
 # do cross validation, this will print result out as
 # [iteration] metric_name:mean_value+std_value
 # std_value is standard deviation of the metric
-xgb.cv(param, dtrain, nround, nfold=5, metrics={'error'})
+xgb.cv(param, dtrain, nrounds, nfold=5, metrics={'error'})
 
 cat('running cross validation, disable standard deviation display\n')
 # do cross validation, this will print result out as
 # [iteration] metric_name:mean_value+std_value
 # std_value is standard deviation of the metric
-xgb.cv(param, dtrain, nround, nfold=5,
+xgb.cv(param, dtrain, nrounds, nfold=5,
        metrics='error', showsd = FALSE)
 
 ###
@@ -43,9 +43,9 @@ evalerror <- function(preds, dtrain) {
 param <- list(max_depth=2, eta=1, silent=1,
               objective = logregobj, eval_metric = evalerror)
 # train with customized objective
-xgb.cv(params = param, data = dtrain, nrounds = nround, nfold = 5)
+xgb.cv(params = param, data = dtrain, nrounds = nrounds, nfold = 5)
 
 # do cross validation with prediction values for each fold
-res <- xgb.cv(params = param, data = dtrain, nrounds = nround, nfold = 5, prediction = TRUE)
+res <- xgb.cv(params = param, data = dtrain, nrounds = nrounds, nfold = 5, prediction = TRUE)
 res$evaluation_log
 length(res$pred)
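
For context, the logregobj/evalerror pair this hunk passes as objective and eval_metric is defined earlier in the same demo; a sketch along those lines (reconstructed here, not part of the diff):

# Custom objective: gradient and hessian of the logistic loss
# with respect to the raw (margin) scores.
logregobj <- function(preds, dtrain) {
  labels <- getinfo(dtrain, 'label')
  preds <- 1 / (1 + exp(-preds))   # margins -> probabilities
  grad <- preds - labels
  hess <- preds * (1 - preds)
  list(grad = grad, hess = hess)
}
# Custom evaluation metric: classification error on the margins.
evalerror <- function(preds, dtrain) {
  labels <- getinfo(dtrain, 'label')
  err <- mean(as.numeric(preds > 0) != labels)
  list(metric = 'error', value = err)
}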
@@ -7,10 +7,10 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
 
 param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
 watchlist <- list(eval = dtest, train = dtrain)
-nround = 2
+nrounds = 2
 
 # training the model for two rounds
-bst = xgb.train(param, dtrain, nround, nthread = 2, watchlist)
+bst = xgb.train(param, dtrain, nrounds, nthread = 2, watchlist)
 cat('start testing prediction from first n trees\n')
 labels <- getinfo(dtest,'label')
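
The demo then compares predictions that use only the first trees against the full ensemble; a hedged sketch of that step (assuming the ntreelimit argument of predict() available at the time):

# Predict with only the first tree, then with all trained trees.
ypred1 <- predict(bst, dtest, ntreelimit = 1)
ypred2 <- predict(bst, dtest)
cat('error of ypred1=', mean(as.numeric(ypred1 > 0.5) != labels), '\n')
cat('error of ypred2=', mean(as.numeric(ypred2 > 0.5) != labels), '\n')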
@@ -11,10 +11,10 @@ dtrain <- xgb.DMatrix(data = agaricus.train$data, label = agaricus.train$label)
 dtest <- xgb.DMatrix(data = agaricus.test$data, label = agaricus.test$label)
 
 param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
-nround = 4
+nrounds = 4
 
 # training the model for four rounds
-bst = xgb.train(params = param, data = dtrain, nrounds = nround, nthread = 2)
+bst = xgb.train(params = param, data = dtrain, nrounds = nrounds, nthread = 2)
 
 # Model accuracy without new features
 accuracy.before <- sum((predict(bst, agaricus.test$data) >= 0.5) == agaricus.test$label) / length(agaricus.test$label)
@@ -43,7 +43,7 @@ new.features.test <- create.new.tree.features(bst, agaricus.test$data)
 new.dtrain <- xgb.DMatrix(data = new.features.train, label = agaricus.train$label)
 new.dtest <- xgb.DMatrix(data = new.features.test, label = agaricus.test$label)
 watchlist <- list(train = new.dtrain)
-bst <- xgb.train(params = param, data = new.dtrain, nrounds = nround, nthread = 2)
+bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
 
 # Model accuracy with new features
 accuracy.after <- sum((predict(bst, new.dtest) >= 0.5) == agaricus.test$label) / length(agaricus.test$label)
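
The create.new.tree.features() helper used above builds its new features from the leaf that each tree assigns to every observation; a hedged sketch of the underlying call (illustrative, not part of the diff):

# Leaf indices: one row per observation, one column per tree.
pred_leaf <- predict(bst, agaricus.train$data, predleaf = TRUE)
dim(pred_leaf)  # n_observations x number of trees
# One-hot encoding each column yields the new binary features;
# xgb.create.features() in the package wraps the same idea.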
@@ -22,7 +22,7 @@ This is a "pre-iteration" callback function used to reset booster's parameters
 at the beginning of each iteration.
 
 Note that when training is resumed from some previous model, and a function is used to
-reset a parameter value, the \code{nround} argument in this function would be the
+reset a parameter value, the \code{nrounds} argument in this function would be the
 number of boosting rounds in the current training.
 
 Callback function expects the following values to be set in its calling frame:
@@ -63,9 +63,9 @@ dtrain <- xgb.DMatrix(data = agaricus.train$data, label = agaricus.train$label)
 dtest <- xgb.DMatrix(data = agaricus.test$data, label = agaricus.test$label)
 
 param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
-nround = 4
+nrounds = 4
 
-bst = xgb.train(params = param, data = dtrain, nrounds = nround, nthread = 2)
+bst = xgb.train(params = param, data = dtrain, nrounds = nrounds, nthread = 2)
 
 # Model accuracy without new features
 accuracy.before <- sum((predict(bst, agaricus.test$data) >= 0.5) == agaricus.test$label) /
@@ -79,7 +79,7 @@ new.features.test <- xgb.create.features(model = bst, agaricus.test$data)
 new.dtrain <- xgb.DMatrix(data = new.features.train, label = agaricus.train$label)
 new.dtest <- xgb.DMatrix(data = new.features.test, label = agaricus.test$label)
 watchlist <- list(train = new.dtrain)
-bst <- xgb.train(params = param, data = new.dtrain, nrounds = nround, nthread = 2)
+bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
 
 # Model accuracy with new features
 accuracy.after <- sum((predict(bst, new.dtest) >= 0.5) == agaricus.test$label) /
@@ -35,7 +35,7 @@ xgboost(data = NULL, label = NULL, missing = NA, weight = NULL,
 \item \code{gamma} minimum loss reduction required to make a further partition on a leaf node of the tree. The larger, the more conservative the algorithm will be.
 \item \code{max_depth} maximum depth of a tree. Default: 6
 \item \code{min_child_weight} minimum sum of instance weight (hessian) needed in a child. If the tree partition step results in a leaf node with the sum of instance weight less than min_child_weight, then the building process will give up further partitioning. In linear regression mode, this simply corresponds to the minimum number of instances needed in each node. The larger, the more conservative the algorithm will be. Default: 1
-\item \code{subsample} subsample ratio of the training instances. Setting it to 0.5 means that xgboost randomly collects half of the data instances to grow trees, which prevents overfitting. It also makes computation shorter (less data to analyse). It is advised to use this parameter with \code{eta} and to increase \code{nround}. Default: 1
+\item \code{subsample} subsample ratio of the training instances. Setting it to 0.5 means that xgboost randomly collects half of the data instances to grow trees, which prevents overfitting. It also makes computation shorter (less data to analyse). It is advised to use this parameter with \code{eta} and to increase \code{nrounds}. Default: 1
 \item \code{colsample_bytree} subsample ratio of columns when constructing each tree. Default: 1
 \item \code{num_parallel_tree} Experimental parameter. Number of trees to grow per round. Useful for testing Random Forest through XGBoost (set \code{colsample_bytree < 1}, \code{subsample < 1} and \code{round = 1} accordingly). Default: 1
 \item \code{monotone_constraints} A numerical vector consisting of \code{1}, \code{0} and \code{-1}, with length equal to the number of features in the training data. \code{1} is increasing, \code{-1} is decreasing and \code{0} is no constraint.
@@ -9,7 +9,7 @@ test_that("train and prediction when gctorture is on", {
 test <- agaricus.test
 gctorture(TRUE)
 bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-               eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
+               eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
 pred <- predict(bst, test$data)
 gctorture(FALSE)
 })
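
For context on this test: gctorture(TRUE) is base R and forces a garbage collection at (nearly) every allocation, so any handle the package fails to protect from R's GC fails deterministically; the test simply trains and predicts under that regime.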