To submit to CRAN, we cannot use more than 2 threads in our examples/vignettes.

hetong 2015-03-03 00:21:24 -08:00
parent 87ec48c1d3
commit 41b080e35f
36 changed files with 61 additions and 59 deletions
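Context for the recurring change: CRAN policy caps the number of threads/cores that package examples and vignettes may use at two, while xgboost otherwise defaults to using all available cores. Every hunk below therefore passes `nthread = 2` explicitly. A minimal sketch of the pattern, assembled from the example code in the diffs themselves (the bundled `agaricus` data and the parameters shown below):

```r
library(xgboost)

# Demo dataset shipped with the package, as used throughout the examples.
data(agaricus.train, package = "xgboost")
train <- agaricus.train

# nthread = 2 caps parallelism so the example stays within CRAN's limit;
# the remaining parameters mirror the updated examples below.
bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
               eta = 1, nthread = 2, nround = 2,
               objective = "binary:logistic")
```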

View File

@@ -94,8 +94,8 @@ Rpack:
 cp xgboost/src/Makevars xgboost/src/Makevars.win
 # R CMD build --no-build-vignettes xgboost
 R CMD build xgboost
-rm -rf xgboost
-R CMD check --as-cran xgboost*.tar.gz
+#rm -rf xgboost
+#R CMD check --as-cran xgboost*.tar.gz
 clean:
 $(RM) -rf $(OBJ) $(BIN) $(MPIBIN) $(MPIOBJ) $(SLIB) *.o */*.o */*/*.o *~ */*~ */*/*~

View File

@@ -24,6 +24,7 @@ importFrom(Ckmeans.1d.dp,Ckmeans.1d.dp)
 importFrom(DiagrammeR,mermaid)
 importFrom(Matrix,cBind)
 importFrom(Matrix,colSums)
+importFrom(Matrix,sparseVector)
 importFrom(data.table,":=")
 importFrom(data.table,as.data.table)
 importFrom(data.table,copy)
@@ -51,4 +52,3 @@ importFrom(stringr,str_match)
 importFrom(stringr,str_replace)
 importFrom(stringr,str_split)
 importFrom(stringr,str_trim)
-import(vcd)

View File

@@ -26,7 +26,7 @@ setClass("xgb.Booster",
 #' train <- agaricus.train
 #' test <- agaricus.test
 #' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-#' eta = 1, nround = 2,objective = "binary:logistic")
+#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 #' pred <- predict(bst, test$data)
 #' @export
 #'

View File

@@ -66,7 +66,7 @@
 #' @examples
 #' data(agaricus.train, package='xgboost')
 #' dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
-#' history <- xgb.cv(data = dtrain, nround=3, nfold = 5, metrics=list("rmse","auc"),
+#' history <- xgb.cv(data = dtrain, nround=3, nthread = 2, nfold = 5, metrics=list("rmse","auc"),
 #' "max.depth"=3, "eta"=1, "objective"="binary:logistic")
 #' print(history)
 #' @export

View File

@@ -29,7 +29,7 @@
 #' train <- agaricus.train
 #' test <- agaricus.test
 #' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-#' eta = 1, nround = 2,objective = "binary:logistic")
+#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 #' # save the model in file 'xgb.model.dump'
 #' xgb.dump(bst, 'xgb.model.dump', with.stats = TRUE)
 #'

View File

@@ -57,7 +57,7 @@
 #' train <- agaricus.train
 #'
 #' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-#' eta = 1, nround = 2,objective = "binary:logistic")
+#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 #'
 #' # train$data@@Dimnames[[2]] represents the column names of the sparse matrix.
 #' xgb.importance(train$data@@Dimnames[[2]], model = bst)

View File

@@ -10,7 +10,7 @@
 #' train <- agaricus.train
 #' test <- agaricus.test
 #' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-#' eta = 1, nround = 2,objective = "binary:logistic")
+#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 #' xgb.save(bst, 'xgb.model')
 #' bst <- xgb.load('xgb.model')
 #' pred <- predict(bst, test$data)

View File

@@ -49,7 +49,7 @@
 #' train <- agaricus.train
 #'
 #' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-#' eta = 1, nround = 2,objective = "binary:logistic")
+#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 #'
 #' #agaricus.test$data@@Dimnames[[2]] represents the column names of the sparse matrix.
 #' xgb.model.dt.tree(agaricus.train$data@@Dimnames[[2]], model = bst)

View File

@@ -33,7 +33,7 @@
 #' train <- agaricus.train
 #'
 #' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-#' eta = 1, nround = 2,objective = "binary:logistic")
+#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 #'
 #' #train$data@@Dimnames[[2]] represents the column names of the sparse matrix.
 #' importance_matrix <- xgb.importance(train$data@@Dimnames[[2]], model = bst)

View File

@@ -48,7 +48,7 @@
 #' train <- agaricus.train
 #'
 #' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-#' eta = 1, nround = 2,objective = "binary:logistic")
+#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 #'
 #' #agaricus.test$data@@Dimnames[[2]] represents the column names of the sparse matrix.
 #' xgb.plot.tree(agaricus.train$data@@Dimnames[[2]], model = bst)

View File

@@ -11,7 +11,7 @@
 #' train <- agaricus.train
 #' test <- agaricus.test
 #' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-#' eta = 1, nround = 2,objective = "binary:logistic")
+#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 #' xgb.save(bst, 'xgb.model')
 #' bst <- xgb.load('xgb.model')
 #' pred <- predict(bst, test$data)

View File

@@ -11,7 +11,7 @@
 #' train <- agaricus.train
 #' test <- agaricus.test
 #' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-#' eta = 1, nround = 2,objective = "binary:logistic")
+#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 #' raw <- xgb.save.raw(bst)
 #' bst <- xgb.load(raw)
 #' pred <- predict(bst, test$data)

View File

@@ -108,7 +108,7 @@
 #' err <- as.numeric(sum(labels != (preds > 0)))/length(labels)
 #' return(list(metric = "error", value = err))
 #' }
-#' bst <- xgb.train(param, dtrain, nround = 2, watchlist, logregobj, evalerror)
+#' bst <- xgb.train(param, dtrain, nthread = 2, nround = 2, watchlist, logregobj, evalerror)
 #' @export
 #'
 xgb.train <- function(params=list(), data, nrounds, watchlist = list(),

View File

@@ -45,7 +45,7 @@
 #' train <- agaricus.train
 #' test <- agaricus.test
 #' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-#' eta = 1, nround = 2,objective = "binary:logistic")
+#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 #' pred <- predict(bst, test$data)
 #'
 #' @export

View File

@@ -16,27 +16,28 @@ class(train$data)
 # use sparse matrix when your feature is sparse(e.g. when you using one-hot encoding vector)
 print("training xgboost with sparseMatrix")
 bst <- xgboost(data = train$data, label = train$label, max.depth = 2, eta = 1, nround = 2,
-objective = "binary:logistic")
+nthread = 2, objective = "binary:logistic")
 # alternatively, you can put in dense matrix, i.e. basic R-matrix
 print("training xgboost with Matrix")
 bst <- xgboost(data = as.matrix(train$data), label = train$label, max.depth = 2, eta = 1, nround = 2,
-objective = "binary:logistic")
+nthread = 2, objective = "binary:logistic")
 # you can also put in xgb.DMatrix object, stores label, data and other meta datas needed for advanced features
 print("training xgboost with xgb.DMatrix")
 dtrain <- xgb.DMatrix(data = train$data, label = train$label)
-bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2, objective = "binary:logistic")
+bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2, nthread = 2,
+objective = "binary:logistic")
 # Verbose = 0,1,2
 print ('train xgboost with verbose 0, no message')
 bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2,
-objective = "binary:logistic", verbose = 0)
+nthread = 2, objective = "binary:logistic", verbose = 0)
 print ('train xgboost with verbose 1, print evaluation metric')
 bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2,
-objective = "binary:logistic", verbose = 1)
+nthread = 2, objective = "binary:logistic", verbose = 1)
 print ('train xgboost with verbose 2, also print information about tree')
 bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2,
-objective = "binary:logistic", verbose = 2)
+nthread = 2, objective = "binary:logistic", verbose = 2)
 # you can also specify data as file path to a LibSVM format input
 # since we do not have this file with us, the following line is just for illustration
@@ -77,19 +78,19 @@ watchlist <- list(train=dtrain, test=dtest)
 # watchlist allows us to monitor the evaluation result on all data in the list
 print ('train xgboost using xgb.train with watchlist')
 bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nround=2, watchlist=watchlist,
-objective = "binary:logistic")
+nthread = 2, objective = "binary:logistic")
 # we can change evaluation metrics, or use multiple evaluation metrics
 print ('train xgboost using xgb.train with watchlist, watch logloss and error')
 bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nround=2, watchlist=watchlist,
 eval.metric = "error", eval.metric = "logloss",
-objective = "binary:logistic")
+nthread = 2, objective = "binary:logistic")
 # xgb.DMatrix can also be saved using xgb.DMatrix.save
 xgb.DMatrix.save(dtrain, "dtrain.buffer")
 # to load it in, simply call xgb.DMatrix
 dtrain2 <- xgb.DMatrix("dtrain.buffer")
 bst <- xgb.train(data=dtrain2, max.depth=2, eta=1, nround=2, watchlist=watchlist,
-objective = "binary:logistic")
+nthread = 2, objective = "binary:logistic")
 # information can be extracted from xgb.DMatrix using getinfo
 label = getinfo(dtest, "label")
 pred <- predict(bst, dtest)

View File

@@ -11,7 +11,7 @@ watchlist <- list(eval = dtest, train = dtrain)
 #
 print('start running example to start from a initial prediction')
 # train xgboost for 1 round
-param <- list(max.depth=2,eta=1,silent=1,objective='binary:logistic')
+param <- list(max.depth=2,eta=1,nthread = 2, silent=1,objective='binary:logistic')
 bst <- xgb.train( param, dtrain, 1, watchlist )
 # Note: we need the margin value instead of transformed prediction in set_base_margin
 # do predict with output_margin=TRUE, will always give you margin values before logistic transformation

View File

@@ -64,7 +64,7 @@ output_vector = df[,Y:=0][Improved == "Marked",Y:=1][,Y]
 # Following is the same process as other demo
 cat("Learning...\n")
 bst <- xgboost(data = sparse_matrix, label = output_vector, max.depth = 9,
-eta = 1, nround = 10,objective = "binary:logistic")
+eta = 1, nthread = 2, nround = 10,objective = "binary:logistic")
 xgb.dump(bst, 'xgb.model.dump', with.stats = T)
 # sparse_matrix@Dimnames[[2]] represents the column names of the sparse matrix.

View File

@@ -6,7 +6,7 @@ dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
 dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
 nround <- 2
-param <- list(max.depth=2,eta=1,silent=1,objective='binary:logistic')
+param <- list(max.depth=2,eta=1,silent=1,nthread = 2, objective='binary:logistic')
 cat('running cross validation\n')
 # do cross validation, this will print result out as

View File

@@ -8,7 +8,7 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
 # note: for customized objective function, we leave objective as default
 # note: what we are getting is margin value in prediction
 # you must know what you are doing
-param <- list(max.depth=2,eta=1,silent=1)
+param <- list(max.depth=2,eta=1,nthread = 2, silent=1)
 watchlist <- list(eval = dtest, train = dtrain)
 num_round <- 2

View File

@@ -15,7 +15,7 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
 # lambda is the L2 regularizer
 # you can also set lambda_bias which is L2 regularizer on the bias term
 param <- list(objective = "binary:logistic", booster = "gblinear",
-alpha = 0.0001, lambda = 1)
+nthread = 2, alpha = 0.0001, lambda = 1)
 # normally, you do not need to set eta (step_size)
 # XGBoost uses a parallel coordinate descent algorithm (shotgun),

View File

@@ -10,7 +10,7 @@ watchlist <- list(eval = dtest, train = dtrain)
 nround = 2
 # training the model for two rounds
-bst = xgb.train(param, dtrain, nround, watchlist)
+bst = xgb.train(param, dtrain, nround, nthread = 2, watchlist)
 cat('start testing prediction from first n trees\n')
 labels <- getinfo(dtest,'label')

View File

@@ -10,7 +10,7 @@ watchlist <- list(eval = dtest, train = dtrain)
 nround = 5
 # training the model for two rounds
-bst = xgb.train(param, dtrain, nround, watchlist)
+bst = xgb.train(param, dtrain, nround, nthread = 2, watchlist)
 cat('start testing prediction from first n trees\n')
 ### predict using first 2 tree

View File

@@ -37,7 +37,7 @@ data(agaricus.test, package='xgboost')
 train <- agaricus.train
 test <- agaricus.test
 bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-eta = 1, nround = 2,objective = "binary:logistic")
+eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 pred <- predict(bst, test$data)
 }

View File

@@ -78,7 +78,7 @@ This function only accepts an \code{xgb.DMatrix} object as the input.
 \examples{
 data(agaricus.train, package='xgboost')
 dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
-history <- xgb.cv(data = dtrain, nround=3, nfold = 5, metrics=list("rmse","auc"),
+history <- xgb.cv(data = dtrain, nround=3, nthread = 2, nfold = 5, metrics=list("rmse","auc"),
 "max.depth"=3, "eta"=1, "objective"="binary:logistic")
 print(history)
 }

View File

@@ -35,7 +35,7 @@ data(agaricus.test, package='xgboost')
 train <- agaricus.train
 test <- agaricus.test
 bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-eta = 1, nround = 2,objective = "binary:logistic")
+eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 # save the model in file 'xgb.model.dump'
 xgb.dump(bst, 'xgb.model.dump', with.stats = TRUE)

View File

@@ -59,7 +59,7 @@ data(agaricus.train, package='xgboost')
 train <- agaricus.train
 bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-eta = 1, nround = 2,objective = "binary:logistic")
+eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 # train$data@Dimnames[[2]] represents the column names of the sparse matrix.
 xgb.importance(train$data@Dimnames[[2]], model = bst)

View File

@@ -18,7 +18,7 @@ data(agaricus.test, package='xgboost')
 train <- agaricus.train
 test <- agaricus.test
 bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-eta = 1, nround = 2,objective = "binary:logistic")
+eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 xgb.save(bst, 'xgb.model')
 bst <- xgb.load('xgb.model')
 pred <- predict(bst, test$data)

View File

@@ -51,7 +51,7 @@ data(agaricus.train, package='xgboost')
 train <- agaricus.train
 bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-eta = 1, nround = 2,objective = "binary:logistic")
+eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 #agaricus.test$data@Dimnames[[2]] represents the column names of the sparse matrix.
 xgb.model.dt.tree(agaricus.train$data@Dimnames[[2]], model = bst)

View File

@@ -31,7 +31,7 @@ data(agaricus.train, package='xgboost')
 train <- agaricus.train
 bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-eta = 1, nround = 2,objective = "binary:logistic")
+eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 #train$data@Dimnames[[2]] represents the column names of the sparse matrix.
 importance_matrix <- xgb.importance(train$data@Dimnames[[2]], model = bst)

View File

@@ -50,7 +50,7 @@ data(agaricus.train, package='xgboost')
 train <- agaricus.train
 bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-eta = 1, nround = 2,objective = "binary:logistic")
+eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 #agaricus.test$data@Dimnames[[2]] represents the column names of the sparse matrix.
 xgb.plot.tree(agaricus.train$data@Dimnames[[2]], model = bst)

View File

@@ -20,7 +20,7 @@ data(agaricus.test, package='xgboost')
 train <- agaricus.train
 test <- agaricus.test
 bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-eta = 1, nround = 2,objective = "binary:logistic")
+eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 xgb.save(bst, 'xgb.model')
 bst <- xgb.load('xgb.model')
 pred <- predict(bst, test$data)

View File

@@ -19,7 +19,7 @@ data(agaricus.test, package='xgboost')
 train <- agaricus.train
 test <- agaricus.test
 bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-eta = 1, nround = 2,objective = "binary:logistic")
+eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 raw <- xgb.save.raw(bst)
 bst <- xgb.load(raw)
 pred <- predict(bst, test$data)

View File

@@ -121,6 +121,6 @@ evalerror <- function(preds, dtrain) {
 err <- as.numeric(sum(labels != (preds > 0)))/length(labels)
 return(list(metric = "error", value = err))
 }
-bst <- xgb.train(param, dtrain, nround = 2, watchlist, logregobj, evalerror)
+bst <- xgb.train(param, dtrain, nthread = 2, nround = 2, watchlist, logregobj, evalerror)
 }

View File

@@ -59,7 +59,7 @@ data(agaricus.test, package='xgboost')
 train <- agaricus.train
 test <- agaricus.test
 bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-eta = 1, nround = 2,objective = "binary:logistic")
+eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 pred <- predict(bst, test$data)
 }

View File

@@ -153,7 +153,7 @@ The code below is very usual. For more information, you can look at the document
 ```{r}
 bst <- xgboost(data = sparse_matrix, label = output_vector, max.depth = 4,
-eta = 1, nround = 10,objective = "binary:logistic")
+eta = 1, nthread = 2, nround = 10,objective = "binary:logistic")
 ```

View File

@@ -141,10 +141,11 @@ We will train decision tree model using the following parameters:
 * `objective = "binary:logistic"`: we will train a binary classification model ;
 * `max.deph = 2`: the trees won't be deep, because our case is very simple ;
+* `nthread = 2`: the number of cpu threads we are going to use;
 * `nround = 2`: there will be two passes on the data, the second one will enhance the model by further reducing the difference between ground truth and prediction.
 ```{r trainingSparse, message=F, warning=F}
-bstSparse <- xgboost(data = train$data, label = train$label, max.depth = 2, eta = 1, nround = 2, objective = "binary:logistic")
+bstSparse <- xgboost(data = train$data, label = train$label, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
 ```
 > More complex the relationship between your features and your `label` is, more passes you need.
@@ -156,7 +157,7 @@ bstSparse <- xgboost(data = train$data, label = train$label, max.depth = 2, eta
 Alternatively, you can put your dataset in a *dense* matrix, i.e. a basic **R** matrix.
 ```{r trainingDense, message=F, warning=F}
-bstDense <- xgboost(data = as.matrix(train$data), label = train$label, max.depth = 2, eta = 1, nround = 2, objective = "binary:logistic")
+bstDense <- xgboost(data = as.matrix(train$data), label = train$label, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
 ```
 #### xgb.DMatrix
@@ -165,7 +166,7 @@ bstDense <- xgboost(data = as.matrix(train$data), label = train$label, max.depth
 ```{r trainingDmatrix, message=F, warning=F}
 dtrain <- xgb.DMatrix(data = train$data, label = train$label)
-bstDMatrix <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2, objective = "binary:logistic")
+bstDMatrix <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
 ```
 #### Verbose option
@@ -176,17 +177,17 @@ One of the simplest way to see the training progress is to set the `verbose` opt
 ```{r trainingVerbose0, message=T, warning=F}
 # verbose = 0, no message
-bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2, objective = "binary:logistic", verbose = 0)
+bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic", verbose = 0)
 ```
 ```{r trainingVerbose1, message=T, warning=F}
 # verbose = 1, print evaluation metric
-bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2, objective = "binary:logistic", verbose = 1)
+bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic", verbose = 1)
 ```
 ```{r trainingVerbose2, message=T, warning=F}
 # verbose = 2, also print information about tree
-bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2, objective = "binary:logistic", verbose = 2)
+bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic", verbose = 2)
 ```
 Basic prediction using Xgboost
@@ -279,7 +280,7 @@ For the purpose of this example, we use `watchlist` parameter. It is a list of `
 ```{r watchlist, message=F, warning=F}
 watchlist <- list(train=dtrain, test=dtest)
-bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nround=2, watchlist=watchlist, objective = "binary:logistic")
+bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nthread = 2, nround=2, watchlist=watchlist, objective = "binary:logistic")
 ```
 **Xgboost** has computed at each round the same average error metric than seen above (we set `nround` to 2, that is why we have two lines). Obviously, the `train-error` number is related to the training dataset (the one the algorithm learns from) and the `test-error` number to the test dataset.
@@ -291,7 +292,7 @@ If with your own dataset you have not such results, you should think about how y
 For a better understanding of the learning progression, you may want to have some specific metric or even use multiple evaluation metrics.
 ```{r watchlist2, message=F, warning=F}
-bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nround=2, watchlist=watchlist, eval.metric = "error", eval.metric = "logloss", objective = "binary:logistic")
+bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nthread = 2, nround=2, watchlist=watchlist, eval.metric = "error", eval.metric = "logloss", objective = "binary:logistic")
 ```
 > `eval.metric` allows us to monitor two new metrics for each round, `logloss` and `error`.
@@ -302,7 +303,7 @@ Linear boosting
 Until know, all the learnings we have performed were based on boosting trees. **Xgboost** implements a second algorithm, based on linear boosting. The only difference with previous command is `booster = "gblinear"` parameter (and removing `eta` parameter).
 ```{r linearBoosting, message=F, warning=F}
-bst <- xgb.train(data=dtrain, booster = "gblinear", max.depth=2, nround=2, watchlist=watchlist, eval.metric = "error", eval.metric = "logloss", objective = "binary:logistic")
+bst <- xgb.train(data=dtrain, booster = "gblinear", max.depth=2, nthread = 2, nround=2, watchlist=watchlist, eval.metric = "error", eval.metric = "logloss", objective = "binary:logistic")
 ```
 In this specific case, *linear boosting* gets sligtly better performance metrics than decision trees based algorithm.
@@ -320,7 +321,7 @@ Like saving models, `xgb.DMatrix` object (which groups both dataset and outcome)
 xgb.DMatrix.save(dtrain, "dtrain.buffer")
 # to load it in, simply call xgb.DMatrix
 dtrain2 <- xgb.DMatrix("dtrain.buffer")
-bst <- xgb.train(data=dtrain2, max.depth=2, eta=1, nround=2, watchlist=watchlist, objective = "binary:logistic")
+bst <- xgb.train(data=dtrain2, max.depth=2, eta=1, nthread = 2, nround=2, watchlist=watchlist, objective = "binary:logistic")
 ```
 ```{r DMatrixDel, include=FALSE}