To submit to CRAN, we cannot use more than 2 threads in our examples/vignettes.

This commit is contained in:
hetong 2015-03-03 00:21:24 -08:00
parent 87ec48c1d3
commit 41b080e35f
36 changed files with 61 additions and 59 deletions
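CRAN's check machines cap examples and vignettes at two threads, so every runnable snippet in the package now passes `nthread = 2` explicitly instead of letting xgboost grab all available cores. The following is a minimal sketch of the pattern applied throughout this commit, using the same agaricus data and parameter values as the package's own examples:

```r
# Sketch of the thread-capped example pattern used across the R package.
# Assumes the xgboost R package and its bundled agaricus datasets are installed.
library(xgboost)
data(agaricus.train, package = "xgboost")
data(agaricus.test, package = "xgboost")
train <- agaricus.train
test <- agaricus.test

# nthread = 2 keeps the example within CRAN's thread limit;
# omitting it would let xgboost use every available core.
bst <- xgboost(data = train$data, label = train$label,
               max.depth = 2, eta = 1, nthread = 2, nround = 2,
               objective = "binary:logistic")
pred <- predict(bst, test$data)
```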

View File

@ -92,10 +92,10 @@ Rpack:
cp ./LICENSE xgboost
cat R-package/src/Makevars|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars
cp xgboost/src/Makevars xgboost/src/Makevars.win
#R CMD build --no-build-vignettes xgboost
# R CMD build --no-build-vignettes xgboost
R CMD build xgboost
rm -rf xgboost
R CMD check --as-cran xgboost*.tar.gz
#rm -rf xgboost
#R CMD check --as-cran xgboost*.tar.gz
clean:
$(RM) -rf $(OBJ) $(BIN) $(MPIBIN) $(MPIOBJ) $(SLIB) *.o */*.o */*/*.o *~ */*~ */*/*~

View File

@ -24,6 +24,7 @@ importFrom(Ckmeans.1d.dp,Ckmeans.1d.dp)
importFrom(DiagrammeR,mermaid)
importFrom(Matrix,cBind)
importFrom(Matrix,colSums)
importFrom(Matrix,sparseVector)
importFrom(data.table,":=")
importFrom(data.table,as.data.table)
importFrom(data.table,copy)
@ -51,4 +52,3 @@ importFrom(stringr,str_match)
importFrom(stringr,str_replace)
importFrom(stringr,str_split)
importFrom(stringr,str_trim)
import(vcd)

View File

@ -26,7 +26,7 @@ setClass("xgb.Booster",
#' train <- agaricus.train
#' test <- agaricus.test
#' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
#' eta = 1, nround = 2,objective = "binary:logistic")
#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
#' pred <- predict(bst, test$data)
#' @export
#'

View File

@ -66,7 +66,7 @@
#' @examples
#' data(agaricus.train, package='xgboost')
#' dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
#' history <- xgb.cv(data = dtrain, nround=3, nfold = 5, metrics=list("rmse","auc"),
#' history <- xgb.cv(data = dtrain, nround=3, nthread = 2, nfold = 5, metrics=list("rmse","auc"),
#' "max.depth"=3, "eta"=1, "objective"="binary:logistic")
#' print(history)
#' @export

View File

@ -29,7 +29,7 @@
#' train <- agaricus.train
#' test <- agaricus.test
#' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
#' eta = 1, nround = 2,objective = "binary:logistic")
#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
#' # save the model in file 'xgb.model.dump'
#' xgb.dump(bst, 'xgb.model.dump', with.stats = TRUE)
#'
@ -68,4 +68,4 @@ xgb.dump <- function(model = NULL, fname = NULL, fmap = "", with.stats=FALSE) {
# Avoid error messages during CRAN check.
# The reason is that these variables are never declared
# They are mainly column names inferred by Data.table...
globalVariables(c("Lines", "."))
globalVariables(c("Lines", "."))

View File

@ -57,7 +57,7 @@
#' train <- agaricus.train
#'
#' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
#' eta = 1, nround = 2,objective = "binary:logistic")
#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
#'
#' # train$data@@Dimnames[[2]] represents the column names of the sparse matrix.
#' xgb.importance(train$data@@Dimnames[[2]], model = bst)

View File

@ -10,7 +10,7 @@
#' train <- agaricus.train
#' test <- agaricus.test
#' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
#' eta = 1, nround = 2,objective = "binary:logistic")
#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
#' xgb.save(bst, 'xgb.model')
#' bst <- xgb.load('xgb.model')
#' pred <- predict(bst, test$data)

View File

@ -49,7 +49,7 @@
#' train <- agaricus.train
#'
#' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
#' eta = 1, nround = 2,objective = "binary:logistic")
#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
#'
#' #agaricus.test$data@@Dimnames[[2]] represents the column names of the sparse matrix.
#' xgb.model.dt.tree(agaricus.train$data@@Dimnames[[2]], model = bst)
@ -166,4 +166,4 @@ xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model
# Avoid error messages during CRAN check.
# The reason is that these variables are never declared
# They are mainly column names inferred by Data.table...
globalVariables(c("ID", "Tree", "Yes", ".", ".N", "Feature", "Cover", "Quality", "No", "Gain", "Frequence"))
globalVariables(c("ID", "Tree", "Yes", ".", ".N", "Feature", "Cover", "Quality", "No", "Gain", "Frequence"))

View File

@ -33,7 +33,7 @@
#' train <- agaricus.train
#'
#' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
#' eta = 1, nround = 2,objective = "binary:logistic")
#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
#'
#' #train$data@@Dimnames[[2]] represents the column names of the sparse matrix.
#' importance_matrix <- xgb.importance(train$data@@Dimnames[[2]], model = bst)
@ -59,4 +59,4 @@ xgb.plot.importance <- function(importance_matrix = NULL, numberOfClusters = c(1
# Avoid error messages during CRAN check.
# The reason is that these variables are never declared
# They are mainly column names inferred by Data.table...
globalVariables(c("Feature", "Gain", "Cluster"))
globalVariables(c("Feature", "Gain", "Cluster"))

View File

@ -48,7 +48,7 @@
#' train <- agaricus.train
#'
#' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
#' eta = 1, nround = 2,objective = "binary:logistic")
#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
#'
#' #agaricus.test$data@@Dimnames[[2]] represents the column names of the sparse matrix.
#' xgb.plot.tree(agaricus.train$data@@Dimnames[[2]], model = bst)
@ -91,4 +91,4 @@ xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NU
# Avoid error messages during CRAN check.
# The reason is that these variables are never declared
# They are mainly column names inferred by Data.table...
globalVariables(c("Feature", "yesPath", "ID", "Cover", "Quality", "Split", "Yes", "Yes.Feature", "noPath", "No", "No.Feature", "."))
globalVariables(c("Feature", "yesPath", "ID", "Cover", "Quality", "Split", "Yes", "Yes.Feature", "noPath", "No", "No.Feature", "."))

View File

@ -11,7 +11,7 @@
#' train <- agaricus.train
#' test <- agaricus.test
#' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
#' eta = 1, nround = 2,objective = "binary:logistic")
#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
#' xgb.save(bst, 'xgb.model')
#' bst <- xgb.load('xgb.model')
#' pred <- predict(bst, test$data)

View File

@ -11,7 +11,7 @@
#' train <- agaricus.train
#' test <- agaricus.test
#' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
#' eta = 1, nround = 2,objective = "binary:logistic")
#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
#' raw <- xgb.save.raw(bst)
#' bst <- xgb.load(raw)
#' pred <- predict(bst, test$data)

View File

@ -108,7 +108,7 @@
#' err <- as.numeric(sum(labels != (preds > 0)))/length(labels)
#' return(list(metric = "error", value = err))
#' }
#' bst <- xgb.train(param, dtrain, nround = 2, watchlist, logregobj, evalerror)
#' bst <- xgb.train(param, dtrain, nthread = 2, nround = 2, watchlist, logregobj, evalerror)
#' @export
#'
xgb.train <- function(params=list(), data, nrounds, watchlist = list(),

View File

@ -45,7 +45,7 @@
#' train <- agaricus.train
#' test <- agaricus.test
#' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
#' eta = 1, nround = 2,objective = "binary:logistic")
#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
#' pred <- predict(bst, test$data)
#'
#' @export

View File

@ -16,27 +16,28 @@ class(train$data)
# use a sparse matrix when your features are sparse (e.g. when you are using one-hot encoded vectors)
print("training xgboost with sparseMatrix")
bst <- xgboost(data = train$data, label = train$label, max.depth = 2, eta = 1, nround = 2,
objective = "binary:logistic")
nthread = 2, objective = "binary:logistic")
# alternatively, you can put in dense matrix, i.e. basic R-matrix
print("training xgboost with Matrix")
bst <- xgboost(data = as.matrix(train$data), label = train$label, max.depth = 2, eta = 1, nround = 2,
objective = "binary:logistic")
nthread = 2, objective = "binary:logistic")
# you can also put in an xgb.DMatrix object, which stores the label, data and other metadata needed for advanced features
print("training xgboost with xgb.DMatrix")
dtrain <- xgb.DMatrix(data = train$data, label = train$label)
bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2, objective = "binary:logistic")
bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2, nthread = 2,
objective = "binary:logistic")
# Verbose = 0,1,2
print ('train xgboost with verbose 0, no message')
bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2,
objective = "binary:logistic", verbose = 0)
nthread = 2, objective = "binary:logistic", verbose = 0)
print ('train xgboost with verbose 1, print evaluation metric')
bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2,
objective = "binary:logistic", verbose = 1)
nthread = 2, objective = "binary:logistic", verbose = 1)
print ('train xgboost with verbose 2, also print information about tree')
bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2,
objective = "binary:logistic", verbose = 2)
nthread = 2, objective = "binary:logistic", verbose = 2)
# you can also specify data as file path to a LibSVM format input
# since we do not have this file with us, the following line is just for illustration
@ -77,19 +78,19 @@ watchlist <- list(train=dtrain, test=dtest)
# watchlist allows us to monitor the evaluation result on all data in the list
print ('train xgboost using xgb.train with watchlist')
bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nround=2, watchlist=watchlist,
objective = "binary:logistic")
nthread = 2, objective = "binary:logistic")
# we can change evaluation metrics, or use multiple evaluation metrics
print ('train xgboost using xgb.train with watchlist, watch logloss and error')
bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nround=2, watchlist=watchlist,
eval.metric = "error", eval.metric = "logloss",
objective = "binary:logistic")
nthread = 2, objective = "binary:logistic")
# xgb.DMatrix can also be saved using xgb.DMatrix.save
xgb.DMatrix.save(dtrain, "dtrain.buffer")
# to load it in, simply call xgb.DMatrix
dtrain2 <- xgb.DMatrix("dtrain.buffer")
bst <- xgb.train(data=dtrain2, max.depth=2, eta=1, nround=2, watchlist=watchlist,
objective = "binary:logistic")
nthread = 2, objective = "binary:logistic")
# information can be extracted from xgb.DMatrix using getinfo
label = getinfo(dtest, "label")
pred <- predict(bst, dtest)

View File

@ -11,7 +11,7 @@ watchlist <- list(eval = dtest, train = dtrain)
#
print('start running example to start from an initial prediction')
# train xgboost for 1 round
param <- list(max.depth=2,eta=1,silent=1,objective='binary:logistic')
param <- list(max.depth=2,eta=1,nthread = 2, silent=1,objective='binary:logistic')
bst <- xgb.train( param, dtrain, 1, watchlist )
# Note: we need the margin value instead of the transformed prediction in set_base_margin
# do predict with output_margin=TRUE, which will always give you margin values before the logistic transformation

View File

@ -64,7 +64,7 @@ output_vector = df[,Y:=0][Improved == "Marked",Y:=1][,Y]
# Following is the same process as in the other demos
cat("Learning...\n")
bst <- xgboost(data = sparse_matrix, label = output_vector, max.depth = 9,
eta = 1, nround = 10,objective = "binary:logistic")
eta = 1, nthread = 2, nround = 10,objective = "binary:logistic")
xgb.dump(bst, 'xgb.model.dump', with.stats = T)
# sparse_matrix@Dimnames[[2]] represents the column names of the sparse matrix.

View File

@ -6,7 +6,7 @@ dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
nround <- 2
param <- list(max.depth=2,eta=1,silent=1,objective='binary:logistic')
param <- list(max.depth=2,eta=1,silent=1,nthread = 2, objective='binary:logistic')
cat('running cross validation\n')
# do cross validation, this will print result out as

View File

@ -8,7 +8,7 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
# note: for customized objective function, we leave objective as default
# note: what we are getting is margin value in prediction
# you must know what you are doing
param <- list(max.depth=2,eta=1,silent=1)
param <- list(max.depth=2,eta=1,nthread = 2, silent=1)
watchlist <- list(eval = dtest, train = dtrain)
num_round <- 2

View File

@ -15,7 +15,7 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
# lambda is the L2 regularizer
# you can also set lambda_bias which is L2 regularizer on the bias term
param <- list(objective = "binary:logistic", booster = "gblinear",
alpha = 0.0001, lambda = 1)
nthread = 2, alpha = 0.0001, lambda = 1)
# normally, you do not need to set eta (step_size)
# XGBoost uses a parallel coordinate descent algorithm (shotgun),

View File

@ -10,7 +10,7 @@ watchlist <- list(eval = dtest, train = dtrain)
nround = 2
# training the model for two rounds
bst = xgb.train(param, dtrain, nround, watchlist)
bst = xgb.train(param, dtrain, nround, nthread = 2, watchlist)
cat('start testing prediction from first n trees\n')
labels <- getinfo(dtest,'label')

View File

@ -10,7 +10,7 @@ watchlist <- list(eval = dtest, train = dtrain)
nround = 5
# training the model for five rounds
bst = xgb.train(param, dtrain, nround, watchlist)
bst = xgb.train(param, dtrain, nround, nthread = 2, watchlist)
cat('start testing prediction from first n trees\n')
### predict using first 2 tree

View File

@ -37,7 +37,7 @@ data(agaricus.test, package='xgboost')
train <- agaricus.train
test <- agaricus.test
bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
eta = 1, nround = 2,objective = "binary:logistic")
eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
pred <- predict(bst, test$data)
}

View File

@ -78,7 +78,7 @@ This function only accepts an \code{xgb.DMatrix} object as the input.
\examples{
data(agaricus.train, package='xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
history <- xgb.cv(data = dtrain, nround=3, nfold = 5, metrics=list("rmse","auc"),
history <- xgb.cv(data = dtrain, nround=3, nthread = 2, nfold = 5, metrics=list("rmse","auc"),
"max.depth"=3, "eta"=1, "objective"="binary:logistic")
print(history)
}

View File

@ -35,7 +35,7 @@ data(agaricus.test, package='xgboost')
train <- agaricus.train
test <- agaricus.test
bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
eta = 1, nround = 2,objective = "binary:logistic")
eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
# save the model in file 'xgb.model.dump'
xgb.dump(bst, 'xgb.model.dump', with.stats = TRUE)

View File

@ -59,7 +59,7 @@ data(agaricus.train, package='xgboost')
train <- agaricus.train
bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
eta = 1, nround = 2,objective = "binary:logistic")
eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
# train$data@Dimnames[[2]] represents the column names of the sparse matrix.
xgb.importance(train$data@Dimnames[[2]], model = bst)

View File

@ -18,7 +18,7 @@ data(agaricus.test, package='xgboost')
train <- agaricus.train
test <- agaricus.test
bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
eta = 1, nround = 2,objective = "binary:logistic")
eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
xgb.save(bst, 'xgb.model')
bst <- xgb.load('xgb.model')
pred <- predict(bst, test$data)

View File

@ -51,7 +51,7 @@ data(agaricus.train, package='xgboost')
train <- agaricus.train
bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
eta = 1, nround = 2,objective = "binary:logistic")
eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
#agaricus.test$data@Dimnames[[2]] represents the column names of the sparse matrix.
xgb.model.dt.tree(agaricus.train$data@Dimnames[[2]], model = bst)

View File

@ -31,7 +31,7 @@ data(agaricus.train, package='xgboost')
train <- agaricus.train
bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
eta = 1, nround = 2,objective = "binary:logistic")
eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
#train$data@Dimnames[[2]] represents the column names of the sparse matrix.
importance_matrix <- xgb.importance(train$data@Dimnames[[2]], model = bst)

View File

@ -50,7 +50,7 @@ data(agaricus.train, package='xgboost')
train <- agaricus.train
bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
eta = 1, nround = 2,objective = "binary:logistic")
eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
#agaricus.test$data@Dimnames[[2]] represents the column names of the sparse matrix.
xgb.plot.tree(agaricus.train$data@Dimnames[[2]], model = bst)

View File

@ -20,7 +20,7 @@ data(agaricus.test, package='xgboost')
train <- agaricus.train
test <- agaricus.test
bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
eta = 1, nround = 2,objective = "binary:logistic")
eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
xgb.save(bst, 'xgb.model')
bst <- xgb.load('xgb.model')
pred <- predict(bst, test$data)

View File

@ -19,7 +19,7 @@ data(agaricus.test, package='xgboost')
train <- agaricus.train
test <- agaricus.test
bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
eta = 1, nround = 2,objective = "binary:logistic")
eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
raw <- xgb.save.raw(bst)
bst <- xgb.load(raw)
pred <- predict(bst, test$data)

View File

@ -121,6 +121,6 @@ evalerror <- function(preds, dtrain) {
err <- as.numeric(sum(labels != (preds > 0)))/length(labels)
return(list(metric = "error", value = err))
}
bst <- xgb.train(param, dtrain, nround = 2, watchlist, logregobj, evalerror)
bst <- xgb.train(param, dtrain, nthread = 2, nround = 2, watchlist, logregobj, evalerror)
}

View File

@ -59,7 +59,7 @@ data(agaricus.test, package='xgboost')
train <- agaricus.train
test <- agaricus.test
bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
eta = 1, nround = 2,objective = "binary:logistic")
eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
pred <- predict(bst, test$data)
}

View File

@ -153,7 +153,7 @@ The code below is very usual. For more information, you can look at the document
```{r}
bst <- xgboost(data = sparse_matrix, label = output_vector, max.depth = 4,
eta = 1, nround = 10,objective = "binary:logistic")
eta = 1, nthread = 2, nround = 10,objective = "binary:logistic")
```

View File

@ -141,10 +141,11 @@ We will train decision tree model using the following parameters:
* `objective = "binary:logistic"`: we will train a binary classification model;
* `max.depth = 2`: the trees won't be deep, because our case is very simple;
* `nthread = 2`: the number of CPU threads we are going to use;
* `nround = 2`: there will be two passes on the data; the second one will enhance the model by further reducing the difference between ground truth and prediction.
```{r trainingSparse, message=F, warning=F}
bstSparse <- xgboost(data = train$data, label = train$label, max.depth = 2, eta = 1, nround = 2, objective = "binary:logistic")
bstSparse <- xgboost(data = train$data, label = train$label, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
```
> The more complex the relationship between your features and your `label`, the more passes you need.
@ -156,7 +157,7 @@ bstSparse <- xgboost(data = train$data, label = train$label, max.depth = 2, eta
Alternatively, you can put your dataset in a *dense* matrix, i.e. a basic **R** matrix.
```{r trainingDense, message=F, warning=F}
bstDense <- xgboost(data = as.matrix(train$data), label = train$label, max.depth = 2, eta = 1, nround = 2, objective = "binary:logistic")
bstDense <- xgboost(data = as.matrix(train$data), label = train$label, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
```
#### xgb.DMatrix
@ -165,7 +166,7 @@ bstDense <- xgboost(data = as.matrix(train$data), label = train$label, max.depth
```{r trainingDmatrix, message=F, warning=F}
dtrain <- xgb.DMatrix(data = train$data, label = train$label)
bstDMatrix <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2, objective = "binary:logistic")
bstDMatrix <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
```
#### Verbose option
@ -176,17 +177,17 @@ One of the simplest way to see the training progress is to set the `verbose` opt
```{r trainingVerbose0, message=T, warning=F}
# verbose = 0, no message
bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2, objective = "binary:logistic", verbose = 0)
bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic", verbose = 0)
```
```{r trainingVerbose1, message=T, warning=F}
# verbose = 1, print evaluation metric
bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2, objective = "binary:logistic", verbose = 1)
bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic", verbose = 1)
```
```{r trainingVerbose2, message=T, warning=F}
# verbose = 2, also print information about tree
bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2, objective = "binary:logistic", verbose = 2)
bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic", verbose = 2)
```
Basic prediction using Xgboost
@ -279,7 +280,7 @@ For the purpose of this example, we use `watchlist` parameter. It is a list of `
```{r watchlist, message=F, warning=F}
watchlist <- list(train=dtrain, test=dtest)
bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nround=2, watchlist=watchlist, objective = "binary:logistic")
bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nthread = 2, nround=2, watchlist=watchlist, objective = "binary:logistic")
```
**Xgboost** has computed at each round the same average error metric as seen above (we set `nround` to 2, which is why we have two lines). Obviously, the `train-error` number is related to the training dataset (the one the algorithm learns from) and the `test-error` number to the test dataset.
@ -291,7 +292,7 @@ If with your own dataset you have not such results, you should think about how y
For a better understanding of the learning progression, you may want to have some specific metric or even use multiple evaluation metrics.
```{r watchlist2, message=F, warning=F}
bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nround=2, watchlist=watchlist, eval.metric = "error", eval.metric = "logloss", objective = "binary:logistic")
bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nthread = 2, nround=2, watchlist=watchlist, eval.metric = "error", eval.metric = "logloss", objective = "binary:logistic")
```
> `eval.metric` allows us to monitor two new metrics for each round, `logloss` and `error`.
@ -302,7 +303,7 @@ Linear boosting
Until now, all the learning we have performed was based on boosting trees. **Xgboost** implements a second algorithm, based on linear boosting. The only difference from the previous command is the `booster = "gblinear"` parameter (and the removal of the `eta` parameter).
```{r linearBoosting, message=F, warning=F}
bst <- xgb.train(data=dtrain, booster = "gblinear", max.depth=2, nround=2, watchlist=watchlist, eval.metric = "error", eval.metric = "logloss", objective = "binary:logistic")
bst <- xgb.train(data=dtrain, booster = "gblinear", max.depth=2, nthread = 2, nround=2, watchlist=watchlist, eval.metric = "error", eval.metric = "logloss", objective = "binary:logistic")
```
In this specific case, *linear boosting* gets slightly better performance metrics than the decision tree-based algorithm.
@ -320,7 +321,7 @@ Like saving models, `xgb.DMatrix` object (which groups both dataset and outcome)
xgb.DMatrix.save(dtrain, "dtrain.buffer")
# to load it in, simply call xgb.DMatrix
dtrain2 <- xgb.DMatrix("dtrain.buffer")
bst <- xgb.train(data=dtrain2, max.depth=2, eta=1, nround=2, watchlist=watchlist, objective = "binary:logistic")
bst <- xgb.train(data=dtrain2, max.depth=2, eta=1, nthread = 2, nround=2, watchlist=watchlist, objective = "binary:logistic")
```
```{r DMatrixDel, include=FALSE}