To submit to CRAN, we cannot use more than 2 threads in our examples/vignettes

This commit is contained in:
parent 87ec48c1d3
commit 41b080e35f
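Concretely, the commit applies one pattern throughout the package's examples, demos, man pages, and vignettes: every training call gains an explicit `nthread = 2` (the NAMESPACE also picks up `Matrix::sparseVector` and drops the `vcd` import). A minimal sketch of the resulting example style, mirroring the calls in the diff below:

```r
# CRAN-friendly example style: cap training at 2 threads via nthread = 2.
library(xgboost)
data(agaricus.train, package = 'xgboost')
train <- agaricus.train
bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
               eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
```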
Makefile (6 lines changed)
@@ -92,10 +92,10 @@ Rpack:
 cp ./LICENSE xgboost
 cat R-package/src/Makevars|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars
 cp xgboost/src/Makevars xgboost/src/Makevars.win
-#R CMD build --no-build-vignettes xgboost
+# R CMD build --no-build-vignettes xgboost
 R CMD build xgboost
-rm -rf xgboost
-R CMD check --as-cran xgboost*.tar.gz
+#rm -rf xgboost
+#R CMD check --as-cran xgboost*.tar.gz
 
 clean:
 $(RM) -rf $(OBJ) $(BIN) $(MPIBIN) $(MPIOBJ) $(SLIB) *.o */*.o */*/*.o *~ */*~ */*/*~
@@ -24,6 +24,7 @@ importFrom(Ckmeans.1d.dp,Ckmeans.1d.dp)
 importFrom(DiagrammeR,mermaid)
 importFrom(Matrix,cBind)
 importFrom(Matrix,colSums)
+importFrom(Matrix,sparseVector)
 importFrom(data.table,":=")
 importFrom(data.table,as.data.table)
 importFrom(data.table,copy)
@@ -51,4 +52,3 @@ importFrom(stringr,str_match)
 importFrom(stringr,str_replace)
 importFrom(stringr,str_split)
 importFrom(stringr,str_trim)
-import(vcd)
@@ -26,7 +26,7 @@ setClass("xgb.Booster",
 #' train <- agaricus.train
 #' test <- agaricus.test
 #' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-#' eta = 1, nround = 2,objective = "binary:logistic")
+#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 #' pred <- predict(bst, test$data)
 #' @export
 #'
@@ -66,7 +66,7 @@
 #' @examples
 #' data(agaricus.train, package='xgboost')
 #' dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
-#' history <- xgb.cv(data = dtrain, nround=3, nfold = 5, metrics=list("rmse","auc"),
+#' history <- xgb.cv(data = dtrain, nround=3, nthread = 2, nfold = 5, metrics=list("rmse","auc"),
 #' "max.depth"=3, "eta"=1, "objective"="binary:logistic")
 #' print(history)
 #' @export
@@ -29,7 +29,7 @@
 #' train <- agaricus.train
 #' test <- agaricus.test
 #' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-#' eta = 1, nround = 2,objective = "binary:logistic")
+#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 #' # save the model in file 'xgb.model.dump'
 #' xgb.dump(bst, 'xgb.model.dump', with.stats = TRUE)
 #'
@@ -68,4 +68,4 @@ xgb.dump <- function(model = NULL, fname = NULL, fmap = "", with.stats=FALSE) {
 # Avoid error messages during CRAN check.
 # The reason is that these variables are never declared
 # They are mainly column names inferred by Data.table...
 globalVariables(c("Lines", "."))
@@ -57,7 +57,7 @@
 #' train <- agaricus.train
 #'
 #' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-#' eta = 1, nround = 2,objective = "binary:logistic")
+#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 #'
 #' # train$data@@Dimnames[[2]] represents the column names of the sparse matrix.
 #' xgb.importance(train$data@@Dimnames[[2]], model = bst)
@@ -10,7 +10,7 @@
 #' train <- agaricus.train
 #' test <- agaricus.test
 #' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-#' eta = 1, nround = 2,objective = "binary:logistic")
+#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 #' xgb.save(bst, 'xgb.model')
 #' bst <- xgb.load('xgb.model')
 #' pred <- predict(bst, test$data)
@@ -49,7 +49,7 @@
 #' train <- agaricus.train
 #'
 #' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-#' eta = 1, nround = 2,objective = "binary:logistic")
+#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 #'
 #' #agaricus.test$data@@Dimnames[[2]] represents the column names of the sparse matrix.
 #' xgb.model.dt.tree(agaricus.train$data@@Dimnames[[2]], model = bst)
@@ -166,4 +166,4 @@ xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model
 # Avoid error messages during CRAN check.
 # The reason is that these variables are never declared
 # They are mainly column names inferred by Data.table...
 globalVariables(c("ID", "Tree", "Yes", ".", ".N", "Feature", "Cover", "Quality", "No", "Gain", "Frequence"))
@@ -33,7 +33,7 @@
 #' train <- agaricus.train
 #'
 #' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-#' eta = 1, nround = 2,objective = "binary:logistic")
+#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 #'
 #' #train$data@@Dimnames[[2]] represents the column names of the sparse matrix.
 #' importance_matrix <- xgb.importance(train$data@@Dimnames[[2]], model = bst)
@@ -59,4 +59,4 @@ xgb.plot.importance <- function(importance_matrix = NULL, numberOfClusters = c(1
 # Avoid error messages during CRAN check.
 # The reason is that these variables are never declared
 # They are mainly column names inferred by Data.table...
 globalVariables(c("Feature", "Gain", "Cluster"))
@@ -48,7 +48,7 @@
 #' train <- agaricus.train
 #'
 #' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-#' eta = 1, nround = 2,objective = "binary:logistic")
+#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 #'
 #' #agaricus.test$data@@Dimnames[[2]] represents the column names of the sparse matrix.
 #' xgb.plot.tree(agaricus.train$data@@Dimnames[[2]], model = bst)
@@ -91,4 +91,4 @@ xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NU
 # Avoid error messages during CRAN check.
 # The reason is that these variables are never declared
 # They are mainly column names inferred by Data.table...
 globalVariables(c("Feature", "yesPath", "ID", "Cover", "Quality", "Split", "Yes", "Yes.Feature", "noPath", "No", "No.Feature", "."))
@@ -11,7 +11,7 @@
 #' train <- agaricus.train
 #' test <- agaricus.test
 #' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-#' eta = 1, nround = 2,objective = "binary:logistic")
+#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 #' xgb.save(bst, 'xgb.model')
 #' bst <- xgb.load('xgb.model')
 #' pred <- predict(bst, test$data)
@@ -11,7 +11,7 @@
 #' train <- agaricus.train
 #' test <- agaricus.test
 #' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-#' eta = 1, nround = 2,objective = "binary:logistic")
+#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 #' raw <- xgb.save.raw(bst)
 #' bst <- xgb.load(raw)
 #' pred <- predict(bst, test$data)
@@ -108,7 +108,7 @@
 #' err <- as.numeric(sum(labels != (preds > 0)))/length(labels)
 #' return(list(metric = "error", value = err))
 #' }
-#' bst <- xgb.train(param, dtrain, nround = 2, watchlist, logregobj, evalerror)
+#' bst <- xgb.train(param, dtrain, nthread = 2, nround = 2, watchlist, logregobj, evalerror)
 #' @export
 #'
 xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
@@ -45,7 +45,7 @@
 #' train <- agaricus.train
 #' test <- agaricus.test
 #' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-#' eta = 1, nround = 2,objective = "binary:logistic")
+#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 #' pred <- predict(bst, test$data)
 #'
 #' @export
@@ -16,27 +16,28 @@ class(train$data)
 # use sparse matrix when your feature is sparse(e.g. when you using one-hot encoding vector)
 print("training xgboost with sparseMatrix")
 bst <- xgboost(data = train$data, label = train$label, max.depth = 2, eta = 1, nround = 2,
-objective = "binary:logistic")
+nthread = 2, objective = "binary:logistic")
 # alternatively, you can put in dense matrix, i.e. basic R-matrix
 print("training xgboost with Matrix")
 bst <- xgboost(data = as.matrix(train$data), label = train$label, max.depth = 2, eta = 1, nround = 2,
-objective = "binary:logistic")
+nthread = 2, objective = "binary:logistic")
 
 # you can also put in xgb.DMatrix object, stores label, data and other meta datas needed for advanced features
 print("training xgboost with xgb.DMatrix")
 dtrain <- xgb.DMatrix(data = train$data, label = train$label)
-bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2, objective = "binary:logistic")
+bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2, nthread = 2,
+objective = "binary:logistic")
 
 # Verbose = 0,1,2
 print ('train xgboost with verbose 0, no message')
 bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2,
-objective = "binary:logistic", verbose = 0)
+nthread = 2, objective = "binary:logistic", verbose = 0)
 print ('train xgboost with verbose 1, print evaluation metric')
 bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2,
-objective = "binary:logistic", verbose = 1)
+nthread = 2, objective = "binary:logistic", verbose = 1)
 print ('train xgboost with verbose 2, also print information about tree')
 bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2,
-objective = "binary:logistic", verbose = 2)
+nthread = 2, objective = "binary:logistic", verbose = 2)
 
 # you can also specify data as file path to a LibSVM format input
 # since we do not have this file with us, the following line is just for illustration
@@ -77,19 +78,19 @@ watchlist <- list(train=dtrain, test=dtest)
 # watchlist allows us to monitor the evaluation result on all data in the list
 print ('train xgboost using xgb.train with watchlist')
 bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nround=2, watchlist=watchlist,
-objective = "binary:logistic")
+nthread = 2, objective = "binary:logistic")
 # we can change evaluation metrics, or use multiple evaluation metrics
 print ('train xgboost using xgb.train with watchlist, watch logloss and error')
 bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nround=2, watchlist=watchlist,
 eval.metric = "error", eval.metric = "logloss",
-objective = "binary:logistic")
+nthread = 2, objective = "binary:logistic")
 
 # xgb.DMatrix can also be saved using xgb.DMatrix.save
 xgb.DMatrix.save(dtrain, "dtrain.buffer")
 # to load it in, simply call xgb.DMatrix
 dtrain2 <- xgb.DMatrix("dtrain.buffer")
 bst <- xgb.train(data=dtrain2, max.depth=2, eta=1, nround=2, watchlist=watchlist,
-objective = "binary:logistic")
+nthread = 2, objective = "binary:logistic")
 # information can be extracted from xgb.DMatrix using getinfo
 label = getinfo(dtest, "label")
 pred <- predict(bst, dtest)
@@ -11,7 +11,7 @@ watchlist <- list(eval = dtest, train = dtrain)
 #
 print('start running example to start from a initial prediction')
 # train xgboost for 1 round
-param <- list(max.depth=2,eta=1,silent=1,objective='binary:logistic')
+param <- list(max.depth=2,eta=1,nthread = 2, silent=1,objective='binary:logistic')
 bst <- xgb.train( param, dtrain, 1, watchlist )
 # Note: we need the margin value instead of transformed prediction in set_base_margin
 # do predict with output_margin=TRUE, will always give you margin values before logistic transformation
@@ -64,7 +64,7 @@ output_vector = df[,Y:=0][Improved == "Marked",Y:=1][,Y]
 # Following is the same process as other demo
 cat("Learning...\n")
 bst <- xgboost(data = sparse_matrix, label = output_vector, max.depth = 9,
-eta = 1, nround = 10,objective = "binary:logistic")
+eta = 1, nthread = 2, nround = 10,objective = "binary:logistic")
 xgb.dump(bst, 'xgb.model.dump', with.stats = T)
 
 # sparse_matrix@Dimnames[[2]] represents the column names of the sparse matrix.
@@ -6,7 +6,7 @@ dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
 dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
 
 nround <- 2
-param <- list(max.depth=2,eta=1,silent=1,objective='binary:logistic')
+param <- list(max.depth=2,eta=1,silent=1,nthread = 2, objective='binary:logistic')
 
 cat('running cross validation\n')
 # do cross validation, this will print result out as
@@ -8,7 +8,7 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
 # note: for customized objective function, we leave objective as default
 # note: what we are getting is margin value in prediction
 # you must know what you are doing
-param <- list(max.depth=2,eta=1,silent=1)
+param <- list(max.depth=2,eta=1,nthread = 2, silent=1)
 watchlist <- list(eval = dtest, train = dtrain)
 num_round <- 2
 
@@ -15,7 +15,7 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
 # lambda is the L2 regularizer
 # you can also set lambda_bias which is L2 regularizer on the bias term
 param <- list(objective = "binary:logistic", booster = "gblinear",
-alpha = 0.0001, lambda = 1)
+nthread = 2, alpha = 0.0001, lambda = 1)
 
 # normally, you do not need to set eta (step_size)
 # XGBoost uses a parallel coordinate descent algorithm (shotgun),
@@ -10,7 +10,7 @@ watchlist <- list(eval = dtest, train = dtrain)
 nround = 2
 
 # training the model for two rounds
-bst = xgb.train(param, dtrain, nround, watchlist)
+bst = xgb.train(param, dtrain, nround, nthread = 2, watchlist)
 cat('start testing prediction from first n trees\n')
 labels <- getinfo(dtest,'label')
 
@@ -10,7 +10,7 @@ watchlist <- list(eval = dtest, train = dtrain)
 nround = 5
 
 # training the model for two rounds
-bst = xgb.train(param, dtrain, nround, watchlist)
+bst = xgb.train(param, dtrain, nround, nthread = 2, watchlist)
 cat('start testing prediction from first n trees\n')
 
 ### predict using first 2 tree
@@ -37,7 +37,7 @@ data(agaricus.test, package='xgboost')
 train <- agaricus.train
 test <- agaricus.test
 bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-eta = 1, nround = 2,objective = "binary:logistic")
+eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 pred <- predict(bst, test$data)
 }
 
@@ -78,7 +78,7 @@ This function only accepts an \code{xgb.DMatrix} object as the input.
 \examples{
 data(agaricus.train, package='xgboost')
 dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
-history <- xgb.cv(data = dtrain, nround=3, nfold = 5, metrics=list("rmse","auc"),
+history <- xgb.cv(data = dtrain, nround=3, nthread = 2, nfold = 5, metrics=list("rmse","auc"),
 "max.depth"=3, "eta"=1, "objective"="binary:logistic")
 print(history)
 }
@@ -35,7 +35,7 @@ data(agaricus.test, package='xgboost')
 train <- agaricus.train
 test <- agaricus.test
 bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-eta = 1, nround = 2,objective = "binary:logistic")
+eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 # save the model in file 'xgb.model.dump'
 xgb.dump(bst, 'xgb.model.dump', with.stats = TRUE)
 
@@ -59,7 +59,7 @@ data(agaricus.train, package='xgboost')
 train <- agaricus.train
 
 bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-eta = 1, nround = 2,objective = "binary:logistic")
+eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 
 # train$data@Dimnames[[2]] represents the column names of the sparse matrix.
 xgb.importance(train$data@Dimnames[[2]], model = bst)
@@ -18,7 +18,7 @@ data(agaricus.test, package='xgboost')
 train <- agaricus.train
 test <- agaricus.test
 bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-eta = 1, nround = 2,objective = "binary:logistic")
+eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 xgb.save(bst, 'xgb.model')
 bst <- xgb.load('xgb.model')
 pred <- predict(bst, test$data)
@@ -51,7 +51,7 @@ data(agaricus.train, package='xgboost')
 train <- agaricus.train
 
 bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-eta = 1, nround = 2,objective = "binary:logistic")
+eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 
 #agaricus.test$data@Dimnames[[2]] represents the column names of the sparse matrix.
 xgb.model.dt.tree(agaricus.train$data@Dimnames[[2]], model = bst)
@@ -31,7 +31,7 @@ data(agaricus.train, package='xgboost')
 train <- agaricus.train
 
 bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-eta = 1, nround = 2,objective = "binary:logistic")
+eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 
 #train$data@Dimnames[[2]] represents the column names of the sparse matrix.
 importance_matrix <- xgb.importance(train$data@Dimnames[[2]], model = bst)
@@ -50,7 +50,7 @@ data(agaricus.train, package='xgboost')
 train <- agaricus.train
 
 bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-eta = 1, nround = 2,objective = "binary:logistic")
+eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 
 #agaricus.test$data@Dimnames[[2]] represents the column names of the sparse matrix.
 xgb.plot.tree(agaricus.train$data@Dimnames[[2]], model = bst)
@@ -20,7 +20,7 @@ data(agaricus.test, package='xgboost')
 train <- agaricus.train
 test <- agaricus.test
 bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-eta = 1, nround = 2,objective = "binary:logistic")
+eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 xgb.save(bst, 'xgb.model')
 bst <- xgb.load('xgb.model')
 pred <- predict(bst, test$data)
@@ -19,7 +19,7 @@ data(agaricus.test, package='xgboost')
 train <- agaricus.train
 test <- agaricus.test
 bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-eta = 1, nround = 2,objective = "binary:logistic")
+eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 raw <- xgb.save.raw(bst)
 bst <- xgb.load(raw)
 pred <- predict(bst, test$data)
@@ -121,6 +121,6 @@ evalerror <- function(preds, dtrain) {
 err <- as.numeric(sum(labels != (preds > 0)))/length(labels)
 return(list(metric = "error", value = err))
 }
-bst <- xgb.train(param, dtrain, nround = 2, watchlist, logregobj, evalerror)
+bst <- xgb.train(param, dtrain, nthread = 2, nround = 2, watchlist, logregobj, evalerror)
 }
 
@@ -59,7 +59,7 @@ data(agaricus.test, package='xgboost')
 train <- agaricus.train
 test <- agaricus.test
 bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-eta = 1, nround = 2,objective = "binary:logistic")
+eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 pred <- predict(bst, test$data)
 }
 
@@ -153,7 +153,7 @@ The code below is very usual. For more information, you can look at the document
 
 ```{r}
 bst <- xgboost(data = sparse_matrix, label = output_vector, max.depth = 4,
-eta = 1, nround = 10,objective = "binary:logistic")
+eta = 1, nthread = 2, nround = 10,objective = "binary:logistic")
 
 ```
 
@@ -141,10 +141,11 @@ We will train decision tree model using the following parameters:
 
 * `objective = "binary:logistic"`: we will train a binary classification model ;
 * `max.deph = 2`: the trees won't be deep, because our case is very simple ;
+* `nthread = 2`: the number of cpu threads we are going to use;
 * `nround = 2`: there will be two passes on the data, the second one will enhance the model by further reducing the difference between ground truth and prediction.
 
 ```{r trainingSparse, message=F, warning=F}
-bstSparse <- xgboost(data = train$data, label = train$label, max.depth = 2, eta = 1, nround = 2, objective = "binary:logistic")
+bstSparse <- xgboost(data = train$data, label = train$label, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
 ```
 
 > More complex the relationship between your features and your `label` is, more passes you need.
@@ -156,7 +157,7 @@ bstSparse <- xgboost(data = train$data, label = train$label, max.depth = 2, eta
 Alternatively, you can put your dataset in a *dense* matrix, i.e. a basic **R** matrix.
 
 ```{r trainingDense, message=F, warning=F}
-bstDense <- xgboost(data = as.matrix(train$data), label = train$label, max.depth = 2, eta = 1, nround = 2, objective = "binary:logistic")
+bstDense <- xgboost(data = as.matrix(train$data), label = train$label, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
 ```
 
 #### xgb.DMatrix
@@ -165,7 +166,7 @@ bstDense <- xgboost(data = as.matrix(train$data), label = train$label, max.depth
 
 ```{r trainingDmatrix, message=F, warning=F}
 dtrain <- xgb.DMatrix(data = train$data, label = train$label)
-bstDMatrix <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2, objective = "binary:logistic")
+bstDMatrix <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
 ```
 
 #### Verbose option
@@ -176,17 +177,17 @@ One of the simplest way to see the training progress is to set the `verbose` opt
 
 ```{r trainingVerbose0, message=T, warning=F}
 # verbose = 0, no message
-bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2, objective = "binary:logistic", verbose = 0)
+bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic", verbose = 0)
 ```
 
 ```{r trainingVerbose1, message=T, warning=F}
 # verbose = 1, print evaluation metric
-bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2, objective = "binary:logistic", verbose = 1)
+bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic", verbose = 1)
 ```
 
 ```{r trainingVerbose2, message=T, warning=F}
 # verbose = 2, also print information about tree
-bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2, objective = "binary:logistic", verbose = 2)
+bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic", verbose = 2)
 ```
 
 Basic prediction using Xgboost
@@ -279,7 +280,7 @@ For the purpose of this example, we use `watchlist` parameter. It is a list of `
 ```{r watchlist, message=F, warning=F}
 watchlist <- list(train=dtrain, test=dtest)
 
-bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nround=2, watchlist=watchlist, objective = "binary:logistic")
+bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nthread = 2, nround=2, watchlist=watchlist, objective = "binary:logistic")
 ```
 
 **Xgboost** has computed at each round the same average error metric than seen above (we set `nround` to 2, that is why we have two lines). Obviously, the `train-error` number is related to the training dataset (the one the algorithm learns from) and the `test-error` number to the test dataset.
@@ -291,7 +292,7 @@ If with your own dataset you have not such results, you should think about how y
 For a better understanding of the learning progression, you may want to have some specific metric or even use multiple evaluation metrics.
 
 ```{r watchlist2, message=F, warning=F}
-bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nround=2, watchlist=watchlist, eval.metric = "error", eval.metric = "logloss", objective = "binary:logistic")
+bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nthread = 2, nround=2, watchlist=watchlist, eval.metric = "error", eval.metric = "logloss", objective = "binary:logistic")
 ```
 
 > `eval.metric` allows us to monitor two new metrics for each round, `logloss` and `error`.
@@ -302,7 +303,7 @@ Linear boosting
 Until know, all the learnings we have performed were based on boosting trees. **Xgboost** implements a second algorithm, based on linear boosting. The only difference with previous command is `booster = "gblinear"` parameter (and removing `eta` parameter).
 
 ```{r linearBoosting, message=F, warning=F}
-bst <- xgb.train(data=dtrain, booster = "gblinear", max.depth=2, nround=2, watchlist=watchlist, eval.metric = "error", eval.metric = "logloss", objective = "binary:logistic")
+bst <- xgb.train(data=dtrain, booster = "gblinear", max.depth=2, nthread = 2, nround=2, watchlist=watchlist, eval.metric = "error", eval.metric = "logloss", objective = "binary:logistic")
 ```
 
 In this specific case, *linear boosting* gets sligtly better performance metrics than decision trees based algorithm.
@@ -320,7 +321,7 @@ Like saving models, `xgb.DMatrix` object (which groups both dataset and outcome)
 xgb.DMatrix.save(dtrain, "dtrain.buffer")
 # to load it in, simply call xgb.DMatrix
 dtrain2 <- xgb.DMatrix("dtrain.buffer")
-bst <- xgb.train(data=dtrain2, max.depth=2, eta=1, nround=2, watchlist=watchlist, objective = "binary:logistic")
+bst <- xgb.train(data=dtrain2, max.depth=2, eta=1, nthread = 2, nround=2, watchlist=watchlist, objective = "binary:logistic")
 ```
 
 ```{r DMatrixDel, include=FALSE}