From 41b080e35fd0b90c71c13f7c33ac8c1194db6165 Mon Sep 17 00:00:00 2001 From: hetong Date: Tue, 3 Mar 2015 00:21:24 -0800 Subject: [PATCH] To submit to CRAN we cannot use more than 2 threads in our examples/vignettes --- Makefile | 6 +++--- R-package/NAMESPACE | 2 +- R-package/R/predict.xgb.Booster.R | 2 +- R-package/R/xgb.cv.R | 2 +- R-package/R/xgb.dump.R | 4 ++-- R-package/R/xgb.importance.R | 2 +- R-package/R/xgb.load.R | 2 +- R-package/R/xgb.model.dt.tree.R | 4 ++-- R-package/R/xgb.plot.importance.R | 4 ++-- R-package/R/xgb.plot.tree.R | 4 ++-- R-package/R/xgb.save.R | 2 +- R-package/R/xgb.save.raw.R | 2 +- R-package/R/xgb.train.R | 2 +- R-package/R/xgboost.R | 2 +- R-package/demo/basic_walkthrough.R | 19 ++++++++++--------- R-package/demo/boost_from_prediction.R | 2 +- R-package/demo/create_sparse_matrix.R | 2 +- R-package/demo/cross_validation.R | 2 +- R-package/demo/custom_objective.R | 2 +- R-package/demo/generalized_linear_model.R | 2 +- R-package/demo/predict_first_ntree.R | 2 +- R-package/demo/predict_leaf_indices.R | 2 +- R-package/man/predict-xgb.Booster-method.Rd | 2 +- R-package/man/xgb.cv.Rd | 2 +- R-package/man/xgb.dump.Rd | 2 +- R-package/man/xgb.importance.Rd | 2 +- R-package/man/xgb.load.Rd | 2 +- R-package/man/xgb.model.dt.tree.Rd | 2 +- R-package/man/xgb.plot.importance.Rd | 2 +- R-package/man/xgb.plot.tree.Rd | 2 +- R-package/man/xgb.save.Rd | 2 +- R-package/man/xgb.save.raw.Rd | 2 +- R-package/man/xgb.train.Rd | 2 +- R-package/man/xgboost.Rd | 2 +- R-package/vignettes/discoverYourData.Rmd | 2 +- R-package/vignettes/xgboostPresentation.Rmd | 21 +++++++++++---------- 36 files changed, 61 insertions(+), 59 deletions(-) diff --git a/Makefile b/Makefile index 5a51563f6..cddeb3c6f 100644 --- a/Makefile +++ b/Makefile @@ -92,10 +92,10 @@ Rpack: cp ./LICENSE xgboost cat R-package/src/Makevars|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars cp xgboost/src/Makevars xgboost/src/Makevars.win - #R CMD build --no-build-vignettes xgboost + # R CMD build 
--no-build-vignettes xgboost R CMD build xgboost - rm -rf xgboost - R CMD check --as-cran xgboost*.tar.gz + #rm -rf xgboost + #R CMD check --as-cran xgboost*.tar.gz clean: $(RM) -rf $(OBJ) $(BIN) $(MPIBIN) $(MPIOBJ) $(SLIB) *.o */*.o */*/*.o *~ */*~ */*/*~ diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE index 37a158d6b..99c97c729 100644 --- a/R-package/NAMESPACE +++ b/R-package/NAMESPACE @@ -24,6 +24,7 @@ importFrom(Ckmeans.1d.dp,Ckmeans.1d.dp) importFrom(DiagrammeR,mermaid) importFrom(Matrix,cBind) importFrom(Matrix,colSums) +importFrom(Matrix,sparseVector) importFrom(data.table,":=") importFrom(data.table,as.data.table) importFrom(data.table,copy) @@ -51,4 +52,3 @@ importFrom(stringr,str_match) importFrom(stringr,str_replace) importFrom(stringr,str_split) importFrom(stringr,str_trim) -import(vcd) diff --git a/R-package/R/predict.xgb.Booster.R b/R-package/R/predict.xgb.Booster.R index 52c40df9b..0c50b2504 100644 --- a/R-package/R/predict.xgb.Booster.R +++ b/R-package/R/predict.xgb.Booster.R @@ -26,7 +26,7 @@ setClass("xgb.Booster", #' train <- agaricus.train #' test <- agaricus.test #' bst <- xgboost(data = train$data, label = train$label, max.depth = 2, -#' eta = 1, nround = 2,objective = "binary:logistic") +#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic") #' pred <- predict(bst, test$data) #' @export #' diff --git a/R-package/R/xgb.cv.R b/R-package/R/xgb.cv.R index 18c8cb716..167055327 100644 --- a/R-package/R/xgb.cv.R +++ b/R-package/R/xgb.cv.R @@ -66,7 +66,7 @@ #' @examples #' data(agaricus.train, package='xgboost') #' dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label) -#' history <- xgb.cv(data = dtrain, nround=3, nfold = 5, metrics=list("rmse","auc"), +#' history <- xgb.cv(data = dtrain, nround=3, nthread = 2, nfold = 5, metrics=list("rmse","auc"), #' "max.depth"=3, "eta"=1, "objective"="binary:logistic") #' print(history) #' @export diff --git a/R-package/R/xgb.dump.R b/R-package/R/xgb.dump.R index 
fa5fe4149..10ac18b47 100644 --- a/R-package/R/xgb.dump.R +++ b/R-package/R/xgb.dump.R @@ -29,7 +29,7 @@ #' train <- agaricus.train #' test <- agaricus.test #' bst <- xgboost(data = train$data, label = train$label, max.depth = 2, -#' eta = 1, nround = 2,objective = "binary:logistic") +#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic") #' # save the model in file 'xgb.model.dump' #' xgb.dump(bst, 'xgb.model.dump', with.stats = TRUE) #' @@ -68,4 +68,4 @@ xgb.dump <- function(model = NULL, fname = NULL, fmap = "", with.stats=FALSE) { # Avoid error messages during CRAN check. # The reason is that these variables are never declared # They are mainly column names inferred by Data.table... -globalVariables(c("Lines", ".")) \ No newline at end of file +globalVariables(c("Lines", ".")) diff --git a/R-package/R/xgb.importance.R b/R-package/R/xgb.importance.R index c916a02f6..f7696d53e 100644 --- a/R-package/R/xgb.importance.R +++ b/R-package/R/xgb.importance.R @@ -57,7 +57,7 @@ #' train <- agaricus.train #' #' bst <- xgboost(data = train$data, label = train$label, max.depth = 2, -#' eta = 1, nround = 2,objective = "binary:logistic") +#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic") #' #' # train$data@@Dimnames[[2]] represents the column names of the sparse matrix. 
#' xgb.importance(train$data@@Dimnames[[2]], model = bst) diff --git a/R-package/R/xgb.load.R b/R-package/R/xgb.load.R index 33d440530..b19345b60 100644 --- a/R-package/R/xgb.load.R +++ b/R-package/R/xgb.load.R @@ -10,7 +10,7 @@ #' train <- agaricus.train #' test <- agaricus.test #' bst <- xgboost(data = train$data, label = train$label, max.depth = 2, -#' eta = 1, nround = 2,objective = "binary:logistic") +#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic") #' xgb.save(bst, 'xgb.model') #' bst <- xgb.load('xgb.model') #' pred <- predict(bst, test$data) diff --git a/R-package/R/xgb.model.dt.tree.R b/R-package/R/xgb.model.dt.tree.R index 42b3657b0..9d8bc5735 100644 --- a/R-package/R/xgb.model.dt.tree.R +++ b/R-package/R/xgb.model.dt.tree.R @@ -49,7 +49,7 @@ #' train <- agaricus.train #' #' bst <- xgboost(data = train$data, label = train$label, max.depth = 2, -#' eta = 1, nround = 2,objective = "binary:logistic") +#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic") #' #' #agaricus.test$data@@Dimnames[[2]] represents the column names of the sparse matrix. #' xgb.model.dt.tree(agaricus.train$data@@Dimnames[[2]], model = bst) @@ -166,4 +166,4 @@ xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model # Avoid error messages during CRAN check. # The reason is that these variables are never declared # They are mainly column names inferred by Data.table... 
-globalVariables(c("ID", "Tree", "Yes", ".", ".N", "Feature", "Cover", "Quality", "No", "Gain", "Frequence")) \ No newline at end of file +globalVariables(c("ID", "Tree", "Yes", ".", ".N", "Feature", "Cover", "Quality", "No", "Gain", "Frequence")) diff --git a/R-package/R/xgb.plot.importance.R b/R-package/R/xgb.plot.importance.R index b34a16c8b..66dcadaec 100644 --- a/R-package/R/xgb.plot.importance.R +++ b/R-package/R/xgb.plot.importance.R @@ -33,7 +33,7 @@ #' train <- agaricus.train #' #' bst <- xgboost(data = train$data, label = train$label, max.depth = 2, -#' eta = 1, nround = 2,objective = "binary:logistic") +#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic") #' #' #train$data@@Dimnames[[2]] represents the column names of the sparse matrix. #' importance_matrix <- xgb.importance(train$data@@Dimnames[[2]], model = bst) @@ -59,4 +59,4 @@ xgb.plot.importance <- function(importance_matrix = NULL, numberOfClusters = c(1 # Avoid error messages during CRAN check. # The reason is that these variables are never declared # They are mainly column names inferred by Data.table... -globalVariables(c("Feature", "Gain", "Cluster")) \ No newline at end of file +globalVariables(c("Feature", "Gain", "Cluster")) diff --git a/R-package/R/xgb.plot.tree.R b/R-package/R/xgb.plot.tree.R index 0747f0a14..7f7d74d57 100644 --- a/R-package/R/xgb.plot.tree.R +++ b/R-package/R/xgb.plot.tree.R @@ -48,7 +48,7 @@ #' train <- agaricus.train #' #' bst <- xgboost(data = train$data, label = train$label, max.depth = 2, -#' eta = 1, nround = 2,objective = "binary:logistic") +#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic") #' #' #agaricus.test$data@@Dimnames[[2]] represents the column names of the sparse matrix. #' xgb.plot.tree(agaricus.train$data@@Dimnames[[2]], model = bst) @@ -91,4 +91,4 @@ xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NU # Avoid error messages during CRAN check. 
# The reason is that these variables are never declared # They are mainly column names inferred by Data.table... -globalVariables(c("Feature", "yesPath", "ID", "Cover", "Quality", "Split", "Yes", "Yes.Feature", "noPath", "No", "No.Feature", ".")) \ No newline at end of file +globalVariables(c("Feature", "yesPath", "ID", "Cover", "Quality", "Split", "Yes", "Yes.Feature", "noPath", "No", "No.Feature", ".")) diff --git a/R-package/R/xgb.save.R b/R-package/R/xgb.save.R index 59c5d2ecd..2600b8cff 100644 --- a/R-package/R/xgb.save.R +++ b/R-package/R/xgb.save.R @@ -11,7 +11,7 @@ #' train <- agaricus.train #' test <- agaricus.test #' bst <- xgboost(data = train$data, label = train$label, max.depth = 2, -#' eta = 1, nround = 2,objective = "binary:logistic") +#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic") #' xgb.save(bst, 'xgb.model') #' bst <- xgb.load('xgb.model') #' pred <- predict(bst, test$data) diff --git a/R-package/R/xgb.save.raw.R b/R-package/R/xgb.save.raw.R index 7f3a2df21..e885e6e7e 100644 --- a/R-package/R/xgb.save.raw.R +++ b/R-package/R/xgb.save.raw.R @@ -11,7 +11,7 @@ #' train <- agaricus.train #' test <- agaricus.test #' bst <- xgboost(data = train$data, label = train$label, max.depth = 2, -#' eta = 1, nround = 2,objective = "binary:logistic") +#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic") #' raw <- xgb.save.raw(bst) #' bst <- xgb.load(raw) #' pred <- predict(bst, test$data) diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R index c3db198ae..79ef3b4a1 100644 --- a/R-package/R/xgb.train.R +++ b/R-package/R/xgb.train.R @@ -108,7 +108,7 @@ #' err <- as.numeric(sum(labels != (preds > 0)))/length(labels) #' return(list(metric = "error", value = err)) #' } -#' bst <- xgb.train(param, dtrain, nround = 2, watchlist, logregobj, evalerror) +#' bst <- xgb.train(param, dtrain, nthread = 2, nround = 2, watchlist, logregobj, evalerror) #' @export #' xgb.train <- function(params=list(), data, nrounds, watchlist = 
list(), diff --git a/R-package/R/xgboost.R b/R-package/R/xgboost.R index f7c372aad..f20b260c9 100644 --- a/R-package/R/xgboost.R +++ b/R-package/R/xgboost.R @@ -45,7 +45,7 @@ #' train <- agaricus.train #' test <- agaricus.test #' bst <- xgboost(data = train$data, label = train$label, max.depth = 2, -#' eta = 1, nround = 2,objective = "binary:logistic") +#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic") #' pred <- predict(bst, test$data) #' #' @export diff --git a/R-package/demo/basic_walkthrough.R b/R-package/demo/basic_walkthrough.R index c00765f58..762a1c8e8 100644 --- a/R-package/demo/basic_walkthrough.R +++ b/R-package/demo/basic_walkthrough.R @@ -16,27 +16,28 @@ class(train$data) # use sparse matrix when your feature is sparse(e.g. when you using one-hot encoding vector) print("training xgboost with sparseMatrix") bst <- xgboost(data = train$data, label = train$label, max.depth = 2, eta = 1, nround = 2, - objective = "binary:logistic") + nthread = 2, objective = "binary:logistic") # alternatively, you can put in dense matrix, i.e. 
basic R-matrix print("training xgboost with Matrix") bst <- xgboost(data = as.matrix(train$data), label = train$label, max.depth = 2, eta = 1, nround = 2, - objective = "binary:logistic") + nthread = 2, objective = "binary:logistic") # you can also put in xgb.DMatrix object, stores label, data and other meta datas needed for advanced features print("training xgboost with xgb.DMatrix") dtrain <- xgb.DMatrix(data = train$data, label = train$label) -bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2, objective = "binary:logistic") +bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2, nthread = 2, + objective = "binary:logistic") # Verbose = 0,1,2 print ('train xgboost with verbose 0, no message') bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2, - objective = "binary:logistic", verbose = 0) + nthread = 2, objective = "binary:logistic", verbose = 0) print ('train xgboost with verbose 1, print evaluation metric') bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2, - objective = "binary:logistic", verbose = 1) + nthread = 2, objective = "binary:logistic", verbose = 1) print ('train xgboost with verbose 2, also print information about tree') bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2, - objective = "binary:logistic", verbose = 2) + nthread = 2, objective = "binary:logistic", verbose = 2) # you can also specify data as file path to a LibSVM format input # since we do not have this file with us, the following line is just for illustration @@ -77,19 +78,19 @@ watchlist <- list(train=dtrain, test=dtest) # watchlist allows us to monitor the evaluation result on all data in the list print ('train xgboost using xgb.train with watchlist') bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nround=2, watchlist=watchlist, - objective = "binary:logistic") + nthread = 2, objective = "binary:logistic") # we can change evaluation metrics, or use multiple evaluation metrics print ('train xgboost using xgb.train 
with watchlist, watch logloss and error') bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nround=2, watchlist=watchlist, eval.metric = "error", eval.metric = "logloss", - objective = "binary:logistic") + nthread = 2, objective = "binary:logistic") # xgb.DMatrix can also be saved using xgb.DMatrix.save xgb.DMatrix.save(dtrain, "dtrain.buffer") # to load it in, simply call xgb.DMatrix dtrain2 <- xgb.DMatrix("dtrain.buffer") bst <- xgb.train(data=dtrain2, max.depth=2, eta=1, nround=2, watchlist=watchlist, - objective = "binary:logistic") + nthread = 2, objective = "binary:logistic") # information can be extracted from xgb.DMatrix using getinfo label = getinfo(dtest, "label") pred <- predict(bst, dtest) diff --git a/R-package/demo/boost_from_prediction.R b/R-package/demo/boost_from_prediction.R index bbf45f4a0..9d7db806b 100644 --- a/R-package/demo/boost_from_prediction.R +++ b/R-package/demo/boost_from_prediction.R @@ -11,7 +11,7 @@ watchlist <- list(eval = dtest, train = dtrain) # print('start running example to start from a initial prediction') # train xgboost for 1 round -param <- list(max.depth=2,eta=1,silent=1,objective='binary:logistic') +param <- list(max.depth=2,eta=1,nthread = 2, silent=1,objective='binary:logistic') bst <- xgb.train( param, dtrain, 1, watchlist ) # Note: we need the margin value instead of transformed prediction in set_base_margin # do predict with output_margin=TRUE, will always give you margin values before logistic transformation diff --git a/R-package/demo/create_sparse_matrix.R b/R-package/demo/create_sparse_matrix.R index ac96510a3..e3a536cfe 100644 --- a/R-package/demo/create_sparse_matrix.R +++ b/R-package/demo/create_sparse_matrix.R @@ -64,7 +64,7 @@ output_vector = df[,Y:=0][Improved == "Marked",Y:=1][,Y] # Following is the same process as other demo cat("Learning...\n") bst <- xgboost(data = sparse_matrix, label = output_vector, max.depth = 9, - eta = 1, nround = 10,objective = "binary:logistic") + eta = 1, nthread = 2, nround = 
10,objective = "binary:logistic") xgb.dump(bst, 'xgb.model.dump', with.stats = T) # sparse_matrix@Dimnames[[2]] represents the column names of the sparse matrix. diff --git a/R-package/demo/cross_validation.R b/R-package/demo/cross_validation.R index ed78f93ed..fbb38f6d8 100644 --- a/R-package/demo/cross_validation.R +++ b/R-package/demo/cross_validation.R @@ -6,7 +6,7 @@ dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label) dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label) nround <- 2 -param <- list(max.depth=2,eta=1,silent=1,objective='binary:logistic') +param <- list(max.depth=2,eta=1,silent=1,nthread = 2, objective='binary:logistic') cat('running cross validation\n') # do cross validation, this will print result out as diff --git a/R-package/demo/custom_objective.R b/R-package/demo/custom_objective.R index 6a2f34c15..b0a0a02ca 100644 --- a/R-package/demo/custom_objective.R +++ b/R-package/demo/custom_objective.R @@ -8,7 +8,7 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label) # note: for customized objective function, we leave objective as default # note: what we are getting is margin value in prediction # you must know what you are doing -param <- list(max.depth=2,eta=1,silent=1) +param <- list(max.depth=2,eta=1,nthread = 2, silent=1) watchlist <- list(eval = dtest, train = dtrain) num_round <- 2 diff --git a/R-package/demo/generalized_linear_model.R b/R-package/demo/generalized_linear_model.R index 5c931c986..3c2cdb542 100644 --- a/R-package/demo/generalized_linear_model.R +++ b/R-package/demo/generalized_linear_model.R @@ -15,7 +15,7 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label) # lambda is the L2 regularizer # you can also set lambda_bias which is L2 regularizer on the bias term param <- list(objective = "binary:logistic", booster = "gblinear", - alpha = 0.0001, lambda = 1) + nthread = 2, alpha = 0.0001, lambda = 1) # normally, you do not need to set eta (step_size) # 
XGBoost uses a parallel coordinate descent algorithm (shotgun), diff --git a/R-package/demo/predict_first_ntree.R b/R-package/demo/predict_first_ntree.R index 964203e9f..422201b0a 100644 --- a/R-package/demo/predict_first_ntree.R +++ b/R-package/demo/predict_first_ntree.R @@ -10,7 +10,7 @@ watchlist <- list(eval = dtest, train = dtrain) nround = 2 # training the model for two rounds -bst = xgb.train(param, dtrain, nround, watchlist) +bst = xgb.train(param, dtrain, nround, nthread = 2, watchlist) cat('start testing prediction from first n trees\n') labels <- getinfo(dtest,'label') diff --git a/R-package/demo/predict_leaf_indices.R b/R-package/demo/predict_leaf_indices.R index 480578c1d..c03a17955 100644 --- a/R-package/demo/predict_leaf_indices.R +++ b/R-package/demo/predict_leaf_indices.R @@ -10,7 +10,7 @@ watchlist <- list(eval = dtest, train = dtrain) nround = 5 # training the model for two rounds -bst = xgb.train(param, dtrain, nround, watchlist) +bst = xgb.train(param, dtrain, nround, nthread = 2, watchlist) cat('start testing prediction from first n trees\n') ### predict using first 2 tree diff --git a/R-package/man/predict-xgb.Booster-method.Rd b/R-package/man/predict-xgb.Booster-method.Rd index 17d0e5a62..06fdb2ca8 100644 --- a/R-package/man/predict-xgb.Booster-method.Rd +++ b/R-package/man/predict-xgb.Booster-method.Rd @@ -37,7 +37,7 @@ data(agaricus.test, package='xgboost') train <- agaricus.train test <- agaricus.test bst <- xgboost(data = train$data, label = train$label, max.depth = 2, - eta = 1, nround = 2,objective = "binary:logistic") + eta = 1, nthread = 2, nround = 2,objective = "binary:logistic") pred <- predict(bst, test$data) } diff --git a/R-package/man/xgb.cv.Rd b/R-package/man/xgb.cv.Rd index 75b31cee1..93feba569 100644 --- a/R-package/man/xgb.cv.Rd +++ b/R-package/man/xgb.cv.Rd @@ -78,7 +78,7 @@ This function only accepts an \code{xgb.DMatrix} object as the input. 
\examples{ data(agaricus.train, package='xgboost') dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label) -history <- xgb.cv(data = dtrain, nround=3, nfold = 5, metrics=list("rmse","auc"), +history <- xgb.cv(data = dtrain, nround=3, nthread = 2, nfold = 5, metrics=list("rmse","auc"), "max.depth"=3, "eta"=1, "objective"="binary:logistic") print(history) } diff --git a/R-package/man/xgb.dump.Rd b/R-package/man/xgb.dump.Rd index 7958a72e8..3c074928d 100644 --- a/R-package/man/xgb.dump.Rd +++ b/R-package/man/xgb.dump.Rd @@ -35,7 +35,7 @@ data(agaricus.test, package='xgboost') train <- agaricus.train test <- agaricus.test bst <- xgboost(data = train$data, label = train$label, max.depth = 2, - eta = 1, nround = 2,objective = "binary:logistic") + eta = 1, nthread = 2, nround = 2,objective = "binary:logistic") # save the model in file 'xgb.model.dump' xgb.dump(bst, 'xgb.model.dump', with.stats = TRUE) diff --git a/R-package/man/xgb.importance.Rd b/R-package/man/xgb.importance.Rd index f57251755..674a54622 100644 --- a/R-package/man/xgb.importance.Rd +++ b/R-package/man/xgb.importance.Rd @@ -59,7 +59,7 @@ data(agaricus.train, package='xgboost') train <- agaricus.train bst <- xgboost(data = train$data, label = train$label, max.depth = 2, - eta = 1, nround = 2,objective = "binary:logistic") + eta = 1, nthread = 2, nround = 2,objective = "binary:logistic") # train$data@Dimnames[[2]] represents the column names of the sparse matrix. 
xgb.importance(train$data@Dimnames[[2]], model = bst) diff --git a/R-package/man/xgb.load.Rd b/R-package/man/xgb.load.Rd index 433b38c79..4caef6239 100644 --- a/R-package/man/xgb.load.Rd +++ b/R-package/man/xgb.load.Rd @@ -18,7 +18,7 @@ data(agaricus.test, package='xgboost') train <- agaricus.train test <- agaricus.test bst <- xgboost(data = train$data, label = train$label, max.depth = 2, - eta = 1, nround = 2,objective = "binary:logistic") + eta = 1, nthread = 2, nround = 2,objective = "binary:logistic") xgb.save(bst, 'xgb.model') bst <- xgb.load('xgb.model') pred <- predict(bst, test$data) diff --git a/R-package/man/xgb.model.dt.tree.Rd b/R-package/man/xgb.model.dt.tree.Rd index 604607209..df308a954 100644 --- a/R-package/man/xgb.model.dt.tree.Rd +++ b/R-package/man/xgb.model.dt.tree.Rd @@ -51,7 +51,7 @@ data(agaricus.train, package='xgboost') train <- agaricus.train bst <- xgboost(data = train$data, label = train$label, max.depth = 2, - eta = 1, nround = 2,objective = "binary:logistic") + eta = 1, nthread = 2, nround = 2,objective = "binary:logistic") #agaricus.test$data@Dimnames[[2]] represents the column names of the sparse matrix. xgb.model.dt.tree(agaricus.train$data@Dimnames[[2]], model = bst) diff --git a/R-package/man/xgb.plot.importance.Rd b/R-package/man/xgb.plot.importance.Rd index 5a8dab5c5..0797b89c2 100644 --- a/R-package/man/xgb.plot.importance.Rd +++ b/R-package/man/xgb.plot.importance.Rd @@ -31,7 +31,7 @@ data(agaricus.train, package='xgboost') train <- agaricus.train bst <- xgboost(data = train$data, label = train$label, max.depth = 2, - eta = 1, nround = 2,objective = "binary:logistic") + eta = 1, nthread = 2, nround = 2,objective = "binary:logistic") #train$data@Dimnames[[2]] represents the column names of the sparse matrix. 
importance_matrix <- xgb.importance(train$data@Dimnames[[2]], model = bst) diff --git a/R-package/man/xgb.plot.tree.Rd b/R-package/man/xgb.plot.tree.Rd index 8aec827ec..476dbda11 100644 --- a/R-package/man/xgb.plot.tree.Rd +++ b/R-package/man/xgb.plot.tree.Rd @@ -50,7 +50,7 @@ data(agaricus.train, package='xgboost') train <- agaricus.train bst <- xgboost(data = train$data, label = train$label, max.depth = 2, - eta = 1, nround = 2,objective = "binary:logistic") + eta = 1, nthread = 2, nround = 2,objective = "binary:logistic") #agaricus.test$data@Dimnames[[2]] represents the column names of the sparse matrix. xgb.plot.tree(agaricus.train$data@Dimnames[[2]], model = bst) diff --git a/R-package/man/xgb.save.Rd b/R-package/man/xgb.save.Rd index ded444446..6e6b23e54 100644 --- a/R-package/man/xgb.save.Rd +++ b/R-package/man/xgb.save.Rd @@ -20,7 +20,7 @@ data(agaricus.test, package='xgboost') train <- agaricus.train test <- agaricus.test bst <- xgboost(data = train$data, label = train$label, max.depth = 2, - eta = 1, nround = 2,objective = "binary:logistic") + eta = 1, nthread = 2, nround = 2,objective = "binary:logistic") xgb.save(bst, 'xgb.model') bst <- xgb.load('xgb.model') pred <- predict(bst, test$data) diff --git a/R-package/man/xgb.save.raw.Rd b/R-package/man/xgb.save.raw.Rd index f169a3d3d..94ae29416 100644 --- a/R-package/man/xgb.save.raw.Rd +++ b/R-package/man/xgb.save.raw.Rd @@ -19,7 +19,7 @@ data(agaricus.test, package='xgboost') train <- agaricus.train test <- agaricus.test bst <- xgboost(data = train$data, label = train$label, max.depth = 2, - eta = 1, nround = 2,objective = "binary:logistic") + eta = 1, nthread = 2, nround = 2,objective = "binary:logistic") raw <- xgb.save.raw(bst) bst <- xgb.load(raw) pred <- predict(bst, test$data) diff --git a/R-package/man/xgb.train.Rd b/R-package/man/xgb.train.Rd index 4a4c91bfc..1c4376388 100644 --- a/R-package/man/xgb.train.Rd +++ b/R-package/man/xgb.train.Rd @@ -121,6 +121,6 @@ evalerror <- function(preds, dtrain) 
{ err <- as.numeric(sum(labels != (preds > 0)))/length(labels) return(list(metric = "error", value = err)) } -bst <- xgb.train(param, dtrain, nround = 2, watchlist, logregobj, evalerror) +bst <- xgb.train(param, dtrain, nthread = 2, nround = 2, watchlist, logregobj, evalerror) } diff --git a/R-package/man/xgboost.Rd b/R-package/man/xgboost.Rd index c92c811bb..cb9da74f7 100644 --- a/R-package/man/xgboost.Rd +++ b/R-package/man/xgboost.Rd @@ -59,7 +59,7 @@ data(agaricus.test, package='xgboost') train <- agaricus.train test <- agaricus.test bst <- xgboost(data = train$data, label = train$label, max.depth = 2, - eta = 1, nround = 2,objective = "binary:logistic") + eta = 1, nthread = 2, nround = 2,objective = "binary:logistic") pred <- predict(bst, test$data) } diff --git a/R-package/vignettes/discoverYourData.Rmd b/R-package/vignettes/discoverYourData.Rmd index 309b5042d..2f8f5dddb 100644 --- a/R-package/vignettes/discoverYourData.Rmd +++ b/R-package/vignettes/discoverYourData.Rmd @@ -153,7 +153,7 @@ The code below is very usual. 
For more information, you can look at the document ```{r} bst <- xgboost(data = sparse_matrix, label = output_vector, max.depth = 4, - eta = 1, nround = 10,objective = "binary:logistic") + eta = 1, nthread = 2, nround = 10,objective = "binary:logistic") ``` diff --git a/R-package/vignettes/xgboostPresentation.Rmd b/R-package/vignettes/xgboostPresentation.Rmd index b6491f386..6e0ca3771 100644 --- a/R-package/vignettes/xgboostPresentation.Rmd +++ b/R-package/vignettes/xgboostPresentation.Rmd @@ -141,10 +141,11 @@ We will train decision tree model using the following parameters: * `objective = "binary:logistic"`: we will train a binary classification model ; * `max.deph = 2`: the trees won't be deep, because our case is very simple ; +* `nthread = 2`: the number of CPU threads we are going to use ; * `nround = 2`: there will be two passes on the data, the second one will enhance the model by further reducing the difference between ground truth and prediction. ```{r trainingSparse, message=F, warning=F} -bstSparse <- xgboost(data = train$data, label = train$label, max.depth = 2, eta = 1, nround = 2, objective = "binary:logistic") +bstSparse <- xgboost(data = train$data, label = train$label, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic") ``` > More complex the relationship between your features and your `label` is, more passes you need. @@ -156,7 +157,7 @@ bstSparse <- xgboost(data = train$data, label = train$label, max.depth = 2, eta Alternatively, you can put your dataset in a *dense* matrix, i.e. a basic **R** matrix. 
```{r trainingDense, message=F, warning=F} -bstDense <- xgboost(data = as.matrix(train$data), label = train$label, max.depth = 2, eta = 1, nround = 2, objective = "binary:logistic") +bstDense <- xgboost(data = as.matrix(train$data), label = train$label, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic") ``` #### xgb.DMatrix @@ -165,7 +166,7 @@ bstDense <- xgboost(data = as.matrix(train$data), label = train$label, max.depth ```{r trainingDmatrix, message=F, warning=F} dtrain <- xgb.DMatrix(data = train$data, label = train$label) -bstDMatrix <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2, objective = "binary:logistic") +bstDMatrix <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic") ``` #### Verbose option @@ -176,17 +177,17 @@ One of the simplest way to see the training progress is to set the `verbose` opt ```{r trainingVerbose0, message=T, warning=F} # verbose = 0, no message -bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2, objective = "binary:logistic", verbose = 0) +bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic", verbose = 0) ``` ```{r trainingVerbose1, message=T, warning=F} # verbose = 1, print evaluation metric -bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2, objective = "binary:logistic", verbose = 1) +bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic", verbose = 1) ``` ```{r trainingVerbose2, message=T, warning=F} # verbose = 2, also print information about tree -bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2, objective = "binary:logistic", verbose = 2) +bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic", verbose = 2) ``` Basic prediction using Xgboost @@ -279,7 +280,7 @@ For the purpose of this example, we use `watchlist` parameter. 
It is a list of ` ```{r watchlist, message=F, warning=F} watchlist <- list(train=dtrain, test=dtest) -bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nround=2, watchlist=watchlist, objective = "binary:logistic") +bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nthread = 2, nround=2, watchlist=watchlist, objective = "binary:logistic") ``` **Xgboost** has computed at each round the same average error metric than seen above (we set `nround` to 2, that is why we have two lines). Obviously, the `train-error` number is related to the training dataset (the one the algorithm learns from) and the `test-error` number to the test dataset. @@ -291,7 +292,7 @@ If with your own dataset you have not such results, you should think about how y For a better understanding of the learning progression, you may want to have some specific metric or even use multiple evaluation metrics. ```{r watchlist2, message=F, warning=F} -bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nround=2, watchlist=watchlist, eval.metric = "error", eval.metric = "logloss", objective = "binary:logistic") +bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nthread = 2, nround=2, watchlist=watchlist, eval.metric = "error", eval.metric = "logloss", objective = "binary:logistic") ``` > `eval.metric` allows us to monitor two new metrics for each round, `logloss` and `error`. @@ -302,7 +303,7 @@ Linear boosting Until know, all the learnings we have performed were based on boosting trees. **Xgboost** implements a second algorithm, based on linear boosting. The only difference with previous command is `booster = "gblinear"` parameter (and removing `eta` parameter). 
```{r linearBoosting, message=F, warning=F} -bst <- xgb.train(data=dtrain, booster = "gblinear", max.depth=2, nround=2, watchlist=watchlist, eval.metric = "error", eval.metric = "logloss", objective = "binary:logistic") +bst <- xgb.train(data=dtrain, booster = "gblinear", max.depth=2, nthread = 2, nround=2, watchlist=watchlist, eval.metric = "error", eval.metric = "logloss", objective = "binary:logistic") ``` In this specific case, *linear boosting* gets sligtly better performance metrics than decision trees based algorithm. @@ -320,7 +321,7 @@ Like saving models, `xgb.DMatrix` object (which groups both dataset and outcome) xgb.DMatrix.save(dtrain, "dtrain.buffer") # to load it in, simply call xgb.DMatrix dtrain2 <- xgb.DMatrix("dtrain.buffer") -bst <- xgb.train(data=dtrain2, max.depth=2, eta=1, nround=2, watchlist=watchlist, objective = "binary:logistic") +bst <- xgb.train(data=dtrain2, max.depth=2, eta=1, nthread = 2, nround=2, watchlist=watchlist, objective = "binary:logistic") ``` ```{r DMatrixDel, include=FALSE}