add basic walkthrough

2014-09-06 10:11:45 -07:00
parent 8ad9293437
commit b858283ec5
4 changed files with 111 additions and 10 deletions
--- a/R-package/R/xgb.train.R
+++ b/R-package/R/xgb.train.R
@@ -29,6 +29,9 @@
 #' @param feval customized evaluation function. Returns 
 #'   \code{list(metric='metric-name', value='metric-value')} with given 
 #'   prediction and dtrain,
 #' @param verbose If 0, xgboost will stay silent. If 1, xgboost will print 
 #'   performance information. If 2, it also prints information about the trees.
 #'
 #' @param ... other parameters to pass to \code{params}.
 #' 
 #' @details 
@@ -65,7 +68,7 @@
 #' @export
 #' 
 xgb.train <- function(params=list(), data, nrounds, watchlist = list(), 
-                      obj = NULL, feval = NULL, ...) {
+                      obj = NULL, feval = NULL, verbose = 1, ...) {
  dtrain <- data
  if (typeof(params) != "list") {
    stop("xgb.train: first argument params must be list")
@@ -73,7 +76,17 @@ xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
  if (class(dtrain) != "xgb.DMatrix") {
    stop("xgb.train: second argument dtrain must be xgb.DMatrix")
  }
  if (verbose > 1) {
    params <- append(params, list(silent = 0))
  } else {
    params <- append(params, list(silent = 1))
  }
  if (length(watchlist) != 0 && verbose == 0) {
    warning('watchlist is provided but verbose=0, no evaluation information will be printed')
    watchlist <- list()
  }
  params = append(params, list(...))
  bst <- xgb.Booster(params, append(watchlist, dtrain))
  for (i in 1:nrounds) {
    succ <- xgb.iter.update(bst, dtrain, i - 1, obj)
--- a/R-package/R/xgboost.R
+++ b/R-package/R/xgboost.R
@@ -40,14 +40,7 @@
 #' 
 xgboost <- function(data = NULL, label = NULL, params = list(), nrounds, 
                    verbose = 1, ...) {
-  dtrain <- xgb.get.DMatrix(data, label)
+  dtrain <- xgb.get.DMatrix(data, label)  
  if (verbose > 1) {
    silent <- 0 
  } else {
    silent <- 1
  }
  params <- append(params, list(silent = silent))
  params <- append(params, list(...))
  if (verbose > 0) {
@@ -56,7 +49,7 @@ xgboost <- function(data = NULL, label = NULL, params = list(), nrounds,
    watchlist <- list()
  }
-  bst <- xgb.train(params, dtrain, nrounds, watchlist)
+  bst <- xgb.train(params, dtrain, nrounds, watchlist, verbose=verbose)
  return(bst)
 } 
--- a/R-package/data/README.md
+++ b/R-package/data/README.md
@@ -0,0 +1,2 @@
 This folder contains the processed example dataset used by the demos.
 Copyright of the dataset belongs to the original copyright holder.
--- a/R-package/demo/basic_walkthrough.R
+++ b/R-package/demo/basic_walkthrough.R
@@ -0,0 +1,93 @@
 # Basic walkthrough of the xgboost R interface: training from several input
 # types, verbosity levels, prediction, model save/load, watchlists and
 # xgb.DMatrix utilities.
 # NOTE: this demo writes "xgboost.model", "dtrain.buffer" and "dump.raw.txt"
 # into the current working directory.
 # library() is used instead of require() so that a missing package stops the
 # demo immediately instead of failing later with an unrelated error.
 library(xgboost)
 library(methods)
 # Load the agaricus dataset.
 # In this example we aim to predict whether a mushroom can be eaten.
 data(agaricus.train, package = "xgboost")
 data(agaricus.test, package = "xgboost")
 dtrain <- agaricus.train
 dtest <- agaricus.test
 # The loaded data is stored in a sparseMatrix, and the label is a numeric
 # vector in {0,1}.
 class(dtrain$label)
 class(dtrain$data)
 #-------------Basic Training using XGBoost-----------------
 # This is the basic usage of xgboost: pass a matrix in the data argument.
 # Note: we are passing a sparse matrix here; xgboost naturally handles sparse
 # input. Use a sparse matrix when your features are sparse (e.g. when you are
 # using one-hot encoded vectors).
 print("training xgboost with sparseMatrix")
 bst <- xgboost(data = dtrain$data, label = dtrain$label, max_depth = 2,
                eta = 1, nrounds = 2, objective = "binary:logistic")
 # Alternatively, you can pass a dense matrix, i.e. a basic R matrix.
 print("training xgboost with Matrix")
 bst <- xgboost(data = as.matrix(dtrain$data), label = dtrain$label,
                max_depth = 2, eta = 1, nrounds = 2,
                objective = "binary:logistic")
 # You can also pass an xgb.DMatrix object, which stores the label, the data
 # and other metadata needed for advanced features.
 print("training xgboost with xgb.DMatrix")
 dmat <- xgb.DMatrix(data = dtrain$data, label = dtrain$label)
 bst <- xgboost(data = dmat, max_depth = 2, eta = 1, nrounds = 2,
                objective = "binary:logistic")
 # Verbose = 0, 1, 2
 print("train xgboost with verbose 0, no message")
 bst <- xgboost(data = dmat, max_depth = 2, eta = 1, nrounds = 2,
                objective = "binary:logistic", verbose = 0)
 print("train xgboost with verbose 1, print evaluation metric")
 bst <- xgboost(data = dmat, max_depth = 2, eta = 1, nrounds = 2,
                objective = "binary:logistic", verbose = 1)
 print("train xgboost with verbose 2, also print information about tree")
 bst <- xgboost(data = dmat, max_depth = 2, eta = 1, nrounds = 2,
                objective = "binary:logistic", verbose = 2)
 # You can also specify data as a file path to a LibSVM format input.
 # Since we do not have this file with us, the following call is just for
 # illustration:
 # bst <- xgboost(data = "agaricus.train.svm", max_depth = 2, eta = 1,
 #                nrounds = 2, objective = "binary:logistic")
 #--------------------basic prediction using xgboost--------------
 # You can do prediction using the following line.
 # You can pass a Matrix, sparseMatrix, or xgb.DMatrix.
 pred <- predict(bst, dtest$data)
 # Fraction of test rows whose thresholded prediction differs from the label.
 err <- as.numeric(sum(as.integer(pred > 0.5) != dtest$label)) /
   length(dtest$label)
 print(paste("test-error=", err))
 #-------------------save and load models-------------------------
 # Save the model to a local binary file.
 xgb.save(bst, "xgboost.model")
 # Load the binary model back into R.
 bst2 <- xgb.load("xgboost.model")
 pred2 <- predict(bst2, dtest$data)
 # pred2 should be identical to pred, so this sum should be 0.
 print(paste("sum(abs(pred2-pred))=", sum(abs(pred2 - pred))))
 #----------------Advanced features --------------
 # To use advanced features, we need to put the data in an xgb.DMatrix.
 # From here on dtrain/dtest refer to xgb.DMatrix objects, not the raw lists.
 dtrain <- xgb.DMatrix(data = dtrain$data, label = dtrain$label)
 dtest <- xgb.DMatrix(data = dtest$data, label = dtest$label)
 #---------------Using watchlist----------------
 # watchlist is a list of xgb.DMatrix objects, each of them tagged with a name.
 watchlist <- list(train = dtrain, test = dtest)
 # To train with a watchlist, use xgb.train, which contains more advanced
 # features. The watchlist allows us to monitor the evaluation result on all
 # data in the list.
 print("train xgboost using xgb.train with watchlist")
 bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2,
                  watchlist = watchlist, objective = "binary:logistic")
 # We can change the evaluation metric, or use multiple evaluation metrics by
 # repeating the eval_metric argument.
 print("train xgboost using xgb.train with watchlist, watch logloss and error")
 bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2,
                  watchlist = watchlist,
                  eval_metric = "error", eval_metric = "logloss",
                  objective = "binary:logistic")
 # An xgb.DMatrix can also be saved using xgb.DMatrix.save.
 xgb.DMatrix.save(dtrain, "dtrain.buffer")
 # To load it back in, simply call xgb.DMatrix on the file path.
 dtrain2 <- xgb.DMatrix("dtrain.buffer")
 bst <- xgb.train(data = dtrain2, max_depth = 2, eta = 1, nrounds = 2,
                  watchlist = watchlist, objective = "binary:logistic")
 # Information can be extracted from an xgb.DMatrix using getinfo.
 label <- getinfo(dtest, "label")
 pred <- predict(bst, dtest)
 err <- as.numeric(sum(as.integer(pred > 0.5) != label)) / length(label)
 print(paste("test-error=", err))
 # Finally, you can dump the trees you learned into a text file using xgb.dump.
 xgb.dump(bst, "dump.raw.txt")
		`@@ -0,0 +1,2 @@`
							`This folder contains processed example dataset used by the demos.`
							`Copyright of the dataset belongs to the original copyright holder`