add basic walkthrough
This commit is contained in:
parent
8ad9293437
commit
b858283ec5
@ -29,6 +29,9 @@
|
|||||||
#' @param feval customized evaluation function. Returns
|
#' @param feval customized evaluation function. Returns
|
||||||
#' \code{list(metric='metric-name', value='metric-value')} with given
|
#' \code{list(metric='metric-name', value='metric-value')} with given
|
||||||
#' prediction and dtrain,
|
#' prediction and dtrain,
|
||||||
|
#' @param verbose If 0, xgboost will stay silent. If 1, xgboost will print
|
||||||
|
#' information of performance. If 2, xgboost will print information of both performance and tree construction.
|
||||||
|
#'
|
||||||
#' @param ... other parameters to pass to \code{params}.
|
#' @param ... other parameters to pass to \code{params}.
|
||||||
#'
|
#'
|
||||||
#' @details
|
#' @details
|
||||||
@ -65,7 +68,7 @@
|
|||||||
#' @export
|
#' @export
|
||||||
#'
|
#'
|
||||||
xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
|
xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
|
||||||
obj = NULL, feval = NULL, ...) {
|
obj = NULL, feval = NULL, verbose = 1, ...) {
|
||||||
dtrain <- data
|
dtrain <- data
|
||||||
if (typeof(params) != "list") {
|
if (typeof(params) != "list") {
|
||||||
stop("xgb.train: first argument params must be list")
|
stop("xgb.train: first argument params must be list")
|
||||||
@ -73,7 +76,17 @@ xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
|
|||||||
if (class(dtrain) != "xgb.DMatrix") {
|
if (class(dtrain) != "xgb.DMatrix") {
|
||||||
stop("xgb.train: second argument dtrain must be xgb.DMatrix")
|
stop("xgb.train: second argument dtrain must be xgb.DMatrix")
|
||||||
}
|
}
|
||||||
|
if (verbose > 1) {
|
||||||
|
params <- append(params, list(silent = 0))
|
||||||
|
} else {
|
||||||
|
params <- append(params, list(silent = 1))
|
||||||
|
}
|
||||||
|
if (length(watchlist) != 0 && verbose == 0) {
|
||||||
|
warning('watchlist is provided but verbose=0, no evaluation information will be printed')
|
||||||
|
watchlist <- list()
|
||||||
|
}
|
||||||
params = append(params, list(...))
|
params = append(params, list(...))
|
||||||
|
|
||||||
bst <- xgb.Booster(params, append(watchlist, dtrain))
|
bst <- xgb.Booster(params, append(watchlist, dtrain))
|
||||||
for (i in 1:nrounds) {
|
for (i in 1:nrounds) {
|
||||||
succ <- xgb.iter.update(bst, dtrain, i - 1, obj)
|
succ <- xgb.iter.update(bst, dtrain, i - 1, obj)
|
||||||
|
|||||||
@ -40,14 +40,7 @@
|
|||||||
#'
|
#'
|
||||||
xgboost <- function(data = NULL, label = NULL, params = list(), nrounds,
|
xgboost <- function(data = NULL, label = NULL, params = list(), nrounds,
|
||||||
verbose = 1, ...) {
|
verbose = 1, ...) {
|
||||||
dtrain <- xgb.get.DMatrix(data, label)
|
dtrain <- xgb.get.DMatrix(data, label)
|
||||||
if (verbose > 1) {
|
|
||||||
silent <- 0
|
|
||||||
} else {
|
|
||||||
silent <- 1
|
|
||||||
}
|
|
||||||
|
|
||||||
params <- append(params, list(silent = silent))
|
|
||||||
params <- append(params, list(...))
|
params <- append(params, list(...))
|
||||||
|
|
||||||
if (verbose > 0) {
|
if (verbose > 0) {
|
||||||
@ -56,7 +49,7 @@ xgboost <- function(data = NULL, label = NULL, params = list(), nrounds,
|
|||||||
watchlist <- list()
|
watchlist <- list()
|
||||||
}
|
}
|
||||||
|
|
||||||
bst <- xgb.train(params, dtrain, nrounds, watchlist)
|
bst <- xgb.train(params, dtrain, nrounds, watchlist, verbose=verbose)
|
||||||
|
|
||||||
return(bst)
|
return(bst)
|
||||||
}
|
}
|
||||||
|
|||||||
2
R-package/data/README.md
Normal file
2
R-package/data/README.md
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
This folder contains the processed example datasets used by the demos.
|
||||||
|
Copyright of each dataset belongs to its original copyright holder.
|
||||||
93
R-package/demo/basic_walkthrough.R
Normal file
93
R-package/demo/basic_walkthrough.R
Normal file
@ -0,0 +1,93 @@
|
|||||||
|
require(xgboost)
|
||||||
|
require(methods)
|
||||||
|
# we load in the agaricus dataset
|
||||||
|
# In this example, we are aiming to predict whether a mushroom can be eaten
|
||||||
|
data(agaricus.train, package='xgboost')
|
||||||
|
data(agaricus.test, package='xgboost')
|
||||||
|
dtrain <- agaricus.train
|
||||||
|
dtest <- agaricus.test
|
||||||
|
# the loaded data is stored in sparseMatrix, and label is a numeric vector in {0,1}
|
||||||
|
class(dtrain$label)
|
||||||
|
class(dtrain$data)
|
||||||
|
|
||||||
|
#-------------Basic Training using XGBoost-----------------
|
||||||
|
# this is the basic usage of xgboost: you can put a matrix in the data field
|
||||||
|
# note: we are putting in a sparse matrix here, xgboost naturally handles sparse input
|
||||||
|
# use a sparse matrix when your features are sparse (e.g. when you are using one-hot encoded vectors)
|
||||||
|
print("training xgboost with sparseMatrix")
|
||||||
|
bst <- xgboost(data = dtrain$data, label = dtrain$label, max_depth = 2, eta = 1, nround = 2,
|
||||||
|
objective = "binary:logistic")
|
||||||
|
# alternatively, you can put in dense matrix, i.e. basic R-matrix
|
||||||
|
print("training xgboost with Matrix")
|
||||||
|
bst <- xgboost(data = as.matrix(dtrain$data), label = dtrain$label, max_depth = 2, eta = 1, nround = 2,
|
||||||
|
objective = "binary:logistic")
|
||||||
|
|
||||||
|
# you can also put in an xgb.DMatrix object, which stores the label, data and other metadata needed for advanced features
|
||||||
|
print("training xgboost with xgb.DMatrix")
|
||||||
|
dmat <- xgb.DMatrix(data = dtrain$data, label = dtrain$label)
|
||||||
|
bst <- xgboost(data = dmat, max_depth = 2, eta = 1, nround = 2, objective = "binary:logistic")
|
||||||
|
|
||||||
|
# Verbose = 0,1,2
|
||||||
|
print ('train xgboost with verbose 0, no message')
|
||||||
|
bst <- xgboost(data = dmat, max_depth = 2, eta = 1, nround = 2,
|
||||||
|
objective = "binary:logistic", verbose = 0)
|
||||||
|
print ('train xgboost with verbose 1, print evaluation metric')
|
||||||
|
bst <- xgboost(data = dmat, max_depth = 2, eta = 1, nround = 2,
|
||||||
|
objective = "binary:logistic", verbose = 1)
|
||||||
|
print ('train xgboost with verbose 2, also print information about tree')
|
||||||
|
bst <- xgboost(data = dmat, max_depth = 2, eta = 1, nround = 2,
|
||||||
|
objective = "binary:logistic", verbose = 2)
|
||||||
|
|
||||||
|
# you can also specify data as file path to a LibSVM format input
|
||||||
|
# since we do not have this file with us, the following line is just for illustration
|
||||||
|
# bst <- xgboost(data = 'agaricus.train.svm', max_depth = 2, eta = 1, nround = 2,objective = "binary:logistic")
|
||||||
|
|
||||||
|
#--------------------basic prediction using xgboost--------------
|
||||||
|
# you can do prediction using the following line
|
||||||
|
# you can put in Matrix, sparseMatrix, or xgb.DMatrix
|
||||||
|
pred <- predict(bst, dtest$data)
|
||||||
|
err <- as.numeric(sum(as.integer(pred > 0.5) != dtest$label))/length(dtest$label)
|
||||||
|
print(paste("test-error=", err))
|
||||||
|
|
||||||
|
#-------------------save and load models-------------------------
|
||||||
|
# save model to binary local file
|
||||||
|
xgb.save(bst, "xgboost.model")
|
||||||
|
# load binary model to R
|
||||||
|
bst2 <- xgb.load("xgboost.model")
|
||||||
|
pred2 <- predict(bst2, dtest$data)
|
||||||
|
# pred2 should be identical to pred
|
||||||
|
print(paste("sum(abs(pred2-pred))=", sum(abs(pred2-pred))))
|
||||||
|
|
||||||
|
#----------------Advanced features --------------
|
||||||
|
# to use advanced features, we need to put data in xgb.DMatrix
|
||||||
|
dtrain <- xgb.DMatrix(data = dtrain$data, label=dtrain$label)
|
||||||
|
dtest <- xgb.DMatrix(data = dtest$data, label=dtest$label)
|
||||||
|
#---------------Using watchlist----------------
|
||||||
|
# watchlist is a list of xgb.DMatrix objects, each of them tagged with a name
|
||||||
|
watchlist <- list(train=dtrain, test=dtest)
|
||||||
|
# to train with watchlist, use xgb.train, which contains more advanced features
|
||||||
|
# watchlist allows us to monitor the evaluation result on all data in the list
|
||||||
|
print ('train xgboost using xgb.train with watchlist')
|
||||||
|
bst <- xgb.train(data=dtrain, "max_depth"=2, eta=1, nround=2, watchlist=watchlist,
|
||||||
|
objective = "binary:logistic")
|
||||||
|
# we can change evaluation metrics, or use multiple evaluation metrics
|
||||||
|
print ('train xgboost using xgb.train with watchlist, watch logloss and error')
|
||||||
|
bst <- xgb.train(data=dtrain, "max_depth"=2, eta=1, nround=2, watchlist=watchlist,
|
||||||
|
"eval_metric" = "error", "eval_metric" = "logloss",
|
||||||
|
objective = "binary:logistic")
|
||||||
|
|
||||||
|
# xgb.DMatrix can also be saved using xgb.DMatrix.save
|
||||||
|
xgb.DMatrix.save(dtrain, "dtrain.buffer")
|
||||||
|
# to load it in, simply call xgb.DMatrix
|
||||||
|
dtrain2 <- xgb.DMatrix("dtrain.buffer")
|
||||||
|
bst <- xgb.train(data=dtrain2, "max_depth"=2, eta=1, nround=2, watchlist=watchlist,
|
||||||
|
objective = "binary:logistic")
|
||||||
|
# information can be extracted from xgb.DMatrix using getinfo
|
||||||
|
label = getinfo(dtest, "label")
|
||||||
|
pred <- predict(bst, dtest)
|
||||||
|
err <- as.numeric(sum(as.integer(pred > 0.5) != label))/length(label)
|
||||||
|
print(paste("test-error=", err))
|
||||||
|
|
||||||
|
# Finally, you can dump the tree you learned using xgb.dump into a text file
|
||||||
|
xgb.dump(bst, "dump.raw.txt")
|
||||||
|
|
||||||
Loading…
x
Reference in New Issue
Block a user