This commit is contained in:
hetong 2014-09-06 10:20:05 -07:00
commit 166df74024
6 changed files with 153 additions and 13 deletions

View File

@ -133,9 +133,9 @@ xgb.iter.update <- function(booster, dtrain, iter, obj = NULL) {
.Call("XGBoosterUpdateOneIter_R", booster, as.integer(iter), dtrain, .Call("XGBoosterUpdateOneIter_R", booster, as.integer(iter), dtrain,
PACKAGE = "xgboost") PACKAGE = "xgboost")
} else { } else {
pred <- xgb.predict(bst, dtrain) pred <- xgb.predict(booster, dtrain)
gpair <- obj(pred, dtrain) gpair <- obj(pred, dtrain)
succ <- xgb.iter.boost(bst, dtrain, gpair) succ <- xgb.iter.boost(booster, dtrain, gpair)
} }
return(TRUE) return(TRUE)
} }
@ -172,7 +172,7 @@ xgb.iter.eval <- function(booster, watchlist, iter, feval = NULL) {
if (length(names(w)) == 0) { if (length(names(w)) == 0) {
stop("xgb.eval: name tag must be presented for every elements in watchlist") stop("xgb.eval: name tag must be presented for every elements in watchlist")
} }
ret <- feval(xgb.predict(bst, w[[1]]), w[[1]]) ret <- feval(xgb.predict(booster, w[[1]]), w[[1]])
msg <- paste(msg, "\t", names(w), "-", ret$metric, ":", ret$value, sep="") msg <- paste(msg, "\t", names(w), "-", ret$metric, ":", ret$value, sep="")
} }
} }

View File

@ -29,6 +29,9 @@
#' @param feval custimized evaluation function. Returns #' @param feval custimized evaluation function. Returns
#' \code{list(metric='metric-name', value='metric-value')} with given #' \code{list(metric='metric-name', value='metric-value')} with given
#' prediction and dtrain, #' prediction and dtrain,
#' @param verbose If 0, xgboost will stay silent. If 1, xgboost will print
#' information of performance. If 2, xgboost will print information of both
#'
#' @param ... other parameters to pass to \code{params}. #' @param ... other parameters to pass to \code{params}.
#' #'
#' @details #' @details
@ -65,7 +68,7 @@
#' @export #' @export
#' #'
xgb.train <- function(params=list(), data, nrounds, watchlist = list(), xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
obj = NULL, feval = NULL, ...) { obj = NULL, feval = NULL, verbose = 1, ...) {
dtrain <- data dtrain <- data
if (typeof(params) != "list") { if (typeof(params) != "list") {
stop("xgb.train: first argument params must be list") stop("xgb.train: first argument params must be list")
@ -73,7 +76,17 @@ xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
if (class(dtrain) != "xgb.DMatrix") { if (class(dtrain) != "xgb.DMatrix") {
stop("xgb.train: second argument dtrain must be xgb.DMatrix") stop("xgb.train: second argument dtrain must be xgb.DMatrix")
} }
if (verbose > 1) {
params <- append(params, list(silent = 0))
} else {
params <- append(params, list(silent = 1))
}
if (length(watchlist) != 0 && verbose == 0) {
warning('watchlist is provided but verbose=0, no evaluation information will be printed')
watchlist <- list()
}
params = append(params, list(...)) params = append(params, list(...))
bst <- xgb.Booster(params, append(watchlist, dtrain)) bst <- xgb.Booster(params, append(watchlist, dtrain))
for (i in 1:nrounds) { for (i in 1:nrounds) {
succ <- xgb.iter.update(bst, dtrain, i - 1, obj) succ <- xgb.iter.update(bst, dtrain, i - 1, obj)

View File

@ -40,14 +40,7 @@
#' #'
xgboost <- function(data = NULL, label = NULL, params = list(), nrounds, xgboost <- function(data = NULL, label = NULL, params = list(), nrounds,
verbose = 1, ...) { verbose = 1, ...) {
dtrain <- xgb.get.DMatrix(data, label) dtrain <- xgb.get.DMatrix(data, label)
if (verbose > 1) {
silent <- 0
} else {
silent <- 1
}
params <- append(params, list(silent = silent))
params <- append(params, list(...)) params <- append(params, list(...))
if (verbose > 0) { if (verbose > 0) {
@ -56,7 +49,7 @@ xgboost <- function(data = NULL, label = NULL, params = list(), nrounds,
watchlist <- list() watchlist <- list()
} }
bst <- xgb.train(params, dtrain, nrounds, watchlist) bst <- xgb.train(params, dtrain, nrounds, watchlist, verbose=verbose)
return(bst) return(bst)
} }

2
R-package/data/README.md Normal file
View File

@ -0,0 +1,2 @@
This folder contains processed example dataset used by the demos.
Copyright of the dataset belongs to the original copyright holder

View File

@ -0,0 +1,93 @@
require(xgboost)
require(methods)
# we load in the agaricus dataset
# In this example, we are aiming to predict whether a mushroom can be eated
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
dtrain <- agaricus.train
dtest <- agaricus.test
# the loaded data is stored in sparseMatrix, and label is a numeric vector in {0,1}
class(dtrain$label)
class(dtrain$data)
#-------------Basic Training using XGBoost-----------------
# this is the basic usage of xgboost you can put matrix in data field
# note: we are puting in sparse matrix here, xgboost naturally handles sparse input
# use sparse matrix when your feature is sparse(e.g. when you using one-hot encoding vector)
print("training xgboost with sparseMatrix")
bst <- xgboost(data = dtrain$data, label = dtrain$label, max_depth = 2, eta = 1, nround = 2,
objective = "binary:logistic")
# alternatively, you can put in dense matrix, i.e. basic R-matrix
print("training xgboost with Matrix")
bst <- xgboost(data = as.matrix(dtrain$data), label = dtrain$label, max_depth = 2, eta = 1, nround = 2,
objective = "binary:logistic")
# you can also put in xgb.DMatrix object, stores label, data and other meta datas needed for advanced features
print("training xgboost with xgb.DMatrix")
dmat <- xgb.DMatrix(data = dtrain$data, label = dtrain$label)
bst <- xgboost(data = dmat, max_depth = 2, eta = 1, nround = 2, objective = "binary:logistic")
# Verbose = 0,1,2
print ('train xgboost with verbose 0, no message')
bst <- xgboost(data = dmat, max_depth = 2, eta = 1, nround = 2,
objective = "binary:logistic", verbose = 0)
print ('train xgboost with verbose 1, print evaluation metric')
bst <- xgboost(data = dmat, max_depth = 2, eta = 1, nround = 2,
objective = "binary:logistic", verbose = 1)
print ('train xgboost with verbose 2, also print information about tree')
bst <- xgboost(data = dmat, max_depth = 2, eta = 1, nround = 2,
objective = "binary:logistic", verbose = 2)
# you can also specify data as file path to a LibSVM format input
# since we do not have this file with us, the following line is just for illustration
# bst <- xgboost(data = 'agaricus.train.svm', max_depth = 2, eta = 1, nround = 2,objective = "binary:logistic")
#--------------------basic prediction using xgboost--------------
# you can do prediction using the following line
# you can put in Matrix, sparseMatrix, or xgb.DMatrix
pred <- predict(bst, dtest$data)
err <- as.numeric(sum(as.integer(pred > 0.5) != dtest$label))/length(dtest$label)
print(paste("test-error=", err))
#-------------------save and load models-------------------------
# save model to binary local file
xgb.save(bst, "xgboost.model")
# load binary model to R
bst2 <- xgb.load("xgboost.model")
pred2 <- predict(bst2, dtest$data)
# pred2 should be identical to pred
print(paste("sum(abs(pred2-pred))=", sum(abs(pred2-pred))))
#----------------Advanced features --------------
# to use advanced features, we need to put data in xgb.DMatrix
dtrain <- xgb.DMatrix(data = dtrain$data, label=dtrain$label)
dtest <- xgb.DMatrix(data = dtest$data, label=dtest$label)
#---------------Using watchlist----------------
# watchlist is a list of xgb.DMatrix, each of them tagged with name
watchlist <- list(train=dtrain, test=dtest)
# to train with watchlist, use xgb.train, which contains more advanced features
# watchlist allows us to monitor the evaluation result on all data in the list
print ('train xgboost using xgb.train with watchlist')
bst <- xgb.train(data=dtrain, "max_depth"=2, eta=1, nround=2, watchlist=watchlist,
objective = "binary:logistic")
# we can change evaluation metrics, or use multiple evaluation metrics
print ('train xgboost using xgb.train with watchlist, watch logloss and error')
bst <- xgb.train(data=dtrain, "max_depth"=2, eta=1, nround=2, watchlist=watchlist,
"eval_metric" = "error", "eval_metric" = "logloss",
objective = "binary:logistic")
# xgb.DMatrix can also be saved using xgb.DMatrix.save
xgb.DMatrix.save(dtrain, "dtrain.buffer")
# to load it in, simply call xgb.DMatrix
dtrain2 <- xgb.DMatrix("dtrain.buffer")
bst <- xgb.train(data=dtrain2, "max_depth"=2, eta=1, nround=2, watchlist=watchlist,
objective = "binary:logistic")
# information can be extracted from xgb.DMatrix using getinfo
label = getinfo(dtest, "label")
pred <- predict(bst, dtest)
err <- as.numeric(sum(as.integer(pred > 0.5) != label))/length(label)
print(paste("test-error=", err))
# Finally, you can dump the tree you learned using xgb.dump into a text file
xgb.dump(bst, "dump.raw.txt")

View File

@ -0,0 +1,39 @@
require(xgboost)
# load in the agaricus dataset
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
# note: for customized objective function, we leave objective as default
# note: what we are getting is margin value in prediction
# you must know what you are doing
param <- list(max_depth=2,eta=1,silent=1)
watchlist <- list(eval = dtest, train = dtrain)
num_round <- 2
# user define objective function, given prediction, return gradient and second order gradient
# this is loglikelihood loss
logregobj <- function(preds, dtrain) {
labels <- getinfo(dtrain, "label")
preds <- 1/(1 + exp(-preds))
grad <- preds - labels
hess <- preds * (1 - preds)
return(list(grad = grad, hess = hess))
}
# user defined evaluation function, return a pair metric_name, result
# NOTE: when you do customized loss function, the default prediction value is margin
# this may make buildin evalution metric not function properly
# for example, we are doing logistic loss, the prediction is score before logistic transformation
# the buildin evaluation error assumes input is after logistic transformation
# Take this in mind when you use the customization, and maybe you need write customized evaluation function
evalerror <- function(preds, dtrain) {
labels <- getinfo(dtrain, "label")
err <- as.numeric(sum(labels != (preds > 0)))/length(labels)
return(list(metric = "error", value = err))
}
print ('start training with user customized objective')
# training with customized objective, we can also do step by step training
# simply look at xgboost.py's implementation of train
bst <- xgb.train(param, dtrain, num_round, watchlist, logregobj, evalerror)