style cleanup, incomplete CV
This commit is contained in:
parent
2b170ecda4
commit
984102e586
@ -81,20 +81,28 @@ xgb.predict <- function(booster, dmat, outputmargin = FALSE) {
|
|||||||
|
|
||||||
## ----the following are low level iteratively function, not needed if
|
## ----the following are low level iteratively function, not needed if
|
||||||
## you do not want to use them ---------------------------------------
|
## you do not want to use them ---------------------------------------
|
||||||
|
# get dmatrix from data, label
|
||||||
# iteratively update booster with dtrain
|
xgb.get.DMatrix <- function(data, label = NULL) {
|
||||||
xgb.iter.update <- function(booster, dtrain, iter) {
|
inClass <- class(data)
|
||||||
if (class(booster) != "xgb.Booster") {
|
if (inClass == "dgCMatrix" || inClass == "matrix") {
|
||||||
stop("xgb.iter.update: first argument must be type xgb.Booster")
|
if (is.null(label)) {
|
||||||
|
stop("xgboost: need label when data is a matrix")
|
||||||
|
}
|
||||||
|
dtrain <- xgb.DMatrix(data, label = label)
|
||||||
|
} else {
|
||||||
|
if (!is.null(label)) {
|
||||||
|
warning("xgboost: label will be ignored.")
|
||||||
|
}
|
||||||
|
if (inClass == "character") {
|
||||||
|
dtrain <- xgb.DMatrix(data)
|
||||||
|
} else if (inClass == "xgb.DMatrix") {
|
||||||
|
dtrain <- data
|
||||||
|
} else {
|
||||||
|
stop("xgboost: Invalid input of data")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (class(dtrain) != "xgb.DMatrix") {
|
return (dtrain)
|
||||||
stop("xgb.iter.update: second argument must be type xgb.DMatrix")
|
|
||||||
}
|
|
||||||
.Call("XGBoosterUpdateOneIter_R", booster, as.integer(iter), dtrain,
|
|
||||||
PACKAGE = "xgboost")
|
|
||||||
return(TRUE)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# iteratively update booster with customized statistics
|
# iteratively update booster with customized statistics
|
||||||
xgb.iter.boost <- function(booster, dtrain, gpair) {
|
xgb.iter.boost <- function(booster, dtrain, gpair) {
|
||||||
if (class(booster) != "xgb.Booster") {
|
if (class(booster) != "xgb.Booster") {
|
||||||
@ -108,8 +116,28 @@ xgb.iter.boost <- function(booster, dtrain, gpair) {
|
|||||||
return(TRUE)
|
return(TRUE)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# iteratively update booster with dtrain
|
||||||
|
xgb.iter.update <- function(booster, dtrain, iter, obj = NULL) {
|
||||||
|
if (class(booster) != "xgb.Booster") {
|
||||||
|
stop("xgb.iter.update: first argument must be type xgb.Booster")
|
||||||
|
}
|
||||||
|
if (class(dtrain) != "xgb.DMatrix") {
|
||||||
|
stop("xgb.iter.update: second argument must be type xgb.DMatrix")
|
||||||
|
}
|
||||||
|
|
||||||
|
if (is.null(obj)) {
|
||||||
|
.Call("XGBoosterUpdateOneIter_R", booster, as.integer(iter), dtrain,
|
||||||
|
PACKAGE = "xgboost")
|
||||||
|
} else {
|
||||||
|
pred <- xgb.predict(bst, dtrain)
|
||||||
|
gpair <- obj(pred, dtrain)
|
||||||
|
succ <- xgb.iter.boost(bst, dtrain, gpair)
|
||||||
|
}
|
||||||
|
return(TRUE)
|
||||||
|
}
|
||||||
|
|
||||||
# iteratively evaluate one iteration
|
# iteratively evaluate one iteration
|
||||||
xgb.iter.eval <- function(booster, watchlist, iter) {
|
xgb.iter.eval <- function(booster, watchlist, iter, feval = NULL) {
|
||||||
if (class(booster) != "xgb.Booster") {
|
if (class(booster) != "xgb.Booster") {
|
||||||
stop("xgb.eval: first argument must be type xgb.Booster")
|
stop("xgb.eval: first argument must be type xgb.Booster")
|
||||||
}
|
}
|
||||||
@ -122,18 +150,47 @@ xgb.iter.eval <- function(booster, watchlist, iter) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (length(watchlist) != 0) {
|
if (length(watchlist) != 0) {
|
||||||
evnames <- list()
|
if (is.null(feval)) {
|
||||||
for (i in 1:length(watchlist)) {
|
evnames <- list()
|
||||||
w <- watchlist[i]
|
for (i in 1:length(watchlist)) {
|
||||||
if (length(names(w)) == 0) {
|
w <- watchlist[i]
|
||||||
stop("xgb.eval: name tag must be presented for every elements in watchlist")
|
if (length(names(w)) == 0) {
|
||||||
|
stop("xgb.eval: name tag must be presented for every elements in watchlist")
|
||||||
|
}
|
||||||
|
evnames <- append(evnames, names(w))
|
||||||
|
}
|
||||||
|
msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist,
|
||||||
|
evnames, PACKAGE = "xgboost")
|
||||||
|
} else {
|
||||||
|
msg <- paste("[", iter, "]", sep="")
|
||||||
|
for (j in 1:length(watchlist)) {
|
||||||
|
w <- watchlist[j]
|
||||||
|
if (length(names(w)) == 0) {
|
||||||
|
stop("xgb.eval: name tag must be presented for every elements in watchlist")
|
||||||
|
}
|
||||||
|
ret <- feval(xgb.predict(bst, w[[1]]), w[[1]])
|
||||||
|
msg <- paste(msg, "\t", names(w), "-", ret$metric, ":", ret$value, sep="")
|
||||||
}
|
}
|
||||||
evnames <- append(evnames, names(w))
|
|
||||||
}
|
}
|
||||||
msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist,
|
|
||||||
evnames, PACKAGE = "xgboost")
|
|
||||||
} else {
|
} else {
|
||||||
msg <- ""
|
msg <- ""
|
||||||
}
|
}
|
||||||
return(msg)
|
return(msg)
|
||||||
}
|
}
|
||||||
|
#------------------------------------------
|
||||||
|
# helper functions for cross validation
|
||||||
|
#
|
||||||
|
xgb.cv.mknfold <- function(dall, nfold, param, metrics=list(), fpreproc = NULL) {
|
||||||
|
randidx <- sample(1 : xgb.numrow(dall))
|
||||||
|
kstep <- length(randidx) / nfold
|
||||||
|
idset <- list()
|
||||||
|
for (i in 1:nfold) {
|
||||||
|
idset = append(idset, randidx[ ((i-1) * kstep + 1) : min(i * kstep, length(randidx)) ])
|
||||||
|
}
|
||||||
|
ret <- list()
|
||||||
|
for (k in 1:nfold) {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|||||||
57
R-package/R/xgb.cv.R
Normal file
57
R-package/R/xgb.cv.R
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
#' eXtreme Gradient Boosting Training
|
||||||
|
#'
|
||||||
|
#' The cross validation function of xgboost
|
||||||
|
#'
|
||||||
|
#' @param params the list of parameters. Commonly used ones are:
|
||||||
|
#' \itemize{
|
||||||
|
#' \item \code{objective} objective function, common ones are
|
||||||
|
#' \itemize{
|
||||||
|
#' \item \code{reg:linear} linear regression
|
||||||
|
#' \item \code{binary:logistic} logistic regression for classification
|
||||||
|
#' }
|
||||||
|
#' \item \code{eta} step size of each boosting step
|
||||||
|
#' \item \code{max_depth} maximum depth of the tree
|
||||||
|
#' \item \code{nthread} number of thread used in training, if not set, all threads are used
|
||||||
|
#' }
|
||||||
|
#'
|
||||||
|
#' See \url{https://github.com/tqchen/xgboost/wiki/Parameters} for
|
||||||
|
#' further details. See also inst/examples/demo.R for walkthrough example in R.
|
||||||
|
#' @param data takes an \code{xgb.DMatrix} as the input.
|
||||||
|
#' @param nrounds the max number of iterations
|
||||||
|
#' @param metrics list of evaluation metrics to be used in cross validation,
|
||||||
|
#' when it is not specified, the evaluation metric is chosen according to objective function.
|
||||||
|
#' Possible options are:
|
||||||
|
#' \itemize{
|
||||||
|
#' \item \code{error} binary classification error rate
|
||||||
|
#' \item \code{rmse} Root mean square error
|
||||||
|
#' \item \code{logloss} negative log-likelihood function
|
||||||
|
#' \item \code{auc} Area under curve
|
||||||
|
#' \item \code{merror} Exact matching error, used to evaluate multi-class classification
|
||||||
|
#' }
|
||||||
|
#'
|
||||||
|
#' @param obj customized objective function. Returns gradient and second order
|
||||||
|
#' gradient with given prediction and dtrain,
|
||||||
|
#' @param feval customized evaluation function. Returns
|
||||||
|
#' \code{list(metric='metric-name', value='metric-value')} with given
|
||||||
|
#' prediction and dtrain,
|
||||||
|
#' @param ... other parameters to pass to \code{params}.
|
||||||
|
#'
|
||||||
|
#' @details
|
||||||
|
#' This is the cross validation function for xgboost
|
||||||
|
#'
|
||||||
|
#' Parallelization is automatically enabled if OpenMP is present.
|
||||||
|
#' Number of threads can also be manually specified via "nthread" parameter.
|
||||||
|
#'
|
||||||
|
#' This function only accepts an \code{xgb.DMatrix} object as the input.
|
||||||
|
#'
|
||||||
|
#' @export
|
||||||
|
#'
|
||||||
|
xgb.cv <- function(params=list(), data, nrounds, metrics=list(), label = NULL,
|
||||||
|
obj = NULL, feval = NULL, ...) {
|
||||||
|
if (typeof(params) != "list") {
|
||||||
|
stop("xgb.cv: first argument params must be list")
|
||||||
|
}
|
||||||
|
dtrain <- xgb.get.DMatrix(data, label)
|
||||||
|
params = append(params, list(...))
|
||||||
|
|
||||||
|
}
|
||||||
@ -16,7 +16,7 @@
|
|||||||
#'
|
#'
|
||||||
#' See \url{https://github.com/tqchen/xgboost/wiki/Parameters} for
|
#' See \url{https://github.com/tqchen/xgboost/wiki/Parameters} for
|
||||||
#' further details. See also inst/examples/demo.R for walkthrough example in R.
|
#' further details. See also inst/examples/demo.R for walkthrough example in R.
|
||||||
#' @param dtrain takes an \code{xgb.DMatrix} as the input.
|
#' @param data takes an \code{xgb.DMatrix} as the input.
|
||||||
#' @param nrounds the max number of iterations
|
#' @param nrounds the max number of iterations
|
||||||
#' @param watchlist what information should be printed when \code{verbose=1} or
|
#' @param watchlist what information should be printed when \code{verbose=1} or
|
||||||
#' \code{verbose=2}. Watchlist is used to specify validation set monitoring
|
#' \code{verbose=2}. Watchlist is used to specify validation set monitoring
|
||||||
@ -64,8 +64,9 @@
|
|||||||
#' bst <- xgb.train(param, dtrain, nround = 2, watchlist, logregobj, evalerror)
|
#' bst <- xgb.train(param, dtrain, nround = 2, watchlist, logregobj, evalerror)
|
||||||
#' @export
|
#' @export
|
||||||
#'
|
#'
|
||||||
xgb.train <- function(params=list(), dtrain, nrounds, watchlist = list(),
|
xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
|
||||||
obj = NULL, feval = NULL, ...) {
|
obj = NULL, feval = NULL, ...) {
|
||||||
|
dtrain <- data
|
||||||
if (typeof(params) != "list") {
|
if (typeof(params) != "list") {
|
||||||
stop("xgb.train: first argument params must be list")
|
stop("xgb.train: first argument params must be list")
|
||||||
}
|
}
|
||||||
@ -75,37 +76,10 @@ xgb.train <- function(params=list(), dtrain, nrounds, watchlist = list(),
|
|||||||
params = append(params, list(...))
|
params = append(params, list(...))
|
||||||
bst <- xgb.Booster(params, append(watchlist, dtrain))
|
bst <- xgb.Booster(params, append(watchlist, dtrain))
|
||||||
for (i in 1:nrounds) {
|
for (i in 1:nrounds) {
|
||||||
if (is.null(obj)) {
|
succ <- xgb.iter.update(bst, dtrain, i - 1, obj)
|
||||||
succ <- xgb.iter.update(bst, dtrain, i - 1)
|
|
||||||
} else {
|
|
||||||
pred <- xgb.predict(bst, dtrain)
|
|
||||||
gpair <- obj(pred, dtrain)
|
|
||||||
succ <- xgb.iter.boost(bst, dtrain, gpair)
|
|
||||||
}
|
|
||||||
if (length(watchlist) != 0) {
|
if (length(watchlist) != 0) {
|
||||||
if (is.null(feval)) {
|
msg <- xgb.iter.eval(bst, watchlist, i - 1, feval)
|
||||||
msg <- xgb.iter.eval(bst, watchlist, i - 1)
|
cat(paste(msg, "\n", sep=""))
|
||||||
cat(msg)
|
|
||||||
cat("\n")
|
|
||||||
} else {
|
|
||||||
cat("[")
|
|
||||||
cat(i)
|
|
||||||
cat("]")
|
|
||||||
for (j in 1:length(watchlist)) {
|
|
||||||
w <- watchlist[j]
|
|
||||||
if (length(names(w)) == 0) {
|
|
||||||
stop("xgb.eval: name tag must be presented for every elements in watchlist")
|
|
||||||
}
|
|
||||||
ret <- feval(xgb.predict(bst, w[[1]]), w[[1]])
|
|
||||||
cat("\t")
|
|
||||||
cat(names(w))
|
|
||||||
cat("-")
|
|
||||||
cat(ret$metric)
|
|
||||||
cat(":")
|
|
||||||
cat(ret$value)
|
|
||||||
}
|
|
||||||
cat("\n")
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return(bst)
|
return(bst)
|
||||||
|
|||||||
@ -40,19 +40,7 @@
|
|||||||
#'
|
#'
|
||||||
xgboost <- function(data = NULL, label = NULL, params = list(), nrounds,
|
xgboost <- function(data = NULL, label = NULL, params = list(), nrounds,
|
||||||
verbose = 1, ...) {
|
verbose = 1, ...) {
|
||||||
inClass <- class(data)
|
dtrain <- xgb.get.DMatrix(data, label)
|
||||||
if (inClass == "dgCMatrix" || inClass == "matrix") {
|
|
||||||
if (is.null(label))
|
|
||||||
stop("xgboost: need label when data is a matrix")
|
|
||||||
dtrain <- xgb.DMatrix(data, label = label)
|
|
||||||
} else {
|
|
||||||
if (!is.null(label))
|
|
||||||
warning("xgboost: label will be ignored.")
|
|
||||||
if (inClass == "character")
|
|
||||||
dtrain <- xgb.DMatrix(data) else if (inClass == "xgb.DMatrix")
|
|
||||||
dtrain <- data else stop("xgboost: Invalid input of data")
|
|
||||||
}
|
|
||||||
|
|
||||||
if (verbose > 1) {
|
if (verbose > 1) {
|
||||||
silent <- 0
|
silent <- 0
|
||||||
} else {
|
} else {
|
||||||
@ -62,8 +50,11 @@ xgboost <- function(data = NULL, label = NULL, params = list(), nrounds,
|
|||||||
params <- append(params, list(silent = silent))
|
params <- append(params, list(silent = silent))
|
||||||
params <- append(params, list(...))
|
params <- append(params, list(...))
|
||||||
|
|
||||||
if (verbose > 0)
|
if (verbose > 0) {
|
||||||
watchlist <- list(train = dtrain) else watchlist <- list()
|
watchlist <- list(train = dtrain)
|
||||||
|
} else {
|
||||||
|
watchlist <- list()
|
||||||
|
}
|
||||||
|
|
||||||
bst <- xgb.train(params, dtrain, nrounds, watchlist)
|
bst <- xgb.train(params, dtrain, nrounds, watchlist)
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user