Merge branch 'master' of https://github.com/tqchen/xgboost
This commit is contained in:
commit
4d00be84c3
@ -8,6 +8,7 @@ export(xgb.dump)
|
|||||||
export(xgb.load)
|
export(xgb.load)
|
||||||
export(xgb.save)
|
export(xgb.save)
|
||||||
export(xgb.train)
|
export(xgb.train)
|
||||||
|
export(xgb.cv)
|
||||||
export(xgboost)
|
export(xgboost)
|
||||||
exportMethods(predict)
|
exportMethods(predict)
|
||||||
import(methods)
|
import(methods)
|
||||||
|
|||||||
@ -103,6 +103,10 @@ xgb.get.DMatrix <- function(data, label = NULL) {
|
|||||||
}
|
}
|
||||||
return (dtrain)
|
return (dtrain)
|
||||||
}
|
}
|
||||||
|
# Ask the native XGDMatrixNumRow_R routine how many rows `dmat` holds
# and hand its answer straight back to the caller.
xgb.numrow <- function(dmat) {
  .Call("XGDMatrixNumRow_R", dmat, PACKAGE="xgboost")
}
|
||||||
# iteratively update booster with customized statistics
|
# iteratively update booster with customized statistics
|
||||||
xgb.iter.boost <- function(booster, dtrain, gpair) {
|
xgb.iter.boost <- function(booster, dtrain, gpair) {
|
||||||
if (class(booster) != "xgb.Booster") {
|
if (class(booster) != "xgb.Booster") {
|
||||||
@ -174,23 +178,51 @@ xgb.iter.eval <- function(booster, watchlist, iter, feval = NULL) {
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
msg <- ""
|
msg <- ""
|
||||||
}
|
}
|
||||||
return(msg)
|
return(msg)
|
||||||
}
|
}
|
||||||
#------------------------------------------
|
#------------------------------------------
|
||||||
# helper functions for cross validation
|
# helper functions for cross validation
|
||||||
#
|
#
|
||||||
# Split `dall` into `nfold` train/test partitions for cross validation.
#
# dall:  the full data set (must be accepted by xgb.numrow() and slice())
# nfold: number of folds
# param: parameter list forwarded to xgb.Booster() for every fold
#
# Returns a list with one entry per fold, each holding `dtrain` (all rows
# outside the fold), `booster` (created with both slices cached) and
# `watchlist` (list(train=, test=)).
xgb.cv.mknfold <- function(dall, nfold, param) {
  randidx <- sample(seq_len(xgb.numrow(dall)))
  if (nfold > length(randidx)) {
    stop("nfold must not be bigger than the number of rows")
  }
  # Deal the shuffled row ids out to the folds one at a time. Every row lands
  # in exactly one fold and fold sizes differ by at most one, even when the
  # row count is not a multiple of nfold (a fractional step size would
  # silently drop the trailing rows).
  foldid <- rep(seq_len(nfold), length.out = length(randidx))
  idset <- unname(split(randidx, foldid))
  ret <- vector("list", nfold)
  for (k in seq_len(nfold)) {
    dtest <- slice(dall, idset[[k]])
    # training rows are the rows of all the other folds, in fold order
    didx <- unlist(idset[-k], use.names = FALSE)
    dtrain <- slice(dall, didx)
    bst <- xgb.Booster(param, list(dtrain, dtest))
    watchlist <- list(train=dtrain, test=dtest)
    ret[[k]] <- list(dtrain=dtrain, booster=bst, watchlist=watchlist)
  }
  return (ret)
}
|
||||||
|
# Aggregate the per-fold evaluation results of one boosting round.
#
# res:    list with one character vector per fold; element 1 is the round
#         tag, every further element has the form "name:value"
# showsd: when TRUE, "+<standard deviation>" is appended after each mean
#
# Returns a single tab-separated string: the round tag of the first fold
# followed by "name:mean" (or "name:mean+sd") for every metric.
xgb.cv.aggcv <- function(res, showsd = TRUE) {
  header <- res[[1]]
  ret <- header[1]
  # seq_along(header)[-1] is empty when only the round tag is present,
  # whereas 2:length(header) would count down through 2 and 1
  for (i in seq_along(header)[-1]) {
    metric <- strsplit(header[i], ":")[[1]][1]
    # value of metric i in every fold, first fold included
    stats <- vapply(res, function(fold) {
      as.numeric(strsplit(fold[i], ":")[[1]][2])
    }, numeric(1))
    ret <- paste(ret, "\t", metric, ":", sprintf("%f", mean(stats)), sep="")
    if (showsd) {
      ret <- paste(ret, sprintf("+%f", sd(stats)), sep="")
    }
  }
  return (ret)
}
|
||||||
|
|
||||||
|
|||||||
@ -18,6 +18,9 @@
|
|||||||
#' further details. See also inst/examples/demo.R for walkthrough example in R.
|
#' further details. See also inst/examples/demo.R for walkthrough example in R.
|
||||||
#' @param data takes an \code{xgb.DMatrix} as the input.
|
#' @param data takes an \code{xgb.DMatrix} as the input.
|
||||||
#' @param nrounds the max number of iterations
|
#' @param nrounds the max number of iterations
|
||||||
|
#' @param nfold number of folds used
|
||||||
|
#' @param label option field, when data is Matrix
|
||||||
|
#' @param showsd boolean, whether to show the standard deviation of cross validation
|
||||||
#' @param metrics list of evaluation metrics to be used in cross validation,
|
#' @param metrics list of evaluation metrics to be used in cross validation,
|
||||||
#' when it is not specified, the evaluation metric is chosen according to objective function.
|
#' when it is not specified, the evaluation metric is chosen according to objective function.
|
||||||
#' Possible options are:
|
#' Possible options are:
|
||||||
@ -28,7 +31,6 @@
|
|||||||
#' \item \code{auc} Area under curve
|
#' \item \code{auc} Area under curve
|
||||||
#' \item \code{merror} Exact matching error, used to evaluate multi-class classification
|
#' \item \code{merror} Exact matching error, used to evaluate multi-class classification
|
||||||
#' }
|
#' }
|
||||||
#'
|
|
||||||
#' @param obj customized objective function. Returns gradient and second order
|
#' @param obj customized objective function. Returns gradient and second order
|
||||||
#' gradient with given prediction and dtrain,
|
#' gradient with given prediction and dtrain,
|
||||||
#' @param feval customized evaluation function. Returns
|
#' @param feval customized evaluation function. Returns
|
||||||
@ -46,12 +48,33 @@
|
|||||||
#'
|
#'
|
||||||
#' @export
|
#' @export
|
||||||
#'
|
#'
|
||||||
xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL,
                   showsd = TRUE, metrics=list(), obj = NULL, feval = NULL, ...) {
  if (typeof(params) != "list") {
    stop("xgb.cv: first argument params must be list")
  }
  if (nfold <= 1) {
    stop("nfold must be bigger than 1")
  }
  dtrain <- xgb.get.DMatrix(data, label)
  # extra named arguments are appended to the booster parameters,
  # followed by silent=1
  params <- append(params, list(...))
  params <- append(params, list(silent=1))
  # each requested metric becomes its own "eval_metric" entry
  for (mc in metrics) {
    params <- append(params, list("eval_metric"=mc))
  }

  folds <- xgb.cv.mknfold(dtrain, nfold, params)
  # one aggregated result line per round; seq_len() keeps nrounds = 0 from
  # running the rounds 1 and 0 that 1:nrounds would produce
  history <- vector("list", nrounds)
  for (i in seq_len(nrounds)) {
    msg <- vector("list", length(folds))
    for (k in seq_along(folds)) {
      fd <- folds[[k]]
      # iteration numbers passed to the booster are zero-based
      xgb.iter.update(fd$booster, fd$dtrain, i - 1, obj)
      msg[[k]] <- strsplit(xgb.iter.eval(fd$booster, fd$watchlist, i - 1, feval), "\t")[[1]]
    }
    ret <- xgb.cv.aggcv(msg, showsd)
    history[[i]] <- ret
    cat(paste(ret, "\n", sep=""))
  }
  return (history)
}
|
||||||
|
|||||||
10
R-package/inst/examples/cross_validation.R
Normal file
10
R-package/inst/examples/cross_validation.R
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
# library() stops with an error when a package is missing, whereas
# require() only returns FALSE and lets the script fail later on
library(xgboost)
library(methods)

# Directly read in local file
dtrain <- xgb.DMatrix("agaricus.txt.train")

# 5-fold cross validation over 3 rounds, evaluated with both rmse and auc;
# the quoted arguments are passed on as booster parameters
history <- xgb.cv(data = dtrain, nrounds = 3, nfold = 5, metrics = list("rmse", "auc"),
                  "max_depth" = 3, "eta" = 1,
                  "objective" = "binary:logistic")
|
||||||
|
|
||||||
|
|
||||||
@ -174,6 +174,10 @@ extern "C" {
|
|||||||
_WrapperEnd();
|
_WrapperEnd();
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
// Looks up the row count of the data matrix behind `handle` and returns it
// to R as an integer scalar.
// NOTE(review): the count is narrowed from bst_ulong to int; confirm that
// row counts above INT_MAX cannot reach this point.
SEXP XGDMatrixNumRow_R(SEXP handle) {
  void *dmat = R_ExternalPtrAddr(handle);
  const bst_ulong num_row = XGDMatrixNumRow(dmat);
  return ScalarInteger(static_cast<int>(num_row));
}
|
||||||
// functions related to booster
|
// functions related to booster
|
||||||
void _BoosterFinalizer(SEXP ext) {
|
void _BoosterFinalizer(SEXP ext) {
|
||||||
if (R_ExternalPtrAddr(ext) == NULL) return;
|
if (R_ExternalPtrAddr(ext) == NULL) return;
|
||||||
|
|||||||
@ -65,6 +65,11 @@ extern "C" {
|
|||||||
* \return info vector
|
* \return info vector
|
||||||
*/
|
*/
|
||||||
SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field);
|
SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field);
|
||||||
|
/*!
|
||||||
|
* \brief return number of rows
|
||||||
|
* \param handle an instance of data matrix
|
||||||
|
*/
|
||||||
|
SEXP XGDMatrixNumRow_R(SEXP handle);
|
||||||
/*!
|
/*!
|
||||||
* \brief create xgboost learner
|
* \brief create xgboost learner
|
||||||
* \param dmats a list of dmatrix handles that will be cached
|
* \param dmats a list of dmatrix handles that will be cached
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user