major change in the design of R interface

This commit is contained in:
unknown
2014-08-26 23:41:03 -07:00
parent 84e5fc285b
commit 0130be4acc
21 changed files with 1175 additions and 128 deletions

View File

@@ -6,42 +6,6 @@
library.dynam.unload("xgboost", libpath);
}
# Construct an xgb.DMatrix from a file path, a dense matrix or a dgCMatrix.
#
# Args:
#   data:    character (handed to XGDMatrixCreateFromFile_R), a matrix
#            (XGDMatrixCreateFromMat_R) or a dgCMatrix
#            (XGDMatrixCreateFromCSC_R, using its p, i and x slots).
#   info:    named list; each element is forwarded to xgb.setinfo() under
#            its own name.
#   missing: value passed along to the dense-matrix constructor only.
#
# Returns the native handle tagged with class "xgb.DMatrix".
# Stops when `data` is none of the supported kinds.
xgb.DMatrix <- function(data, info = list(), missing = 0.0) {
  if (typeof(data) == "character") {
    handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE),
                    PACKAGE = "xgboost")
  } else if (is.matrix(data)) {
    handle <- .Call("XGDMatrixCreateFromMat_R", data, missing,
                    PACKAGE = "xgboost")
  } else if (inherits(data, "dgCMatrix")) {
    # inherits() stays a scalar test even when `data` carries several classes;
    # `class(data) == ...` would hand `if` a vector condition in that case.
    handle <- .Call("XGDMatrixCreateFromCSC_R", data@p, data@i, data@x,
                    PACKAGE = "xgboost")
  } else {
    stop(paste("xgb.DMatrix: does not support to construct from ", typeof(data)))
  }
  dmat <- structure(handle, class = "xgb.DMatrix")
  # seq_along() yields an empty loop for an empty list, so no guard is needed
  for (i in seq_along(info)) {
    p <- info[i]
    xgb.setinfo(dmat, names(p), p[[1]])
  }
  return(dmat)
}
# Read one metadata field from an xgb.DMatrix.
#
# Args:
#   dmat: object of class "xgb.DMatrix".
#   name: one of "label", "weight" or "base_margin".
#
# Returns whatever the native routine XGDMatrixGetInfo_R yields for the field.
# Stops when `name` is not character, when `dmat` is not an xgb.DMatrix, or
# when `name` is not a single supported field name.
xgb.getinfo <- function(dmat, name) {
  if (typeof(name) != "character") {
    stop("xgb.getinfo: name must be character")
  }
  if (!inherits(dmat, "xgb.DMatrix")) {
    # The message used to mention xgb.setinfo/dtrain, which pointed callers
    # at the wrong function and the wrong argument.
    stop("xgb.getinfo: first argument dmat must be xgb.DMatrix")
  }
  # %in% never returns NA and the length check keeps the condition scalar
  if (length(name) != 1 || !(name %in% c("label", "weight", "base_margin"))) {
    stop(paste("xgb.getinfo: unknown info name", paste(name, collapse = ", ")))
  }
  ret <- .Call("XGDMatrixGetInfo_R", dmat, name, PACKAGE = "xgboost")
  return(ret)
}
# set information into a dmatrix; this mutates the dmatrix in place
xgb.setinfo <- function(dmat, name, info) {
if (class(dmat) != "xgb.DMatrix") {
@@ -63,9 +27,10 @@ xgb.setinfo <- function(dmat, name, info) {
.Call("XGDMatrixSetInfo_R", dmat, name, as.integer(info), PACKAGE="xgboost")
return(TRUE)
}
stop(pase("xgb.setinfo: unknown info name", name))
stop(paste("xgb.setinfo: unknown info name", name))
return(FALSE)
}
# construct a Booster from cachelist
xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
if (typeof(cachelist) != "list") {
@@ -92,61 +57,9 @@ xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
}
return(structure(handle, class="xgb.Booster"))
}
# Train a booster for a fixed number of rounds.
#
# Args:
#   params:    list handed to xgb.Booster().
#   dtrain:    training data, an object of class "xgb.DMatrix".
#   nrounds:   number of boosting rounds; 0 performs no rounds.
#   watchlist: named list of xgb.DMatrix objects evaluated after every round.
#   obj:       optional custom objective called as obj(pred, dtrain); its
#              result is passed to xgb.iter.boost() as the gradient pair.
#   feval:     optional custom metric called as feval(pred, dmat); its
#              `metric` and `value` elements are printed.
#
# Returns the booster created by xgb.Booster() after the last round.
# Stops when `params` is not a list, `dtrain` is not an xgb.DMatrix, or a
# watchlist entry has no name while `feval` is in use.
xgb.train <- function(params, dtrain, nrounds = 10, watchlist = list(),
                      obj = NULL, feval = NULL) {
  if (typeof(params) != "list") {
    stop("xgb.train: first argument params must be list")
  }
  if (!inherits(dtrain, "xgb.DMatrix")) {
    stop("xgb.train: second argument dtrain must be xgb.DMatrix")
  }
  bst <- xgb.Booster(params, append(watchlist, dtrain))
  # seq_len() gives an empty loop for nrounds = 0, where 1:nrounds would
  # iterate over c(1, 0)
  for (i in seq_len(nrounds)) {
    if (is.null(obj)) {
      # the update and eval helpers receive a zero-based iteration index
      xgb.iter.update(bst, dtrain, i - 1)
    } else {
      pred <- xgb.predict(bst, dtrain)
      gpair <- obj(pred, dtrain)
      xgb.iter.boost(bst, dtrain, gpair)
    }
    if (length(watchlist) != 0) {
      if (is.null(feval)) {
        msg <- xgb.iter.eval(bst, watchlist, i - 1)
        cat(msg); cat("\n")
      } else {
        cat("["); cat(i); cat("]")
        for (j in seq_along(watchlist)) {
          w <- watchlist[j]
          if (length(names(w)) == 0) {
            stop("xgb.eval: name tag must be presented for every elements in watchlist")
          }
          ret <- feval(xgb.predict(bst, w[[1]]), w[[1]])
          cat("\t"); cat(names(w)); cat("-"); cat(ret$metric)
          cat(":"); cat(ret$value)
        }
        cat("\n")
      }
    }
  }
  return(bst)
}
# Save a booster or a DMatrix to a file.
#
# Args:
#   handle: object of class "xgb.Booster" (written via XGBoosterSaveModel_R)
#           or "xgb.DMatrix" (written via XGDMatrixSaveBinary_R).
#   fname:  character, the destination passed to the native routine.
#
# Returns TRUE after the native call returns.
# Stops when `fname` is not character or `handle` is neither class.
xgb.save <- function(handle, fname) {
  if (typeof(fname) != "character") {
    stop("xgb.save: fname must be character")
  }
  # inherits() keeps both tests scalar for objects with several classes
  if (inherits(handle, "xgb.Booster")) {
    .Call("XGBoosterSaveModel_R", handle, fname, PACKAGE = "xgboost")
    return(TRUE)
  }
  if (inherits(handle, "xgb.DMatrix")) {
    .Call("XGDMatrixSaveBinary_R", handle, fname, as.integer(FALSE),
          PACKAGE = "xgboost")
    return(TRUE)
  }
  stop("xgb.save: the input must be either xgb.DMatrix or xgb.Booster")
}
# predict
# predict, deprecated
xgb.predict <- function(booster, dmat, outputmargin = FALSE) {
if (class(booster) != "xgb.Booster") {
stop("xgb.predict: first argument must be type xgb.Booster")
@@ -157,21 +70,12 @@ xgb.predict <- function(booster, dmat, outputmargin = FALSE) {
ret <- .Call("XGBoosterPredict_R", booster, dmat, as.integer(outputmargin), PACKAGE="xgboost")
return(ret)
}
# Dump a booster through the native routine XGBoosterDumpModel_R.
#
# Args:
#   booster: object of class "xgb.Booster".
#   fname:   character, passed to the native routine as the output target.
#   fmap:    passed through unchanged; presumably a feature-map file path,
#            with "" meaning none -- TODO confirm against the C side.
#
# Returns TRUE after the native call returns.
# Stops when `booster` is not an xgb.Booster or `fname` is not character.
xgb.dump <- function(booster, fname, fmap = "") {
  # inherits() keeps the test scalar for objects with several classes
  if (!inherits(booster, "xgb.Booster")) {
    stop("xgb.dump: first argument must be type xgb.Booster")
  }
  if (typeof(fname) != "character") {
    stop("xgb.dump: second argument must be type character")
  }
  .Call("XGBoosterDumpModel_R", booster, fname, fmap, PACKAGE = "xgboost")
  return(TRUE)
}
##--------------------------------------
# the following are low-level, per-iteration functions; you can ignore
# them unless you want to drive the boosting loop yourself
#---------------------------------------
# iteratively update booster with dtrain
xgb.iter.update <- function(booster, dtrain, iter) {
if (class(booster) != "xgb.Booster") {
@@ -183,6 +87,7 @@ xgb.iter.update <- function(booster, dtrain, iter) {
.Call("XGBoosterUpdateOneIter_R", booster, as.integer(iter), dtrain, PACKAGE="xgboost")
return(TRUE)
}
# iteratively update booster with customized statistics
xgb.iter.boost <- function(booster, dtrain, gpair) {
if (class(booster) != "xgb.Booster") {
@@ -194,6 +99,7 @@ xgb.iter.boost <- function(booster, dtrain, gpair) {
.Call("XGBoosterBoostOneIter_R", booster, dtrain, gpair$grad, gpair$hess, PACKAGE="xgboost")
return(TRUE)
}
# iteratively evaluate one iteration
xgb.iter.eval <- function(booster, watchlist, iter) {
if (class(booster) != "xgb.Booster") {

22
R-package/R/xgb.DMatrix.R Normal file
View File

@@ -0,0 +1,22 @@
# Construct an xgb.DMatrix from a file path, a dense matrix or a dgCMatrix.
#
# Args:
#   data:    character (handed to XGDMatrixCreateFromFile_R), a matrix
#            (XGDMatrixCreateFromMat_R) or a dgCMatrix
#            (XGDMatrixCreateFromCSC_R, using its p, i and x slots).
#   missing: value passed along to the dense-matrix constructor only.
#   ...:     named fields, each forwarded to xgb.setinfo() under its own
#            name, e.g. label = y.
#
# Returns the native handle tagged with class "xgb.DMatrix".
# Stops when `data` is none of the supported kinds.
xgb.DMatrix <- function(data, missing = 0.0, ...) {
  if (typeof(data) == "character") {
    handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE),
                    PACKAGE = "xgboost")
  } else if (is.matrix(data)) {
    handle <- .Call("XGDMatrixCreateFromMat_R", data, missing,
                    PACKAGE = "xgboost")
  } else if (inherits(data, "dgCMatrix")) {
    # inherits() stays a scalar test even when `data` carries several classes;
    # `class(data) == ...` would hand `if` a vector condition in that case.
    handle <- .Call("XGDMatrixCreateFromCSC_R", data@p, data@i, data@x,
                    PACKAGE = "xgboost")
  } else {
    stop(paste("xgb.DMatrix: does not support to construct from ", typeof(data)))
  }
  dmat <- structure(handle, class = "xgb.DMatrix")
  info <- list(...)
  # seq_along() yields an empty loop when no extra fields were supplied
  for (i in seq_along(info)) {
    p <- info[i]
    xgb.setinfo(dmat, names(p), p[[1]])
  }
  return(dmat)
}

11
R-package/R/xgb.dump.R Normal file
View File

@@ -0,0 +1,11 @@
# Dump a booster through the native routine XGBoosterDumpModel_R.
#
# Args:
#   booster: object of class "xgb.Booster".
#   fname:   character, passed to the native routine as the output target.
#   fmap:    passed through unchanged; presumably a feature-map file path,
#            with "" meaning none -- TODO confirm against the C side.
#
# Returns TRUE after the native call returns.
# Stops when `booster` is not an xgb.Booster or `fname` is not character.
xgb.dump <- function(booster, fname, fmap = "") {
  # inherits() keeps the test scalar for objects with several classes
  if (!inherits(booster, "xgb.Booster")) {
    stop("xgb.dump: first argument must be type xgb.Booster")
  }
  if (typeof(fname) != "character") {
    stop("xgb.dump: second argument must be type character")
  }
  .Call("XGBoosterDumpModel_R", booster, fname, fmap, PACKAGE = "xgboost")
  return(TRUE)
}

16
R-package/R/xgb.getinfo.R Normal file
View File

@@ -0,0 +1,16 @@
# Read one metadata field from an xgb.DMatrix.
#
# Args:
#   dmat: object of class "xgb.DMatrix".
#   name: one of "label", "weight" or "base_margin".
#
# Returns whatever the native routine XGDMatrixGetInfo_R yields for the field.
# Stops when `name` is not character, when `dmat` is not an xgb.DMatrix, or
# when `name` is not a single supported field name.
xgb.getinfo <- function(dmat, name) {
  if (typeof(name) != "character") {
    stop("xgb.getinfo: name must be character")
  }
  if (!inherits(dmat, "xgb.DMatrix")) {
    # The message used to mention xgb.setinfo/dtrain, which pointed callers
    # at the wrong function and the wrong argument.
    stop("xgb.getinfo: first argument dmat must be xgb.DMatrix")
  }
  # %in% never returns NA and the length check keeps the condition scalar
  if (length(name) != 1 || !(name %in% c("label", "weight", "base_margin"))) {
    stop(paste("xgb.getinfo: unknown info name", paste(name, collapse = ", ")))
  }
  ret <- .Call("XGDMatrixGetInfo_R", dmat, name, PACKAGE = "xgboost")
  return(ret)
}

5
R-package/R/xgb.load.R Normal file
View File

@@ -0,0 +1,5 @@
# Build a booster from a model file by delegating to xgb.Booster().
#
# Args:
#   modelfile: forwarded to xgb.Booster() as its `modelfile` argument;
#              must not be NULL.
#
# Returns the value of xgb.Booster(modelfile = modelfile).
xgb.load <- function(modelfile) {
  if (!is.null(modelfile)) {
    return(xgb.Booster(modelfile = modelfile))
  }
  stop('xgb.load: modelfile cannot be NULL')
}

16
R-package/R/xgb.save.R Normal file
View File

@@ -0,0 +1,16 @@
# Save a booster or a DMatrix to a file.
#
# Args:
#   handle: object of class "xgb.Booster" (written via XGBoosterSaveModel_R)
#           or "xgb.DMatrix" (written via XGDMatrixSaveBinary_R).
#   fname:  character, the destination passed to the native routine.
#
# Returns TRUE after the native call returns.
# Stops when `fname` is not character or `handle` is neither class.
xgb.save <- function(handle, fname) {
  if (typeof(fname) != "character") {
    stop("xgb.save: fname must be character")
  }
  # inherits() keeps both tests scalar for objects with several classes
  if (inherits(handle, "xgb.Booster")) {
    .Call("XGBoosterSaveModel_R", handle, fname, PACKAGE = "xgboost")
    return(TRUE)
  }
  if (inherits(handle, "xgb.DMatrix")) {
    .Call("XGDMatrixSaveBinary_R", handle, fname, as.integer(FALSE),
          PACKAGE = "xgboost")
    return(TRUE)
  }
  stop("xgb.save: the input must be either xgb.DMatrix or xgb.Booster")
}

38
R-package/R/xgb.train.R Normal file
View File

@@ -0,0 +1,38 @@
# Train a booster for a fixed number of rounds.
#
# Args:
#   params:    list handed to xgb.Booster().
#   dtrain:    training data, an object of class "xgb.DMatrix".
#   nrounds:   number of boosting rounds; 0 performs no rounds.
#   watchlist: named list of xgb.DMatrix objects evaluated after every round.
#   obj:       optional custom objective called as obj(pred, dtrain); its
#              result is passed to xgb.iter.boost() as the gradient pair.
#   feval:     optional custom metric called as feval(pred, dmat); its
#              `metric` and `value` elements are printed.
#
# Returns the booster created by xgb.Booster() after the last round.
# Stops when `params` is not a list, `dtrain` is not an xgb.DMatrix, or a
# watchlist entry has no name while `feval` is in use.
xgb.train <- function(params, dtrain, nrounds = 10, watchlist = list(),
                      obj = NULL, feval = NULL) {
  if (typeof(params) != "list") {
    stop("xgb.train: first argument params must be list")
  }
  if (!inherits(dtrain, "xgb.DMatrix")) {
    stop("xgb.train: second argument dtrain must be xgb.DMatrix")
  }
  bst <- xgb.Booster(params, append(watchlist, dtrain))
  # seq_len() gives an empty loop for nrounds = 0, where 1:nrounds would
  # iterate over c(1, 0)
  for (i in seq_len(nrounds)) {
    if (is.null(obj)) {
      # the update and eval helpers receive a zero-based iteration index
      xgb.iter.update(bst, dtrain, i - 1)
    } else {
      pred <- xgb.predict(bst, dtrain)
      gpair <- obj(pred, dtrain)
      xgb.iter.boost(bst, dtrain, gpair)
    }
    if (length(watchlist) != 0) {
      if (is.null(feval)) {
        msg <- xgb.iter.eval(bst, watchlist, i - 1)
        cat(msg); cat("\n")
      } else {
        cat("["); cat(i); cat("]")
        for (j in seq_along(watchlist)) {
          w <- watchlist[j]
          if (length(names(w)) == 0) {
            stop("xgb.eval: name tag must be presented for every elements in watchlist")
          }
          ret <- feval(xgb.predict(bst, w[[1]]), w[[1]])
          cat("\t"); cat(names(w)); cat("-"); cat(ret$metric)
          cat(":"); cat(ret$value)
        }
        cat("\n")
      }
    }
  }
  return(bst)
}

View File

@@ -1,23 +1,48 @@
# Main function for xgboost-package
xgboost = function(x=NULL,y=NULL,file=NULL,nrounds=10,params,watchlist=list(),
obj=NULL, feval=NULL, margin=NULL)
xgboost = function(x=NULL,y=NULL,DMatrix=NULL, file=NULL, validation=NULL,
nrounds=10, obj=NULL, feval=NULL, margin=NULL, verbose = T, ...)
{
if (is.null(x) && is.null(y))
{
if (is.null(file))
stop('xgboost need input data, either R objects or local files.')
dtrain = xgb.DMatrix(file)
}
if (!is.null(DMatrix))
dtrain = DMatrix
else
dtrain = xgb.DMatrix(x, info=list(label=y))
if (!is.null(margin))
{
succ <- xgb.setinfo(dtrain, "base_margin", margin)
if (!succ)
warning('Attemp to use margin failed.')
if (is.null(x) && is.null(y))
{
if (is.null(file))
stop('xgboost need input data, either R objects, local files or DMatrix object.')
dtrain = xgb.DMatrix(file)
}
else
dtrain = xgb.DMatrix(x, label=y)
if (!is.null(margin))
{
succ <- xgb.setinfo(dtrain, "base_margin", margin)
if (!succ)
warning('Attemp to use margin failed.')
}
}
params = list(...)
watchlist=list()
if (verbose)
{
if (!is.null(validation))
{
if (class(validation)!='xgb.DMatrix')
dtest = xgb.DMatrix(validation)
else
dtest = validation
watchlist = list(eval=dtest,train=dtrain)
}
else
watchlist = list(train=dtrain)
}
bst <- xgb.train(params, dtrain, nrounds, watchlist, obj, feval)
return(bst)
}