modify xgb.getinfo to getinfo

This commit is contained in:
unknown 2014-08-27 15:03:24 -07:00
parent a060a2e9a6
commit 0f0c12707c
14 changed files with 268 additions and 256 deletions

View File

@ -3,8 +3,8 @@ Type: Package
Title: R wrapper of xgboost Title: R wrapper of xgboost
Version: 0.3-0 Version: 0.3-0
Date: 2014-08-23 Date: 2014-08-23
Author: Tianqi Chen Author: Tianqi Chen, Tong He
Maintainer: Tianqi Chen <tianqi.tchen@gmail.com> Maintainer: Tianqi Chen <tianqi.tchen@gmail.com>, Tong He <hetong007@gmail.com>
Description: xgboost Description: xgboost
License: See LICENSE file License: See LICENSE file
URL: https://github.com/tqchen/xgboost URL: https://github.com/tqchen/xgboost

View File

@ -2,8 +2,8 @@ importClassesFrom("Matrix", dgCMatrix, dgeMatrix)
export(xgboost) export(xgboost)
export(xgb.DMatrix) export(xgb.DMatrix)
export(xgb.getinfo)
exportMethods(predict) exportMethods(predict)
exportMethods(getinfo)
export(xgb.train) export(xgb.train)
export(xgb.save) export(xgb.save)
export(xgb.load) export(xgb.load)

View File

@ -0,0 +1,21 @@
# Declare xgb.DMatrix to the S4 system so that setMethod() below can use it
# in a method signature.
setClass("xgb.DMatrix")

# Generic accessor: retrieve a piece of information attached to `object`.
# Written as an S3-style generic; the xgb.DMatrix method is registered
# through setMethod() below.
getinfo <- function(object, ...) {
  UseMethod("getinfo")
}

# getinfo method for xgb.DMatrix.
#   object: an xgb.DMatrix.
#   name:   a single string, one of "label", "weight" or "base_margin".
# Returns the value produced by the native routine XGDMatrixGetInfo_R for
# the requested field.
# Stops with an error when `name` is not character, when `object` is not an
# xgb.DMatrix, or when `name` is not one of the supported fields.
setMethod("getinfo", signature = "xgb.DMatrix",
          definition = function(object, name) {
  if (!is.character(name)) {
    stop("getinfo: name must be character")
  }
  # inherits() stays a scalar test even if the object carries several classes
  if (!inherits(object, "xgb.DMatrix")) {
    stop("getinfo: first argument object must be xgb.DMatrix")
  }
  # %in% never yields NA, so NA or non-scalar names get a clear error
  # instead of failing inside the condition itself
  valid_names <- c("label", "weight", "base_margin")
  if (length(name) != 1L || !(name %in% valid_names)) {
    stop(paste("getinfo: unknown info name", paste(name, collapse = ", ")))
  }
  .Call("XGDMatrixGetInfo_R", object, name, PACKAGE = "xgboost")
})

View File

@ -2,15 +2,12 @@
setClass("xgb.Booster") setClass("xgb.Booster")
#' @export #' @export
setMethod("predict", setMethod("predict", signature = "xgb.Booster",
signature = "xgb.Booster", definition = function(object, newdata, outputmargin = FALSE) {
definition = function(object, newdata, outputmargin = FALSE) if (class(newdata) != "xgb.DMatrix") {
{ newdata <- xgb.DMatrix(newdata)
if (class(newdata) != "xgb.DMatrix") { }
newdata = xgb.DMatrix(newdata) ret <- .Call("XGBoosterPredict_R", object, newdata, as.integer(outputmargin), PACKAGE = "xgboost")
} return(ret)
ret <- .Call("XGBoosterPredict_R", object, newdata, })
as.integer(outputmargin), PACKAGE="xgboost")
return(ret)
})

View File

@ -1,128 +1,135 @@
# depends on matrix # depends on matrix
.onLoad <- function(libname, pkgname) { .onLoad <- function(libname, pkgname) {
library.dynam("xgboost", pkgname, libname); library.dynam("xgboost", pkgname, libname)
} }
.onUnload <- function(libpath) { .onUnload <- function(libpath) {
library.dynam.unload("xgboost", libpath); library.dynam.unload("xgboost", libpath)
} }
# set information into dmatrix, this mutates dmatrix # set information into dmatrix, this mutates dmatrix
xgb.setinfo <- function(dmat, name, info) { xgb.setinfo <- function(dmat, name, info) {
if (class(dmat) != "xgb.DMatrix") { if (class(dmat) != "xgb.DMatrix") {
stop("xgb.setinfo: first argument dtrain must be xgb.DMatrix"); stop("xgb.setinfo: first argument dtrain must be xgb.DMatrix")
} }
if (name == "label") { if (name == "label") {
.Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info), PACKAGE="xgboost") .Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info),
return(TRUE) PACKAGE = "xgboost")
} return(TRUE)
if (name == "weight") { }
.Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info), PACKAGE="xgboost") if (name == "weight") {
return(TRUE) .Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info),
} PACKAGE = "xgboost")
if (name == "base_margin") { return(TRUE)
.Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info), PACKAGE="xgboost") }
return(TRUE) if (name == "base_margin") {
} .Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info),
if (name == "group") { PACKAGE = "xgboost")
.Call("XGDMatrixSetInfo_R", dmat, name, as.integer(info), PACKAGE="xgboost") return(TRUE)
return(TRUE) }
} if (name == "group") {
stop(paste("xgb.setinfo: unknown info name", name)) .Call("XGDMatrixSetInfo_R", dmat, name, as.integer(info),
return(FALSE) PACKAGE = "xgboost")
return(TRUE)
}
stop(paste("xgb.setinfo: unknown info name", name))
return(FALSE)
} }
# construct a Booster from cachelist # construct a Booster from cachelist
xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) { xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
if (typeof(cachelist) != "list") { if (typeof(cachelist) != "list") {
stop("xgb.Booster: only accepts list of DMatrix as cachelist") stop("xgb.Booster: only accepts list of DMatrix as cachelist")
}
for (dm in cachelist) {
if (class(dm) != "xgb.DMatrix") {
stop("xgb.Booster: only accepts list of DMatrix as cachelist")
} }
for (dm in cachelist) { }
if (class(dm) != "xgb.DMatrix") { handle <- .Call("XGBoosterCreate_R", cachelist, PACKAGE = "xgboost")
stop("xgb.Booster: only accepts list of DMatrix as cachelist") .Call("XGBoosterSetParam_R", handle, "seed", "0", PACKAGE = "xgboost")
} if (length(params) != 0) {
for (i in 1:length(params)) {
p <- params[i]
.Call("XGBoosterSetParam_R", handle, names(p), as.character(p),
PACKAGE = "xgboost")
} }
handle <- .Call("XGBoosterCreate_R", cachelist, PACKAGE="xgboost") }
.Call("XGBoosterSetParam_R", handle, "seed", "0", PACKAGE="xgboost") if (!is.null(modelfile)) {
if (length(params) != 0) { if (typeof(modelfile) != "character") {
for (i in 1:length(params)) { stop("xgb.Booster: modelfile must be character")
p <- params[i]
.Call("XGBoosterSetParam_R", handle, names(p), as.character(p), PACKAGE="xgboost")
}
} }
if (!is.null(modelfile)) { .Call("XGBoosterLoadModel_R", handle, modelfile, PACKAGE = "xgboost")
if (typeof(modelfile) != "character"){ }
stop("xgb.Booster: modelfile must be character"); return(structure(handle, class = "xgb.Booster"))
}
.Call("XGBoosterLoadModel_R", handle, modelfile, PACKAGE="xgboost")
}
return(structure(handle, class="xgb.Booster"))
} }
# predict, deprecated # predict, deprecated
xgb.predict <- function(booster, dmat, outputmargin = FALSE) { xgb.predict <- function(booster, dmat, outputmargin = FALSE) {
if (class(booster) != "xgb.Booster") { if (class(booster) != "xgb.Booster") {
stop("xgb.predict: first argument must be type xgb.Booster") stop("xgb.predict: first argument must be type xgb.Booster")
} }
if (class(dmat) != "xgb.DMatrix") { if (class(dmat) != "xgb.DMatrix") {
stop("xgb.predict: second argument must be type xgb.DMatrix") stop("xgb.predict: second argument must be type xgb.DMatrix")
} }
ret <- .Call("XGBoosterPredict_R", booster, dmat, as.integer(outputmargin), PACKAGE="xgboost") ret <- .Call("XGBoosterPredict_R", booster, dmat, as.integer(outputmargin),
return(ret) PACKAGE = "xgboost")
return(ret)
} }
##-------------------------------------- ## ----the following are low level iteratively function, not needed if
# the following are low level iteratively function, not needed ## you do not want to use them ---------------------------------------
# if you do not want to use them
#---------------------------------------
# iteratively update booster with dtrain # iteratively update booster with dtrain
xgb.iter.update <- function(booster, dtrain, iter) { xgb.iter.update <- function(booster, dtrain, iter) {
if (class(booster) != "xgb.Booster") { if (class(booster) != "xgb.Booster") {
stop("xgb.iter.update: first argument must be type xgb.Booster") stop("xgb.iter.update: first argument must be type xgb.Booster")
} }
if (class(dtrain) != "xgb.DMatrix") { if (class(dtrain) != "xgb.DMatrix") {
stop("xgb.iter.update: second argument must be type xgb.DMatrix") stop("xgb.iter.update: second argument must be type xgb.DMatrix")
} }
.Call("XGBoosterUpdateOneIter_R", booster, as.integer(iter), dtrain, PACKAGE="xgboost") .Call("XGBoosterUpdateOneIter_R", booster, as.integer(iter), dtrain,
return(TRUE) PACKAGE = "xgboost")
return(TRUE)
} }
# iteratively update booster with customized statistics # iteratively update booster with customized statistics
xgb.iter.boost <- function(booster, dtrain, gpair) { xgb.iter.boost <- function(booster, dtrain, gpair) {
if (class(booster) != "xgb.Booster") { if (class(booster) != "xgb.Booster") {
stop("xgb.iter.update: first argument must be type xgb.Booster") stop("xgb.iter.update: first argument must be type xgb.Booster")
} }
if (class(dtrain) != "xgb.DMatrix") { if (class(dtrain) != "xgb.DMatrix") {
stop("xgb.iter.update: second argument must be type xgb.DMatrix") stop("xgb.iter.update: second argument must be type xgb.DMatrix")
} }
.Call("XGBoosterBoostOneIter_R", booster, dtrain, gpair$grad, gpair$hess, PACKAGE="xgboost") .Call("XGBoosterBoostOneIter_R", booster, dtrain, gpair$grad, gpair$hess,
return(TRUE) PACKAGE = "xgboost")
return(TRUE)
} }
# iteratively evaluate one iteration # iteratively evaluate one iteration
xgb.iter.eval <- function(booster, watchlist, iter) { xgb.iter.eval <- function(booster, watchlist, iter) {
if (class(booster) != "xgb.Booster") { if (class(booster) != "xgb.Booster") {
stop("xgb.eval: first argument must be type xgb.Booster") stop("xgb.eval: first argument must be type xgb.Booster")
}
if (typeof(watchlist) != "list") {
stop("xgb.eval: only accepts list of DMatrix as watchlist")
}
for (w in watchlist) {
if (class(w) != "xgb.DMatrix") {
stop("xgb.eval: watch list can only contain xgb.DMatrix")
} }
if (typeof(watchlist) != "list") { }
stop("xgb.eval: only accepts list of DMatrix as watchlist") evnames <- list()
if (length(watchlist) != 0) {
for (i in 1:length(watchlist)) {
w <- watchlist[i]
if (length(names(w)) == 0) {
stop("xgb.eval: name tag must be presented for every elements in watchlist")
}
evnames <- append(evnames, names(w))
} }
for (w in watchlist) { }
if (class(w) != "xgb.DMatrix") { msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist,
stop("xgb.eval: watch list can only contain xgb.DMatrix") evnames, PACKAGE = "xgboost")
} return(msg)
}
evnames <- list()
if (length(watchlist) != 0) {
for (i in 1:length(watchlist)) {
w <- watchlist[i]
if (length(names(w)) == 0) {
stop("xgb.eval: name tag must be presented for every elements in watchlist")
}
evnames <- append(evnames, names(w))
}
}
msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist, evnames, PACKAGE="xgboost")
return(msg)
} }

View File

@ -1,22 +1,26 @@
# constructing DMatrix # constructing DMatrix
xgb.DMatrix <- function(data, info=list(), missing=0.0, ...) { xgb.DMatrix <- function(data, info = list(), missing = 0, ...) {
if (typeof(data) == "character") { if (typeof(data) == "character") {
handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE), PACKAGE="xgboost") handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE),
} else if(is.matrix(data)) { PACKAGE = "xgboost")
handle <- .Call("XGDMatrixCreateFromMat_R", data, missing, PACKAGE="xgboost") } else if (is.matrix(data)) {
} else if(class(data) == "dgCMatrix") { handle <- .Call("XGDMatrixCreateFromMat_R", data, missing,
handle <- .Call("XGDMatrixCreateFromCSC_R", data@p, data@i, data@x, PACKAGE="xgboost") PACKAGE = "xgboost")
} else { } else if (class(data) == "dgCMatrix") {
stop(paste("xgb.DMatrix: does not support to construct from ", typeof(data))) handle <- .Call("XGDMatrixCreateFromCSC_R", data@p, data@i, data@x,
} PACKAGE = "xgboost")
dmat <- structure(handle, class="xgb.DMatrix") } else {
stop(paste("xgb.DMatrix: does not support to construct from ",
typeof(data)))
}
dmat <- structure(handle, class = "xgb.DMatrix")
info = append(info,list(...)) info <- append(info, list(...))
if (length(info)==0) if (length(info) == 0)
return(dmat)
for (i in 1:length(info)) {
p = info[i]
xgb.setinfo(dmat, names(p), p[[1]])
}
return(dmat) return(dmat)
for (i in 1:length(info)) {
p <- info[i]
xgb.setinfo(dmat, names(p), p[[1]])
}
return(dmat)
} }

View File

@ -1,12 +1,13 @@
# save model or DMatrix to file # save model or DMatrix to file
xgb.DMatrix.save <- function(handle, fname) { xgb.DMatrix.save <- function(handle, fname) {
if (typeof(fname) != "character") { if (typeof(fname) != "character") {
stop("xgb.save: fname must be character") stop("xgb.save: fname must be character")
} }
if (class(handle) == "xgb.DMatrix") { if (class(handle) == "xgb.DMatrix") {
.Call("XGDMatrixSaveBinary_R", handle, fname, as.integer(FALSE), PACKAGE="xgboost") .Call("XGDMatrixSaveBinary_R", handle, fname, as.integer(FALSE),
return(TRUE) PACKAGE = "xgboost")
} return(TRUE)
stop("xgb.save: the input must be either xgb.DMatrix or xgb.Booster") }
return(FALSE) stop("xgb.save: the input must be either xgb.DMatrix or xgb.Booster")
return(FALSE)
} }

View File

@ -1,11 +1,11 @@
# dump model # dump model
xgb.dump <- function(booster, fname, fmap = "") { xgb.dump <- function(booster, fname, fmap = "") {
if (class(booster) != "xgb.Booster") { if (class(booster) != "xgb.Booster") {
stop("xgb.dump: first argument must be type xgb.Booster") stop("xgb.dump: first argument must be type xgb.Booster")
} }
if (typeof(fname) != "character"){ if (typeof(fname) != "character") {
stop("xgb.dump: second argument must be type character") stop("xgb.dump: second argument must be type character")
} }
.Call("XGBoosterDumpModel_R", booster, fname, fmap, PACKAGE="xgboost") .Call("XGBoosterDumpModel_R", booster, fname, fmap, PACKAGE = "xgboost")
return(TRUE) return(TRUE)
} }

View File

@ -1,16 +0,0 @@
# Get information from a DMatrix.
#   dmat: an xgb.DMatrix.
#   name: a single string, one of "label", "weight" or "base_margin".
# Returns the value produced by the native routine XGDMatrixGetInfo_R for
# the requested field.
# Stops with an error when `name` is not character, when `dmat` is not an
# xgb.DMatrix, or when `name` is not one of the supported fields.
xgb.getinfo <- function(dmat, name) {
  if (!is.character(name)) {
    stop("xgb.getinfo: name must be character")
  }
  # inherits() stays a scalar test even if the object carries several classes
  if (!inherits(dmat, "xgb.DMatrix")) {
    stop("xgb.getinfo: first argument dmat must be xgb.DMatrix")
  }
  # %in% never yields NA, so NA or non-scalar names get a clear error
  # instead of failing inside the condition itself
  valid_names <- c("label", "weight", "base_margin")
  if (length(name) != 1L || !(name %in% valid_names)) {
    stop(paste("xgb.getinfo: unknown info name", paste(name, collapse = ", ")))
  }
  .Call("XGDMatrixGetInfo_R", dmat, name, PACKAGE = "xgboost")
}

View File

@ -1,5 +1,5 @@
xgb.load <- function(modelfile) { xgb.load <- function(modelfile) {
if (is.null(modelfile)) if (is.null(modelfile))
stop('xgb.load: modelfile cannot be NULL') stop("xgb.load: modelfile cannot be NULL")
xgb.Booster(modelfile=modelfile) xgb.Booster(modelfile = modelfile)
} }

View File

@ -1,12 +1,12 @@
# save model or DMatrix to file # save model or DMatrix to file
xgb.save <- function(handle, fname) { xgb.save <- function(handle, fname) {
if (typeof(fname) != "character") { if (typeof(fname) != "character") {
stop("xgb.save: fname must be character") stop("xgb.save: fname must be character")
} }
if (class(handle) == "xgb.Booster") { if (class(handle) == "xgb.Booster") {
.Call("XGBoosterSaveModel_R", handle, fname, PACKAGE="xgboost") .Call("XGBoosterSaveModel_R", handle, fname, PACKAGE = "xgboost")
return(TRUE) return(TRUE)
} }
stop("xgb.save: the input must be either xgb.DMatrix or xgb.Booster") stop("xgb.save: the input must be either xgb.DMatrix or xgb.Booster")
return(FALSE) return(FALSE)
} }

View File

@ -1,38 +1,47 @@
# train a model using given parameters # train a model using given parameters
xgb.train <- function(params, dtrain, nrounds=10, watchlist=list(), obj=NULL, feval=NULL) { xgb.train <- function(params=list(), dtrain, nrounds = 10, watchlist = list(),
if (typeof(params) != "list") { obj = NULL, feval = NULL, ...) {
stop("xgb.train: first argument params must be list"); if (typeof(params) != "list") {
stop("xgb.train: first argument params must be list")
}
if (class(dtrain) != "xgb.DMatrix") {
stop("xgb.train: second argument dtrain must be xgb.DMatrix")
}
params = append(params, list(...))
bst <- xgb.Booster(params, append(watchlist, dtrain))
for (i in 1:nrounds) {
if (is.null(obj)) {
succ <- xgb.iter.update(bst, dtrain, i - 1)
} else {
pred <- xgb.predict(bst, dtrain)
gpair <- obj(pred, dtrain)
succ <- xgb.iter.boost(bst, dtrain, gpair)
} }
if (class(dtrain) != "xgb.DMatrix") { if (length(watchlist) != 0) {
stop("xgb.train: second argument dtrain must be xgb.DMatrix"); if (is.null(feval)) {
} msg <- xgb.iter.eval(bst, watchlist, i - 1)
bst <- xgb.Booster(params, append(watchlist,dtrain)) cat(msg)
for (i in 1:nrounds) { cat("\n")
if (is.null(obj)) { } else {
succ <- xgb.iter.update(bst, dtrain, i-1) cat("[")
} else { cat(i)
pred <- xgb.predict(bst, dtrain) cat("]")
gpair <- obj(pred, dtrain) for (j in 1:length(watchlist)) {
succ <- xgb.iter.boost(bst, dtrain, gpair) w <- watchlist[j]
} if (length(names(w)) == 0) {
if (length(watchlist) != 0) { stop("xgb.eval: name tag must be presented for every elements in watchlist")
if (is.null(feval)) { }
msg <- xgb.iter.eval(bst, watchlist, i-1) ret <- feval(xgb.predict(bst, w[[1]]), w[[1]])
cat(msg); cat("\n") cat("\t")
} else { cat(names(w))
cat("["); cat(i); cat("]"); cat("-")
for (j in 1:length(watchlist)) { cat(ret$metric)
w <- watchlist[j] cat(":")
if (length(names(w)) == 0) { cat(ret$value)
stop("xgb.eval: name tag must be presented for every elements in watchlist")
}
ret <- feval(xgb.predict(bst, w[[1]]), w[[1]])
cat("\t"); cat(names(w)); cat("-"); cat(ret$metric);
cat(":"); cat(ret$value)
}
cat("\n")
}
} }
cat("\n")
}
} }
return(bst) }
return(bst)
} }

View File

@ -1,41 +1,30 @@
# Main function for xgboost-package # Main function for xgboost-package
xgboost = function(data=NULL, label = NULL, params=list(), nrounds=10, xgboost <- function(data = NULL, label = NULL, params = list(), nrounds = 10,
verbose = 1, ...) verbose = 1, ...) {
{ inClass <- class(data)
inClass = class(data) if (inClass == "dgCMatrix" || inClass == "matrix") {
if (inClass=='dgCMatrix' || inClass=='matrix') if (is.null(label))
{ stop("xgboost: need label when data is a matrix")
if (is.null(label)) dtrain <- xgb.DMatrix(data, label = y)
stop('xgboost: need label when data is a matrix') } else {
dtrain = xgb.DMatrix(data, label=y) if (!is.null(label))
} warning("xgboost: label will be ignored.")
else if (inClass == "character")
{ dtrain <- xgb.DMatrix(data) else if (inClass == "xgb.DMatrix")
if (!is.null(label)) dtrain <- data else stop("xgboost: Invalid input of data")
warning('xgboost: label will be ignored.') }
if (inClass=='character')
dtrain = xgb.DMatrix(data)
else if (inClass=='xgb.DMatrix')
dtrain = data
else
stop('xgboost: Invalid input of data')
}
if (verbose>1) if (verbose > 1)
silent = 0 silent <- 0 else silent <- 1
else
silent = 1
params = append(params, list(silent=silent)) params <- append(params, list(silent = silent))
params = append(params, list(...)) params <- append(params, list(...))
if (verbose>0) if (verbose > 0)
watchlist = list(train=dtrain) watchlist <- list(train = dtrain) else watchlist <- list()
else
watchlist = list()
bst <- xgb.train(params, dtrain, nrounds, watchlist) bst <- xgb.train(params, dtrain, nrounds, watchlist)
return(bst) return(bst)
} }

View File

@ -85,8 +85,8 @@ test.y <- csc$label
test.x <- csc$data test.x <- csc$data
pred <- predict(bst, test.x) pred <- predict(bst, test.x)
# Extract label with xgb.getinfo # Extract label with getinfo
labels <- xgb.getinfo(dtest, "label") labels <- getinfo(dtest, "label")
err <- as.numeric(sum(as.integer(pred > 0.5) != labels))/length(labels) err <- as.numeric(sum(as.integer(pred > 0.5) != labels))/length(labels)
print(paste("error=", err)) print(paste("error=", err))
@ -126,7 +126,7 @@ param <- list(max_depth = 2, eta = 1, silent = 1)
# user define objective function, given prediction, return gradient and second order gradient this is # user define objective function, given prediction, return gradient and second order gradient this is
# loglikelihood loss # loglikelihood loss
logregobj <- function(preds, dtrain) { logregobj <- function(preds, dtrain) {
labels <- xgb.getinfo(dtrain, "label") labels <- getinfo(dtrain, "label")
preds <- 1/(1 + exp(-preds)) preds <- 1/(1 + exp(-preds))
grad <- preds - labels grad <- preds - labels
hess <- preds * (1 - preds) hess <- preds * (1 - preds)
@ -139,7 +139,7 @@ logregobj <- function(preds, dtrain) {
# transformation Take this in mind when you use the customization, and maybe you need write customized # transformation Take this in mind when you use the customization, and maybe you need write customized
# evaluation function # evaluation function
evalerror <- function(preds, dtrain) { evalerror <- function(preds, dtrain) {
labels <- xgb.getinfo(dtrain, "label") labels <- getinfo(dtrain, "label")
err <- as.numeric(sum(labels != (preds > 0)))/length(labels) err <- as.numeric(sum(labels != (preds > 0)))/length(labels)
return(list(metric = "error", value = err)) return(list(metric = "error", value = err))
} }