Merge branch 'master' of ssh://github.com/tqchen/xgboost
commit 9eb32b9dd4
@ -1,12 +1,18 @@
importClassesFrom("Matrix", dgCMatrix, dgeMatrix)
export(xgboost)
export(xgb.DMatrix)
export(xgb.getinfo)
export(xgb.setinfo)
export(xgb.Booster)
export(xgb.train)
export(xgb.save)
export(xgb.predict)
export(xgb.dump)

# exportClasses(xgb.Boost)
exportMethods(predict)

# export(xgb.Booster)
# export(xgb.train)
# export(xgb.save)
# export(xgb.predict)
# export(xgb.dump)
16 R-package/R/predict.xgboost.R Normal file
@ -0,0 +1,16 @@
#' @export
setClass("xgb.Booster")

#' @export
setMethod("predict",
          signature = "xgb.Booster",
          definition = function(object, newdata, outputmargin = FALSE)
          {
            if (class(newdata) != "xgb.DMatrix") {
              newdata = xgb.DMatrix(newdata)
            }
            ret <- .Call("XGBoosterPredict_R", object, newdata,
                         as.integer(outputmargin), PACKAGE="xgboost")
            return(ret)
          })
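
# usage sketch (placeholder names, assuming a booster `bst` trained as in the
# demo and a feature matrix or file path `x`): the S4 method converts `x` to an
# xgb.DMatrix before calling the native predictor
#   preds <- predict(bst, x)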
222 R-package/R/xgb.Utils.R Normal file
@ -0,0 +1,222 @@
# depends on the Matrix package
.onLoad <- function(libname, pkgname) {
  library.dynam("xgboost", pkgname, libname);
}
.onUnload <- function(libpath) {
  library.dynam.unload("xgboost", libpath);
}

# construct a DMatrix from a file path, a dense matrix, or a sparse dgCMatrix
xgb.DMatrix <- function(data, info=list(), missing=0.0) {
  if (typeof(data) == "character") {
    handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE), PACKAGE="xgboost")
  } else if (is.matrix(data)) {
    handle <- .Call("XGDMatrixCreateFromMat_R", data, missing, PACKAGE="xgboost")
  } else if (class(data) == "dgCMatrix") {
    handle <- .Call("XGDMatrixCreateFromCSC_R", data@p, data@i, data@x, PACKAGE="xgboost")
  } else {
    stop(paste("xgb.DMatrix: does not support construction from", typeof(data)))
  }
  dmat <- structure(handle, class="xgb.DMatrix")
  if (length(info) != 0) {
    for (i in 1:length(info)) {
      p <- info[i]
      xgb.setinfo(dmat, names(p), p[[1]])
    }
  }
  return(dmat)
}
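
# usage sketch: the three supported input types, as exercised in the demo scripts
# (`x` and `y` are placeholder names for a feature matrix and a label vector)
#   dtrain <- xgb.DMatrix("agaricus.txt.train")              # libsvm or binary file
#   dtrain <- xgb.DMatrix(as.matrix(x), info=list(label=y))  # dense matrix
#   dtrain <- xgb.DMatrix(x, info=list(label=y))             # dgCMatrix (sparse)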
# get information from a DMatrix
xgb.getinfo <- function(dmat, name) {
  if (typeof(name) != "character") {
    stop("xgb.getinfo: name must be character")
  }
  if (class(dmat) != "xgb.DMatrix") {
    stop("xgb.getinfo: first argument dmat must be xgb.DMatrix");
  }
  if (name != "label" &&
      name != "weight" &&
      name != "base_margin") {
    stop(paste("xgb.getinfo: unknown info name", name))
  }
  ret <- .Call("XGDMatrixGetInfo_R", dmat, name, PACKAGE="xgboost")
  return(ret)
}
# set information into a DMatrix; this mutates the DMatrix
xgb.setinfo <- function(dmat, name, info) {
  if (class(dmat) != "xgb.DMatrix") {
    stop("xgb.setinfo: first argument dmat must be xgb.DMatrix");
  }
  if (name == "label") {
    .Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info), PACKAGE="xgboost")
    return(TRUE)
  }
  if (name == "weight") {
    .Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info), PACKAGE="xgboost")
    return(TRUE)
  }
  if (name == "base_margin") {
    .Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info), PACKAGE="xgboost")
    return(TRUE)
  }
  if (name == "group") {
    .Call("XGDMatrixSetInfo_R", dmat, name, as.integer(info), PACKAGE="xgboost")
    return(TRUE)
  }
  stop(paste("xgb.setinfo: unknown info name", name))
  return(FALSE)
}
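
# usage sketch (placeholder names): set a field and read it back
#   succ <- xgb.setinfo(dtrain, "label", y)
#   labels <- xgb.getinfo(dtrain, "label")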
# construct a Booster from a cachelist
xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
  if (typeof(cachelist) != "list") {
    stop("xgb.Booster: only accepts a list of DMatrix as cachelist")
  }
  for (dm in cachelist) {
    if (class(dm) != "xgb.DMatrix") {
      stop("xgb.Booster: only accepts a list of DMatrix as cachelist")
    }
  }
  handle <- .Call("XGBoosterCreate_R", cachelist, PACKAGE="xgboost")
  .Call("XGBoosterSetParam_R", handle, "seed", "0", PACKAGE="xgboost")
  if (length(params) != 0) {
    for (i in 1:length(params)) {
      p <- params[i]
      .Call("XGBoosterSetParam_R", handle, names(p), as.character(p), PACKAGE="xgboost")
    }
  }
  if (!is.null(modelfile)) {
    if (typeof(modelfile) != "character") {
      stop("xgb.Booster: modelfile must be character");
    }
    .Call("XGBoosterLoadModel_R", handle, modelfile, PACKAGE="xgboost")
  }
  return(structure(handle, class="xgb.Booster"))
}
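
# usage sketch: reload a previously saved model, as done in the demo
#   bst2 <- xgb.Booster(modelfile="xgb.model")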
# train a model with the given parameters
xgb.train <- function(params, dtrain, nrounds=10, watchlist=list(), obj=NULL, feval=NULL) {
  if (typeof(params) != "list") {
    stop("xgb.train: first argument params must be a list");
  }
  if (class(dtrain) != "xgb.DMatrix") {
    stop("xgb.train: second argument dtrain must be xgb.DMatrix");
  }
  bst <- xgb.Booster(params, append(watchlist, dtrain))
  for (i in 1:nrounds) {
    if (is.null(obj)) {
      succ <- xgb.iter.update(bst, dtrain, i-1)
    } else {
      pred <- xgb.predict(bst, dtrain)
      gpair <- obj(pred, dtrain)
      succ <- xgb.iter.boost(bst, dtrain, gpair)
    }
    if (length(watchlist) != 0) {
      if (is.null(feval)) {
        msg <- xgb.iter.eval(bst, watchlist, i-1)
        cat(msg); cat("\n")
      } else {
        cat("["); cat(i); cat("]");
        for (j in 1:length(watchlist)) {
          w <- watchlist[j]
          if (length(names(w)) == 0) {
            stop("xgb.train: a name tag must be present for every element in watchlist")
          }
          ret <- feval(xgb.predict(bst, w[[1]]), w[[1]])
          cat("\t"); cat(names(w)); cat("-"); cat(ret$metric);
          cat(":"); cat(ret$value)
        }
        cat("\n")
      }
    }
  }
  return(bst)
}
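
# usage sketch, taken from the demo: train for 2 rounds, printing the
# evaluation on each named watchlist entry every round
#   param <- list("bst:max_depth"=2, "bst:eta"=1, "silent"=1, "objective"="binary:logistic")
#   bst <- xgb.train(param, dtrain, nrounds=2, watchlist=list("eval"=dtest, "train"=dtrain))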
# save a model or a DMatrix to file
xgb.save <- function(handle, fname) {
  if (typeof(fname) != "character") {
    stop("xgb.save: fname must be character")
  }
  if (class(handle) == "xgb.Booster") {
    .Call("XGBoosterSaveModel_R", handle, fname, PACKAGE="xgboost")
    return(TRUE)
  }
  if (class(handle) == "xgb.DMatrix") {
    .Call("XGDMatrixSaveBinary_R", handle, fname, as.integer(FALSE), PACKAGE="xgboost")
    return(TRUE)
  }
  stop("xgb.save: the input must be either xgb.DMatrix or xgb.Booster")
  return(FALSE)
}
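
# usage sketch, from the demo: the same function saves either object type
#   succ <- xgb.save(bst, "xgb.model")       # booster model
#   succ <- xgb.save(dtest, "dtest.buffer")  # DMatrix binary buffer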
# predict
xgb.predict <- function(booster, dmat, outputmargin = FALSE) {
  if (class(booster) != "xgb.Booster") {
    stop("xgb.predict: first argument must be of type xgb.Booster")
  }
  if (class(dmat) != "xgb.DMatrix") {
    stop("xgb.predict: second argument must be of type xgb.DMatrix")
  }
  ret <- .Call("XGBoosterPredict_R", booster, dmat, as.integer(outputmargin), PACKAGE="xgboost")
  return(ret)
}

# dump model
xgb.dump <- function(booster, fname, fmap = "") {
  if (class(booster) != "xgb.Booster") {
    stop("xgb.dump: first argument must be of type xgb.Booster")
  }
  if (typeof(fname) != "character") {
    stop("xgb.dump: second argument must be of type character")
  }
  .Call("XGBoosterDumpModel_R", booster, fname, fmap, PACKAGE="xgboost")
  return(TRUE)
}
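
# usage sketch, from the demo: dump the trees as text, optionally with a feature map
#   xgb.dump(bst, "dump.raw.txt")
#   xgb.dump(bst, "dump.nice.txt", "featmap.txt")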
##--------------------------------------
# The following are low-level iteration functions;
# you do not need them unless you want step-by-step training.
#---------------------------------------
# iteratively update the booster with dtrain
xgb.iter.update <- function(booster, dtrain, iter) {
  if (class(booster) != "xgb.Booster") {
    stop("xgb.iter.update: first argument must be of type xgb.Booster")
  }
  if (class(dtrain) != "xgb.DMatrix") {
    stop("xgb.iter.update: second argument must be of type xgb.DMatrix")
  }
  .Call("XGBoosterUpdateOneIter_R", booster, as.integer(iter), dtrain, PACKAGE="xgboost")
  return(TRUE)
}
# iteratively update the booster with customized gradient statistics
xgb.iter.boost <- function(booster, dtrain, gpair) {
  if (class(booster) != "xgb.Booster") {
    stop("xgb.iter.boost: first argument must be of type xgb.Booster")
  }
  if (class(dtrain) != "xgb.DMatrix") {
    stop("xgb.iter.boost: second argument must be of type xgb.DMatrix")
  }
  .Call("XGBoosterBoostOneIter_R", booster, dtrain, gpair$grad, gpair$hess, PACKAGE="xgboost")
  return(TRUE)
}
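
# gpair is a list(grad=..., hess=...) as returned by a custom objective such as
# logregobj in the demo; one custom boosting step therefore looks like
#   pred <- xgb.predict(bst, dtrain)
#   succ <- xgb.iter.boost(bst, dtrain, logregobj(pred, dtrain))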
# evaluate one iteration on the watchlist
xgb.iter.eval <- function(booster, watchlist, iter) {
  if (class(booster) != "xgb.Booster") {
    stop("xgb.iter.eval: first argument must be of type xgb.Booster")
  }
  if (typeof(watchlist) != "list") {
    stop("xgb.iter.eval: only accepts a list of DMatrix as watchlist")
  }
  for (w in watchlist) {
    if (class(w) != "xgb.DMatrix") {
      stop("xgb.iter.eval: watchlist can only contain xgb.DMatrix")
    }
  }
  evnames <- list()
  if (length(watchlist) != 0) {
    for (i in 1:length(watchlist)) {
      w <- watchlist[i]
      if (length(names(w)) == 0) {
        stop("xgb.iter.eval: a name tag must be present for every element in watchlist")
      }
      evnames <- append(evnames, names(w))
    }
  }
  msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist, evnames, PACKAGE="xgboost")
  return(msg)
}
@ -1,222 +1,24 @@
# depends on the Matrix package
.onLoad <- function(libname, pkgname) {
  library.dynam("xgboost", pkgname, libname);
}
.onUnload <- function(libpath) {
  library.dynam.unload("xgboost", libpath);
}
# Main function for xgboost-package
xgboost = function(x=NULL, y=NULL, file=NULL, nrounds=10, params, watchlist=list(),
                   obj=NULL, feval=NULL, margin=NULL)
{
  if (is.null(x) && is.null(y))
  {
    if (is.null(file))
      stop('xgboost needs input data, either R objects or local files.')
    dtrain = xgb.DMatrix(file)
  }
  else
    dtrain = xgb.DMatrix(x, info=list(label=y))
  if (!is.null(margin))
  {
    succ <- xgb.setinfo(dtrain, "base_margin", margin)
    if (!succ)
      warning('Attempt to use margin failed.')
  }
  bst <- xgb.train(params, dtrain, nrounds, watchlist, obj, feval)
  return(bst)
}
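
# usage sketch, from the demo: train directly from R objects or from a local file
#   bst <- xgboost(x, y, params=param, watchlist=watchlist)
#   bst <- xgboost(file='agaricus.txt.train', params=param, watchlist=watchlist)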
@ -1,4 +1,10 @@
This is the subfolder for the experimental version of the R package.

Not yet ready.

Installation:

```r
require(devtools)
install_github('xgboost','tqchen',subdir='R-package')
```
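
A minimal usage sketch (assuming the demo data files shipped under
`inst/examples`, as used in `demo.R`):

```r
require(xgboost)
param <- list("bst:max_depth"=2, "bst:eta"=1, "silent"=1,
              "objective"="binary:logistic")
dtrain <- xgb.DMatrix("agaricus.txt.train")
bst <- xgb.train(param, dtrain, nrounds=2)
preds <- xgb.predict(bst, dtrain)
```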
127 R-package/inst/examples/demo-old.R Normal file
@ -0,0 +1,127 @@
# load xgboost library
require(xgboost)
require(methods)

# helper function to read libsvm format
# this is written inefficiently: it loads a dense matrix, then converts to sparse
# use it only for demo purposes
# adapted from https://github.com/zygmuntz/r-libsvm-format-read-write/blob/master/f_read.libsvm.r
read.libsvm <- function(fname, maxcol) {
  content <- readLines(fname)
  nline <- length(content)
  label <- numeric(nline)
  mat <- matrix(0, nline, maxcol+1)
  for (i in 1:nline) {
    arr <- as.vector(strsplit(content[i], " ")[[1]])
    label[i] <- as.numeric(arr[[1]])
    for (j in 2:length(arr)) {
      kv <- strsplit(arr[j], ":")[[1]]
      # add 1 to avoid a 0 index
      findex <- as.integer(kv[1]) + 1
      fvalue <- as.numeric(kv[2])
      mat[i,findex] <- fvalue
    }
  }
  mat <- as(mat, "sparseMatrix")
  return(list(label=label, data=mat))
}

# test code here
dtrain <- xgb.DMatrix("agaricus.txt.train")
dtest <- xgb.DMatrix("agaricus.txt.test")
param = list("bst:max_depth"=2, "bst:eta"=1, "silent"=1, "objective"="binary:logistic")
watchlist <- list("eval"=dtest, "train"=dtrain)
# training xgboost model
bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)
# make prediction
preds <- xgb.predict(bst, dtest)
labels <- xgb.getinfo(dtest, "label")
err <- as.numeric(sum(as.integer(preds > 0.5) != labels)) / length(labels)
# print error rate
print(paste("error=", err))

# dump model
xgb.dump(bst, "dump.raw.txt")
# dump model with feature map
xgb.dump(bst, "dump.nice.txt", "featmap.txt")

# save dmatrix into binary buffer
succ <- xgb.save(dtest, "dtest.buffer")
# save model into file
succ <- xgb.save(bst, "xgb.model")
# load model and data in
bst2 <- xgb.Booster(modelfile="xgb.model")
dtest2 <- xgb.DMatrix("dtest.buffer")
preds2 <- xgb.predict(bst2, dtest2)
# assert they are the same
stopifnot(sum(abs(preds2 - preds)) == 0)
###
# build DMatrix from a sparseMatrix
###
print('start running example of building DMatrix from R sparseMatrix')
csc <- read.libsvm("agaricus.txt.train", 126)
label <- csc$label
data <- csc$data
dtrain <- xgb.DMatrix(data, info=list(label=label))
watchlist <- list("eval"=dtest, "train"=dtrain)
bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)

###
# build DMatrix from a dense matrix
###
print('start running example of building DMatrix from R matrix')
mat = as.matrix(data)
dtrain <- xgb.DMatrix(mat, info=list(label=label))
watchlist <- list("eval"=dtest, "train"=dtrain)
bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)

###
# advanced: customized loss function
#
print("start running example of using a customized objective function")
# note: for a customized objective function, we leave objective as default
# note: what we get from prediction is the margin value
# you must know what you are doing
param <- list("bst:max_depth" = 2, "bst:eta" = 1, "silent" = 1)
# user-defined objective function: given the prediction, return gradient and second-order gradient
# this is log-likelihood loss
logregobj <- function(preds, dtrain) {
  labels <- xgb.getinfo(dtrain, "label")
  preds <- 1.0 / (1.0 + exp(-preds))
  grad <- preds - labels
  hess <- preds * (1.0 - preds)
  return(list(grad=grad, hess=hess))
}
# user-defined evaluation function, returns list(metric="metric-name", value="metric-value")
# NOTE: with a customized loss function, the default prediction value is the margin
# this may make built-in evaluation metrics not work properly
# for example, with logistic loss the prediction is the score before the logistic transformation
# the built-in evaluation error assumes the input is after the logistic transformation
# keep this in mind when you use the customization; you may need to write a customized evaluation function
evalerror <- function(preds, dtrain) {
  labels <- xgb.getinfo(dtrain, "label")
  err <- as.numeric(sum(labels != (preds > 0.0))) / length(labels)
  return(list(metric="error", value=err))
}

# training with a customized objective; we can also do step-by-step training
# simply look at xgboost.py's implementation of train
bst <- xgb.train(param, dtrain, nround=2, watchlist, logregobj, evalerror)

###
# advanced: start from an initial base prediction
#
print("start running example of starting from an initial prediction")
# specify parameters via a list; definitions are the same as in the C++ version
param = list("bst:max_depth"=2, "bst:eta"=1, "silent"=1, "objective"="binary:logistic")
# train xgboost for 1 round
bst <- xgb.train(param, dtrain, 1, watchlist)
# Note: we need the margin value instead of the transformed prediction in base_margin
# predicting with outputmargin=TRUE always gives margin values before the logistic transformation
ptrain <- xgb.predict(bst, dtrain, outputmargin=TRUE)
ptest <- xgb.predict(bst, dtest, outputmargin=TRUE)
succ <- xgb.setinfo(dtrain, "base_margin", ptrain)
succ <- xgb.setinfo(dtest, "base_margin", ptest)
print("this is the result of running from an initial prediction")
bst <- xgb.train(param, dtrain, 1, watchlist)
@ -1,96 +1,82 @@
# load xgboost library
require(xgboost)
require(methods)

# helper function to read libsvm format
# this is written inefficiently: it loads a dense matrix, then converts to sparse
# use it only for demo purposes
# adapted from https://github.com/zygmuntz/r-libsvm-format-read-write/blob/master/f_read.libsvm.r
read.libsvm = function(fname, maxcol) {
  content = readLines(fname)
  nline = length(content)
  label = numeric(nline)
  mat = matrix(0, nline, maxcol+1)
  for (i in 1:nline) {
    arr = as.vector(strsplit(content[i], " ")[[1]])
    label[i] = as.numeric(arr[[1]])
    for (j in 2:length(arr)) {
      kv = strsplit(arr[j], ":")[[1]]
      # add 1 to avoid a 0 index
      findex = as.integer(kv[1]) + 1
      fvalue = as.numeric(kv[2])
      mat[i,findex] = fvalue
    }
  }
  mat = as(mat, "sparseMatrix")
  return(list(label=label, data=mat))
}

# Parameter setting
dtrain <- xgb.DMatrix("agaricus.txt.train")
dtest <- xgb.DMatrix("agaricus.txt.test")
param = list("bst:max_depth"=2, "bst:eta"=1, "silent"=1, "objective"="binary:logistic")
watchlist = list("eval"=dtest, "train"=dtrain)

###########################
# Train from local file
###########################

# Training
bst = xgboost(file='agaricus.txt.train', params=param, watchlist=watchlist)
# Prediction
pred = predict(bst, 'agaricus.txt.test')
# Performance
labels = xgb.getinfo(dtest, "label")
err = as.numeric(sum(as.integer(pred > 0.5) != labels)) / length(labels)
print(paste("error=", err))

# dump model
xgb.dump(bst, "dump.raw.txt")
# dump model with feature map
xgb.dump(bst, "dump.nice.txt", "featmap.txt")

###########################
# Train from R object
###########################

csc = read.libsvm("agaricus.txt.train", 126)
y = csc$label
x = csc$data
# x as sparse Matrix
class(x)
# Training
bst = xgboost(x, y, params=param, watchlist=watchlist)
# Prediction
pred = predict(bst, 'agaricus.txt.test')
# Performance
labels = xgb.getinfo(dtest, "label")
err = as.numeric(sum(as.integer(pred > 0.5) != labels)) / length(labels)
print(paste("error=", err))

# Training with dense matrix
x = as.matrix(x)
bst = xgboost(x, y, params=param, watchlist=watchlist)

###########################
# Train with customization
###########################

###
# advanced: customized loss function
#
print("start running example of using a customized objective function")
# note: for a customized objective function, we leave objective as default
# note: what we get from prediction is the margin value
# you must know what you are doing
param <- list("bst:max_depth" = 2, "bst:eta" = 1, "silent" = 1)
# user-defined objective function: given the prediction, return gradient and second-order gradient
# this is log-likelihood loss
logregobj = function(preds, dtrain) {
  labels = xgb.getinfo(dtrain, "label")
  preds = 1.0 / (1.0 + exp(-preds))
  grad = preds - labels
  hess = preds * (1.0 - preds)
  return(list(grad=grad, hess=hess))
}
# user-defined evaluation function, returns list(metric="metric-name", value="metric-value")
@ -99,29 +85,18 @@ logregobj <- function(preds, dtrain) {
# for example, with logistic loss the prediction is the score before the logistic transformation
# the built-in evaluation error assumes the input is after the logistic transformation
# keep this in mind when you use the customization; you may need to write a customized evaluation function
evalerror = function(preds, dtrain) {
  labels = xgb.getinfo(dtrain, "label")
  err = as.numeric(sum(labels != (preds > 0.0))) / length(labels)
  return(list(metric="error", value=err))
}

# training with a customized objective
bst = xgboost(x, y, params=param, watchlist=watchlist, obj=logregobj, feval=evalerror)

############################
# Train with previous result
############################

bst = xgboost(x, y, params=param, watchlist=watchlist)
pred = predict(bst, 'agaricus.txt.train', outputmargin=TRUE)
bst2 = xgboost(x, y, params=param, watchlist=watchlist, margin=pred)