export fewer functions to user and optimize parameter setting
This commit is contained in:
parent
5f6d5d19b8
commit
4940fff55b
@ -1,12 +1,15 @@
|
|||||||
importClassesFrom("Matrix", dgCMatrix, dgeMatrix)
|
importClassesFrom("Matrix", dgCMatrix, dgeMatrix)
|
||||||
|
|
||||||
|
export(xgboost)
|
||||||
export(xgb.DMatrix)
|
export(xgb.DMatrix)
|
||||||
export(xgb.getinfo)
|
export(xgb.getinfo)
|
||||||
export(xgb.setinfo)
|
export(xgb.setinfo)
|
||||||
export(xgb.Booster)
|
|
||||||
export(xgb.train)
|
# export(xgb.Booster)
|
||||||
export(xgb.save)
|
# export(xgb.train)
|
||||||
export(xgb.predict)
|
# export(xgb.save)
|
||||||
export(xgb.dump)
|
# export(xgb.predict)
|
||||||
|
# export(xgb.dump)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
16
R-package/R/predict.xgboost.R
Normal file
16
R-package/R/predict.xgboost.R
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
#' @export
|
||||||
|
setClass("xgb.Booster")
|
||||||
|
|
||||||
|
#' @export
|
||||||
|
setMethod("predict",
|
||||||
|
signature = "xgb.Booster",
|
||||||
|
definition = function(object, newdata, outputmargin = FALSE)
|
||||||
|
{
|
||||||
|
if (class(newdata) != "xgb.DMatrix") {
|
||||||
|
newdata = xgb.DMatrix(newdata)
|
||||||
|
}
|
||||||
|
ret <- .Call("XGBoosterPredict_R", object, newdata,
|
||||||
|
as.integer(outputmargin), PACKAGE="xgboost")
|
||||||
|
return(ret)
|
||||||
|
})
|
||||||
|
|
||||||
222
R-package/R/xgb.Utils.R
Normal file
222
R-package/R/xgb.Utils.R
Normal file
@ -0,0 +1,222 @@
|
|||||||
|
# depends on matrix
|
||||||
|
.onLoad <- function(libname, pkgname) {
|
||||||
|
library.dynam("xgboost", pkgname, libname);
|
||||||
|
}
|
||||||
|
.onUnload <- function(libpath) {
|
||||||
|
library.dynam.unload("xgboost", libpath);
|
||||||
|
}
|
||||||
|
|
||||||
|
# constructing DMatrix
|
||||||
|
xgb.DMatrix <- function(data, info=list(), missing=0.0) {
|
||||||
|
if (typeof(data) == "character") {
|
||||||
|
handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE), PACKAGE="xgboost")
|
||||||
|
} else if(is.matrix(data)) {
|
||||||
|
handle <- .Call("XGDMatrixCreateFromMat_R", data, missing, PACKAGE="xgboost")
|
||||||
|
} else if(class(data) == "dgCMatrix") {
|
||||||
|
handle <- .Call("XGDMatrixCreateFromCSC_R", data@p, data@i, data@x, PACKAGE="xgboost")
|
||||||
|
} else {
|
||||||
|
stop(paste("xgb.DMatrix: does not support to construct from ", typeof(data)))
|
||||||
|
}
|
||||||
|
dmat <- structure(handle, class="xgb.DMatrix")
|
||||||
|
if (length(info) != 0) {
|
||||||
|
for (i in 1:length(info)) {
|
||||||
|
p <- info[i]
|
||||||
|
xgb.setinfo(dmat, names(p), p[[1]])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return(dmat)
|
||||||
|
}
|
||||||
|
# get information from dmatrix
|
||||||
|
xgb.getinfo <- function(dmat, name) {
|
||||||
|
if (typeof(name) != "character") {
|
||||||
|
stop("xgb.getinfo: name must be character")
|
||||||
|
}
|
||||||
|
if (class(dmat) != "xgb.DMatrix") {
|
||||||
|
stop("xgb.setinfo: first argument dtrain must be xgb.DMatrix");
|
||||||
|
}
|
||||||
|
if (name != "label" &&
|
||||||
|
name != "weight" &&
|
||||||
|
name != "base_margin" ) {
|
||||||
|
stop(paste("xgb.getinfo: unknown info name", name))
|
||||||
|
}
|
||||||
|
ret <- .Call("XGDMatrixGetInfo_R", dmat, name, PACKAGE="xgboost")
|
||||||
|
return(ret)
|
||||||
|
}
|
||||||
|
# set information into dmatrix, this mutate dmatrix
|
||||||
|
xgb.setinfo <- function(dmat, name, info) {
|
||||||
|
if (class(dmat) != "xgb.DMatrix") {
|
||||||
|
stop("xgb.setinfo: first argument dtrain must be xgb.DMatrix");
|
||||||
|
}
|
||||||
|
if (name == "label") {
|
||||||
|
.Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info), PACKAGE="xgboost")
|
||||||
|
return(TRUE)
|
||||||
|
}
|
||||||
|
if (name == "weight") {
|
||||||
|
.Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info), PACKAGE="xgboost")
|
||||||
|
return(TRUE)
|
||||||
|
}
|
||||||
|
if (name == "base_margin") {
|
||||||
|
.Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info), PACKAGE="xgboost")
|
||||||
|
return(TRUE)
|
||||||
|
}
|
||||||
|
if (name == "group") {
|
||||||
|
.Call("XGDMatrixSetInfo_R", dmat, name, as.integer(info), PACKAGE="xgboost")
|
||||||
|
return(TRUE)
|
||||||
|
}
|
||||||
|
stop(pase("xgb.setinfo: unknown info name", name))
|
||||||
|
return(FALSE)
|
||||||
|
}
|
||||||
|
# construct a Booster from cachelist
|
||||||
|
xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
|
||||||
|
if (typeof(cachelist) != "list") {
|
||||||
|
stop("xgb.Booster: only accepts list of DMatrix as cachelist")
|
||||||
|
}
|
||||||
|
for (dm in cachelist) {
|
||||||
|
if (class(dm) != "xgb.DMatrix") {
|
||||||
|
stop("xgb.Booster: only accepts list of DMatrix as cachelist")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
handle <- .Call("XGBoosterCreate_R", cachelist, PACKAGE="xgboost")
|
||||||
|
.Call("XGBoosterSetParam_R", handle, "seed", "0", PACKAGE="xgboost")
|
||||||
|
if (length(params) != 0) {
|
||||||
|
for (i in 1:length(params)) {
|
||||||
|
p <- params[i]
|
||||||
|
.Call("XGBoosterSetParam_R", handle, names(p), as.character(p), PACKAGE="xgboost")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!is.null(modelfile)) {
|
||||||
|
if (typeof(modelfile) != "character"){
|
||||||
|
stop("xgb.Booster: modelfile must be character");
|
||||||
|
}
|
||||||
|
.Call("XGBoosterLoadModel_R", handle, modelfile, PACKAGE="xgboost")
|
||||||
|
}
|
||||||
|
return(structure(handle, class="xgb.Booster"))
|
||||||
|
}
|
||||||
|
# train a model using given parameters
|
||||||
|
xgb.train <- function(params, dtrain, nrounds=10, watchlist=list(), obj=NULL, feval=NULL) {
|
||||||
|
if (typeof(params) != "list") {
|
||||||
|
stop("xgb.train: first argument params must be list");
|
||||||
|
}
|
||||||
|
if (class(dtrain) != "xgb.DMatrix") {
|
||||||
|
stop("xgb.train: second argument dtrain must be xgb.DMatrix");
|
||||||
|
}
|
||||||
|
bst <- xgb.Booster(params, append(watchlist,dtrain))
|
||||||
|
for (i in 1:nrounds) {
|
||||||
|
if (is.null(obj)) {
|
||||||
|
succ <- xgb.iter.update(bst, dtrain, i-1)
|
||||||
|
} else {
|
||||||
|
pred <- xgb.predict(bst, dtrain)
|
||||||
|
gpair <- obj(pred, dtrain)
|
||||||
|
succ <- xgb.iter.boost(bst, dtrain, gpair)
|
||||||
|
}
|
||||||
|
if (length(watchlist) != 0) {
|
||||||
|
if (is.null(feval)) {
|
||||||
|
msg <- xgb.iter.eval(bst, watchlist, i-1)
|
||||||
|
cat(msg); cat("\n")
|
||||||
|
} else {
|
||||||
|
cat("["); cat(i); cat("]");
|
||||||
|
for (j in 1:length(watchlist)) {
|
||||||
|
w <- watchlist[j]
|
||||||
|
if (length(names(w)) == 0) {
|
||||||
|
stop("xgb.eval: name tag must be presented for every elements in watchlist")
|
||||||
|
}
|
||||||
|
ret <- feval(xgb.predict(bst, w[[1]]), w[[1]])
|
||||||
|
cat("\t"); cat(names(w)); cat("-"); cat(ret$metric);
|
||||||
|
cat(":"); cat(ret$value)
|
||||||
|
}
|
||||||
|
cat("\n")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return(bst)
|
||||||
|
}
|
||||||
|
# save model or DMatrix to file
|
||||||
|
xgb.save <- function(handle, fname) {
|
||||||
|
if (typeof(fname) != "character") {
|
||||||
|
stop("xgb.save: fname must be character")
|
||||||
|
}
|
||||||
|
if (class(handle) == "xgb.Booster") {
|
||||||
|
.Call("XGBoosterSaveModel_R", handle, fname, PACKAGE="xgboost")
|
||||||
|
return(TRUE)
|
||||||
|
}
|
||||||
|
if (class(handle) == "xgb.DMatrix") {
|
||||||
|
.Call("XGDMatrixSaveBinary_R", handle, fname, as.integer(FALSE), PACKAGE="xgboost")
|
||||||
|
return(TRUE)
|
||||||
|
}
|
||||||
|
stop("xgb.save: the input must be either xgb.DMatrix or xgb.Booster")
|
||||||
|
return(FALSE)
|
||||||
|
}
|
||||||
|
# predict
|
||||||
|
xgb.predict <- function(booster, dmat, outputmargin = FALSE) {
|
||||||
|
if (class(booster) != "xgb.Booster") {
|
||||||
|
stop("xgb.predict: first argument must be type xgb.Booster")
|
||||||
|
}
|
||||||
|
if (class(dmat) != "xgb.DMatrix") {
|
||||||
|
stop("xgb.predict: second argument must be type xgb.DMatrix")
|
||||||
|
}
|
||||||
|
ret <- .Call("XGBoosterPredict_R", booster, dmat, as.integer(outputmargin), PACKAGE="xgboost")
|
||||||
|
return(ret)
|
||||||
|
}
|
||||||
|
# dump model
|
||||||
|
xgb.dump <- function(booster, fname, fmap = "") {
|
||||||
|
if (class(booster) != "xgb.Booster") {
|
||||||
|
stop("xgb.dump: first argument must be type xgb.Booster")
|
||||||
|
}
|
||||||
|
if (typeof(fname) != "character"){
|
||||||
|
stop("xgb.dump: second argument must be type character")
|
||||||
|
}
|
||||||
|
.Call("XGBoosterDumpModel_R", booster, fname, fmap, PACKAGE="xgboost")
|
||||||
|
return(TRUE)
|
||||||
|
}
|
||||||
|
##--------------------------------------
|
||||||
|
# the following are low level iteratively function, not needed
|
||||||
|
# if you do not want to use them
|
||||||
|
#---------------------------------------
|
||||||
|
# iteratively update booster with dtrain
|
||||||
|
xgb.iter.update <- function(booster, dtrain, iter) {
|
||||||
|
if (class(booster) != "xgb.Booster") {
|
||||||
|
stop("xgb.iter.update: first argument must be type xgb.Booster")
|
||||||
|
}
|
||||||
|
if (class(dtrain) != "xgb.DMatrix") {
|
||||||
|
stop("xgb.iter.update: second argument must be type xgb.DMatrix")
|
||||||
|
}
|
||||||
|
.Call("XGBoosterUpdateOneIter_R", booster, as.integer(iter), dtrain, PACKAGE="xgboost")
|
||||||
|
return(TRUE)
|
||||||
|
}
|
||||||
|
# iteratively update booster with customized statistics
|
||||||
|
xgb.iter.boost <- function(booster, dtrain, gpair) {
|
||||||
|
if (class(booster) != "xgb.Booster") {
|
||||||
|
stop("xgb.iter.update: first argument must be type xgb.Booster")
|
||||||
|
}
|
||||||
|
if (class(dtrain) != "xgb.DMatrix") {
|
||||||
|
stop("xgb.iter.update: second argument must be type xgb.DMatrix")
|
||||||
|
}
|
||||||
|
.Call("XGBoosterBoostOneIter_R", booster, dtrain, gpair$grad, gpair$hess, PACKAGE="xgboost")
|
||||||
|
return(TRUE)
|
||||||
|
}
|
||||||
|
# iteratively evaluate one iteration
|
||||||
|
xgb.iter.eval <- function(booster, watchlist, iter) {
|
||||||
|
if (class(booster) != "xgb.Booster") {
|
||||||
|
stop("xgb.eval: first argument must be type xgb.Booster")
|
||||||
|
}
|
||||||
|
if (typeof(watchlist) != "list") {
|
||||||
|
stop("xgb.eval: only accepts list of DMatrix as watchlist")
|
||||||
|
}
|
||||||
|
for (w in watchlist) {
|
||||||
|
if (class(w) != "xgb.DMatrix") {
|
||||||
|
stop("xgb.eval: watch list can only contain xgb.DMatrix")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
evnames <- list()
|
||||||
|
if (length(watchlist) != 0) {
|
||||||
|
for (i in 1:length(watchlist)) {
|
||||||
|
w <- watchlist[i]
|
||||||
|
if (length(names(w)) == 0) {
|
||||||
|
stop("xgb.eval: name tag must be presented for every elements in watchlist")
|
||||||
|
}
|
||||||
|
evnames <- append(evnames, names(w))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist, evnames, PACKAGE="xgboost")
|
||||||
|
return(msg)
|
||||||
|
}
|
||||||
@ -1,222 +1,24 @@
|
|||||||
# depends on matrix
|
# Main function for xgboost-package
|
||||||
.onLoad <- function(libname, pkgname) {
|
|
||||||
library.dynam("xgboost", pkgname, libname);
|
|
||||||
}
|
|
||||||
.onUnload <- function(libpath) {
|
|
||||||
library.dynam.unload("xgboost", libpath);
|
|
||||||
}
|
|
||||||
|
|
||||||
# constructing DMatrix
|
xgboost = function(x=NULL,y=NULL,file=NULL,nrounds=10,params,watchlist=list(),
|
||||||
xgb.DMatrix <- function(data, info=list(), missing=0.0) {
|
obj=NULL, feval=NULL, margin=NULL)
|
||||||
if (typeof(data) == "character") {
|
{
|
||||||
handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE), PACKAGE="xgboost")
|
if (is.null(x) && is.null(y))
|
||||||
} else if(is.matrix(data)) {
|
{
|
||||||
handle <- .Call("XGDMatrixCreateFromMat_R", data, missing, PACKAGE="xgboost")
|
if (is.null(file))
|
||||||
} else if(class(data) == "dgCMatrix") {
|
stop('xgboost need input data, either R objects or local files.')
|
||||||
handle <- .Call("XGDMatrixCreateFromCSC_R", data@p, data@i, data@x, PACKAGE="xgboost")
|
dtrain = xgb.DMatrix(file)
|
||||||
} else {
|
|
||||||
stop(paste("xgb.DMatrix: does not support to construct from ", typeof(data)))
|
|
||||||
}
|
|
||||||
dmat <- structure(handle, class="xgb.DMatrix")
|
|
||||||
if (length(info) != 0) {
|
|
||||||
for (i in 1:length(info)) {
|
|
||||||
p <- info[i]
|
|
||||||
xgb.setinfo(dmat, names(p), p[[1]])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return(dmat)
|
|
||||||
}
|
|
||||||
# get information from dmatrix
|
|
||||||
xgb.getinfo <- function(dmat, name) {
|
|
||||||
if (typeof(name) != "character") {
|
|
||||||
stop("xgb.getinfo: name must be character")
|
|
||||||
}
|
|
||||||
if (class(dmat) != "xgb.DMatrix") {
|
|
||||||
stop("xgb.setinfo: first argument dtrain must be xgb.DMatrix");
|
|
||||||
}
|
|
||||||
if (name != "label" &&
|
|
||||||
name != "weight" &&
|
|
||||||
name != "base_margin" ) {
|
|
||||||
stop(paste("xgb.getinfo: unknown info name", name))
|
|
||||||
}
|
|
||||||
ret <- .Call("XGDMatrixGetInfo_R", dmat, name, PACKAGE="xgboost")
|
|
||||||
return(ret)
|
|
||||||
}
|
|
||||||
# set information into dmatrix, this mutate dmatrix
|
|
||||||
xgb.setinfo <- function(dmat, name, info) {
|
|
||||||
if (class(dmat) != "xgb.DMatrix") {
|
|
||||||
stop("xgb.setinfo: first argument dtrain must be xgb.DMatrix");
|
|
||||||
}
|
|
||||||
if (name == "label") {
|
|
||||||
.Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info), PACKAGE="xgboost")
|
|
||||||
return(TRUE)
|
|
||||||
}
|
|
||||||
if (name == "weight") {
|
|
||||||
.Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info), PACKAGE="xgboost")
|
|
||||||
return(TRUE)
|
|
||||||
}
|
|
||||||
if (name == "base_margin") {
|
|
||||||
.Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info), PACKAGE="xgboost")
|
|
||||||
return(TRUE)
|
|
||||||
}
|
|
||||||
if (name == "group") {
|
|
||||||
.Call("XGDMatrixSetInfo_R", dmat, name, as.integer(info), PACKAGE="xgboost")
|
|
||||||
return(TRUE)
|
|
||||||
}
|
|
||||||
stop(pase("xgb.setinfo: unknown info name", name))
|
|
||||||
return(FALSE)
|
|
||||||
}
|
|
||||||
# construct a Booster from cachelist
|
|
||||||
xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
|
|
||||||
if (typeof(cachelist) != "list") {
|
|
||||||
stop("xgb.Booster: only accepts list of DMatrix as cachelist")
|
|
||||||
}
|
|
||||||
for (dm in cachelist) {
|
|
||||||
if (class(dm) != "xgb.DMatrix") {
|
|
||||||
stop("xgb.Booster: only accepts list of DMatrix as cachelist")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
handle <- .Call("XGBoosterCreate_R", cachelist, PACKAGE="xgboost")
|
|
||||||
.Call("XGBoosterSetParam_R", handle, "seed", "0", PACKAGE="xgboost")
|
|
||||||
if (length(params) != 0) {
|
|
||||||
for (i in 1:length(params)) {
|
|
||||||
p <- params[i]
|
|
||||||
.Call("XGBoosterSetParam_R", handle, names(p), as.character(p), PACKAGE="xgboost")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!is.null(modelfile)) {
|
|
||||||
if (typeof(modelfile) != "character"){
|
|
||||||
stop("xgb.Booster: modelfile must be character");
|
|
||||||
}
|
|
||||||
.Call("XGBoosterLoadModel_R", handle, modelfile, PACKAGE="xgboost")
|
|
||||||
}
|
|
||||||
return(structure(handle, class="xgb.Booster"))
|
|
||||||
}
|
|
||||||
# train a model using given parameters
|
|
||||||
xgb.train <- function(params, dtrain, nrounds=10, watchlist=list(), obj=NULL, feval=NULL) {
|
|
||||||
if (typeof(params) != "list") {
|
|
||||||
stop("xgb.train: first argument params must be list");
|
|
||||||
}
|
|
||||||
if (class(dtrain) != "xgb.DMatrix") {
|
|
||||||
stop("xgb.train: second argument dtrain must be xgb.DMatrix");
|
|
||||||
}
|
|
||||||
bst <- xgb.Booster(params, append(watchlist,dtrain))
|
|
||||||
for (i in 1:nrounds) {
|
|
||||||
if (is.null(obj)) {
|
|
||||||
succ <- xgb.iter.update(bst, dtrain, i-1)
|
|
||||||
} else {
|
|
||||||
pred <- xgb.predict(bst, dtrain)
|
|
||||||
gpair <- obj(pred, dtrain)
|
|
||||||
succ <- xgb.iter.boost(bst, dtrain, gpair)
|
|
||||||
}
|
|
||||||
if (length(watchlist) != 0) {
|
|
||||||
if (is.null(feval)) {
|
|
||||||
msg <- xgb.iter.eval(bst, watchlist, i-1)
|
|
||||||
cat(msg); cat("\n")
|
|
||||||
} else {
|
|
||||||
cat("["); cat(i); cat("]");
|
|
||||||
for (j in 1:length(watchlist)) {
|
|
||||||
w <- watchlist[j]
|
|
||||||
if (length(names(w)) == 0) {
|
|
||||||
stop("xgb.eval: name tag must be presented for every elements in watchlist")
|
|
||||||
}
|
|
||||||
ret <- feval(xgb.predict(bst, w[[1]]), w[[1]])
|
|
||||||
cat("\t"); cat(names(w)); cat("-"); cat(ret$metric);
|
|
||||||
cat(":"); cat(ret$value)
|
|
||||||
}
|
|
||||||
cat("\n")
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
dtrain = xgb.DMatrix(x, info=list(label=y))
|
||||||
|
if (!is.null(margin))
|
||||||
|
{
|
||||||
|
succ <- xgb.setinfo(dtrain, "base_margin", margin)
|
||||||
|
if (!succ)
|
||||||
|
warning('Attemp to use margin failed.')
|
||||||
}
|
}
|
||||||
|
bst <- xgb.train(params, dtrain, nrounds, watchlist, obj, feval)
|
||||||
return(bst)
|
return(bst)
|
||||||
}
|
}
|
||||||
# save model or DMatrix to file
|
|
||||||
xgb.save <- function(handle, fname) {
|
|
||||||
if (typeof(fname) != "character") {
|
|
||||||
stop("xgb.save: fname must be character")
|
|
||||||
}
|
|
||||||
if (class(handle) == "xgb.Booster") {
|
|
||||||
.Call("XGBoosterSaveModel_R", handle, fname, PACKAGE="xgboost")
|
|
||||||
return(TRUE)
|
|
||||||
}
|
|
||||||
if (class(handle) == "xgb.DMatrix") {
|
|
||||||
.Call("XGDMatrixSaveBinary_R", handle, fname, as.integer(FALSE), PACKAGE="xgboost")
|
|
||||||
return(TRUE)
|
|
||||||
}
|
|
||||||
stop("xgb.save: the input must be either xgb.DMatrix or xgb.Booster")
|
|
||||||
return(FALSE)
|
|
||||||
}
|
|
||||||
# predict
|
|
||||||
xgb.predict <- function(booster, dmat, outputmargin = FALSE) {
|
|
||||||
if (class(booster) != "xgb.Booster") {
|
|
||||||
stop("xgb.predict: first argument must be type xgb.Booster")
|
|
||||||
}
|
|
||||||
if (class(dmat) != "xgb.DMatrix") {
|
|
||||||
stop("xgb.predict: second argument must be type xgb.DMatrix")
|
|
||||||
}
|
|
||||||
ret <- .Call("XGBoosterPredict_R", booster, dmat, as.integer(outputmargin), PACKAGE="xgboost")
|
|
||||||
return(ret)
|
|
||||||
}
|
|
||||||
# dump model
|
|
||||||
xgb.dump <- function(booster, fname, fmap = "") {
|
|
||||||
if (class(booster) != "xgb.Booster") {
|
|
||||||
stop("xgb.dump: first argument must be type xgb.Booster")
|
|
||||||
}
|
|
||||||
if (typeof(fname) != "character"){
|
|
||||||
stop("xgb.dump: second argument must be type character")
|
|
||||||
}
|
|
||||||
.Call("XGBoosterDumpModel_R", booster, fname, fmap, PACKAGE="xgboost")
|
|
||||||
return(TRUE)
|
|
||||||
}
|
|
||||||
##--------------------------------------
|
|
||||||
# the following are low level iteratively function, not needed
|
|
||||||
# if you do not want to use them
|
|
||||||
#---------------------------------------
|
|
||||||
# iteratively update booster with dtrain
|
|
||||||
xgb.iter.update <- function(booster, dtrain, iter) {
|
|
||||||
if (class(booster) != "xgb.Booster") {
|
|
||||||
stop("xgb.iter.update: first argument must be type xgb.Booster")
|
|
||||||
}
|
|
||||||
if (class(dtrain) != "xgb.DMatrix") {
|
|
||||||
stop("xgb.iter.update: second argument must be type xgb.DMatrix")
|
|
||||||
}
|
|
||||||
.Call("XGBoosterUpdateOneIter_R", booster, as.integer(iter), dtrain, PACKAGE="xgboost")
|
|
||||||
return(TRUE)
|
|
||||||
}
|
|
||||||
# iteratively update booster with customized statistics
|
|
||||||
xgb.iter.boost <- function(booster, dtrain, gpair) {
|
|
||||||
if (class(booster) != "xgb.Booster") {
|
|
||||||
stop("xgb.iter.update: first argument must be type xgb.Booster")
|
|
||||||
}
|
|
||||||
if (class(dtrain) != "xgb.DMatrix") {
|
|
||||||
stop("xgb.iter.update: second argument must be type xgb.DMatrix")
|
|
||||||
}
|
|
||||||
.Call("XGBoosterBoostOneIter_R", booster, dtrain, gpair$grad, gpair$hess, PACKAGE="xgboost")
|
|
||||||
return(TRUE)
|
|
||||||
}
|
|
||||||
# iteratively evaluate one iteration
|
|
||||||
xgb.iter.eval <- function(booster, watchlist, iter) {
|
|
||||||
if (class(booster) != "xgb.Booster") {
|
|
||||||
stop("xgb.eval: first argument must be type xgb.Booster")
|
|
||||||
}
|
|
||||||
if (typeof(watchlist) != "list") {
|
|
||||||
stop("xgb.eval: only accepts list of DMatrix as watchlist")
|
|
||||||
}
|
|
||||||
for (w in watchlist) {
|
|
||||||
if (class(w) != "xgb.DMatrix") {
|
|
||||||
stop("xgb.eval: watch list can only contain xgb.DMatrix")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
evnames <- list()
|
|
||||||
if (length(watchlist) != 0) {
|
|
||||||
for (i in 1:length(watchlist)) {
|
|
||||||
w <- watchlist[i]
|
|
||||||
if (length(names(w)) == 0) {
|
|
||||||
stop("xgb.eval: name tag must be presented for every elements in watchlist")
|
|
||||||
}
|
|
||||||
evnames <- append(evnames, names(w))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist, evnames, PACKAGE="xgboost")
|
|
||||||
return(msg)
|
|
||||||
}
|
|
||||||
|
|||||||
@ -1,4 +1,10 @@
|
|||||||
This is subfolder for experimental version of R package
|
This is subfolder for experimental version of R package.
|
||||||
|
|
||||||
Not yet ready
|
Not yet ready.
|
||||||
|
|
||||||
|
Installation:
|
||||||
|
|
||||||
|
```r
|
||||||
|
require(devtools)
|
||||||
|
install_github('xgboost','tqchen',subdir='R-package')
|
||||||
|
```
|
||||||
|
|||||||
102
R-package/inst/examples/demo-Rinterface.R
Normal file
102
R-package/inst/examples/demo-Rinterface.R
Normal file
@ -0,0 +1,102 @@
|
|||||||
|
require(xgboost)
|
||||||
|
|
||||||
|
# helper function to read libsvm format
|
||||||
|
# this is very badly written, load in dense, and convert to sparse
|
||||||
|
# use this only for demo purpose
|
||||||
|
# adopted from https://github.com/zygmuntz/r-libsvm-format-read-write/blob/master/f_read.libsvm.r
|
||||||
|
read.libsvm = function(fname, maxcol) {
|
||||||
|
content = readLines(fname)
|
||||||
|
nline = length(content)
|
||||||
|
label = numeric(nline)
|
||||||
|
mat = matrix(0, nline, maxcol+1)
|
||||||
|
for (i in 1:nline) {
|
||||||
|
arr = as.vector(strsplit(content[i], " ")[[1]])
|
||||||
|
label[i] = as.numeric(arr[[1]])
|
||||||
|
for (j in 2:length(arr)) {
|
||||||
|
kv = strsplit(arr[j], ":")[[1]]
|
||||||
|
# to avoid 0 index
|
||||||
|
findex = as.integer(kv[1]) + 1
|
||||||
|
fvalue = as.numeric(kv[2])
|
||||||
|
mat[i,findex] = fvalue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
mat = as(mat, "sparseMatrix")
|
||||||
|
return(list(label=label, data=mat))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Parameter setting
|
||||||
|
dtrain <- xgb.DMatrix("agaricus.txt.train")
|
||||||
|
dtest <- xgb.DMatrix("agaricus.txt.test")
|
||||||
|
param = list("bst:max_depth"=2, "bst:eta"=1, "silent"=1, "objective"="binary:logistic")
|
||||||
|
watchlist = list("eval"=dtest,"train"=dtrain)
|
||||||
|
|
||||||
|
###########################
|
||||||
|
# Train from local file
|
||||||
|
###########################
|
||||||
|
|
||||||
|
# Training
|
||||||
|
bst = xgboost(file='agaricus.txt.train',params=param,watchlist=watchlist)
|
||||||
|
# Prediction
|
||||||
|
pred = predict(bst, 'agaricus.txt.test')
|
||||||
|
# Performance
|
||||||
|
labels = xgb.getinfo(dtest, "label")
|
||||||
|
err = as.numeric(sum(as.integer(pred > 0.5) != labels)) / length(labels)
|
||||||
|
print(paste("error=",err))
|
||||||
|
|
||||||
|
###########################
|
||||||
|
# Train from R object
|
||||||
|
###########################
|
||||||
|
|
||||||
|
csc = read.libsvm("agaricus.txt.train", 126)
|
||||||
|
y = csc$label
|
||||||
|
x = csc$data
|
||||||
|
# x as Sparse Matrix
|
||||||
|
class(x)
|
||||||
|
|
||||||
|
# Training
|
||||||
|
bst = xgboost(x,y,params=param,watchlist=watchlist)
|
||||||
|
# Prediction
|
||||||
|
pred = predict(bst, 'agaricus.txt.test')
|
||||||
|
# Performance
|
||||||
|
labels = xgb.getinfo(dtest, "label")
|
||||||
|
err = as.numeric(sum(as.integer(pred > 0.5) != labels)) / length(labels)
|
||||||
|
print(paste("error=",err))
|
||||||
|
|
||||||
|
# Training with dense matrix
|
||||||
|
x = as.matrix(x)
|
||||||
|
bst = xgboost(x,y,params=param,watchlist=watchlist)
|
||||||
|
|
||||||
|
###########################
|
||||||
|
# Train with customization
|
||||||
|
###########################
|
||||||
|
|
||||||
|
# user define objective function, given prediction, return gradient and second order gradient
|
||||||
|
# this is loglikelihood loss
|
||||||
|
logregobj = function(preds, dtrain) {
|
||||||
|
labels = xgb.getinfo(dtrain, "label")
|
||||||
|
preds = 1.0 / (1.0 + exp(-preds))
|
||||||
|
grad = preds - labels
|
||||||
|
hess = preds * (1.0-preds)
|
||||||
|
return(list(grad=grad, hess=hess))
|
||||||
|
}
|
||||||
|
# user defined evaluation function, return a list(metric="metric-name", value="metric-value")
|
||||||
|
# NOTE: when you do customized loss function, the default prediction value is margin
|
||||||
|
# this may make buildin evalution metric not function properly
|
||||||
|
# for example, we are doing logistic loss, the prediction is score before logistic transformation
|
||||||
|
# the buildin evaluation error assumes input is after logistic transformation
|
||||||
|
# Take this in mind when you use the customization, and maybe you need write customized evaluation function
|
||||||
|
evalerror = function(preds, dtrain) {
|
||||||
|
labels = xgb.getinfo(dtrain, "label")
|
||||||
|
err = as.numeric(sum(labels != (preds > 0.0))) / length(labels)
|
||||||
|
return(list(metric="error", value=err))
|
||||||
|
}
|
||||||
|
|
||||||
|
bst = xgboost(x,y,params=param,watchlist=watchlist,obj=logregobj, feval=evalerror)
|
||||||
|
|
||||||
|
############################
|
||||||
|
# Train with previous result
|
||||||
|
############################
|
||||||
|
|
||||||
|
bst = xgboost(x,y,params=param,watchlist=watchlist)
|
||||||
|
pred = predict(bst, 'agaricus.txt.train', outputmargin=TRUE)
|
||||||
|
bst2 = xgboost(x,y,params=param,watchlist=watchlist,margin=pred)
|
||||||
Loading…
x
Reference in New Issue
Block a user