refinement of R package
This commit is contained in:
parent
0fe5470a4f
commit
d747172d37
@ -8,3 +8,4 @@ export(xgb.train)
|
|||||||
export(xgb.save)
|
export(xgb.save)
|
||||||
export(xgb.load)
|
export(xgb.load)
|
||||||
export(xgb.dump)
|
export(xgb.dump)
|
||||||
|
export(xgb.DMatrix.save)
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
# constructing DMatrix
|
# constructing DMatrix
|
||||||
xgb.DMatrix <- function(data, missing=0.0, ...) {
|
xgb.DMatrix <- function(data, info=list(), missing=0.0, ...) {
|
||||||
if (typeof(data) == "character") {
|
if (typeof(data) == "character") {
|
||||||
handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE), PACKAGE="xgboost")
|
handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE), PACKAGE="xgboost")
|
||||||
} else if(is.matrix(data)) {
|
} else if(is.matrix(data)) {
|
||||||
@ -11,7 +11,7 @@ xgb.DMatrix <- function(data, missing=0.0, ...) {
|
|||||||
}
|
}
|
||||||
dmat <- structure(handle, class="xgb.DMatrix")
|
dmat <- structure(handle, class="xgb.DMatrix")
|
||||||
|
|
||||||
info = list(...)
|
info = append(info,list(...))
|
||||||
if (length(info)==0)
|
if (length(info)==0)
|
||||||
return(dmat)
|
return(dmat)
|
||||||
for (i in 1:length(info)) {
|
for (i in 1:length(info)) {
|
||||||
|
|||||||
12
R-package/R/xgb.DMatrix.save.R
Normal file
12
R-package/R/xgb.DMatrix.save.R
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
# Save an xgb.DMatrix object to a binary file.
#
# Args:
#   handle: an object of class "xgb.DMatrix".
#   fname:  path of the output file, given as a character value.
#
# Returns:
#   TRUE once the native save routine has been called.
#
# Stops with an error when fname is not character or when handle is not an
# xgb.DMatrix.
xgb.DMatrix.save <- function(handle, fname) {
  if (typeof(fname) != "character") {
    stop("xgb.DMatrix.save: fname must be character")
  }
  # inherits() gives a single TRUE/FALSE even when the object carries more
  # than one class, unlike comparing class(handle) with ==.
  if (!inherits(handle, "xgb.DMatrix")) {
    stop("xgb.DMatrix.save: the input must be xgb.DMatrix")
  }
  # NOTE(review): the third argument is passed as integer 0; presumably a
  # silent/verbosity flag of the native routine -- confirm against the C side.
  .Call("XGDMatrixSaveBinary_R", handle, fname, as.integer(FALSE), PACKAGE="xgboost")
  return(TRUE)
}
|
||||||
@ -7,10 +7,6 @@ xgb.save <- function(handle, fname) {
|
|||||||
.Call("XGBoosterSaveModel_R", handle, fname, PACKAGE="xgboost")
|
.Call("XGBoosterSaveModel_R", handle, fname, PACKAGE="xgboost")
|
||||||
return(TRUE)
|
return(TRUE)
|
||||||
}
|
}
|
||||||
if (class(handle) == "xgb.DMatrix") {
|
|
||||||
.Call("XGDMatrixSaveBinary_R", handle, fname, as.integer(FALSE), PACKAGE="xgboost")
|
|
||||||
return(TRUE)
|
|
||||||
}
|
|
||||||
stop("xgb.save: the input must be either xgb.DMatrix or xgb.Booster")
|
stop("xgb.save: the input must be either xgb.DMatrix or xgb.Booster")
|
||||||
return(FALSE)
|
return(FALSE)
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,49 +1,41 @@
|
|||||||
# Main function for xgboost-package
|
# Main function for xgboost-package
|
||||||
|
|
||||||
xgboost = function(x=NULL,y=NULL,DMatrix=NULL, file=NULL, validation=NULL,
|
xgboost = function(data=NULL, label = NULL, params=list(), nrounds=10,
|
||||||
nrounds=10, obj=NULL, feval=NULL, margin=NULL, verbose = T, ...)
|
verbose = 1, ...)
|
||||||
{
|
{
|
||||||
if (!is.null(DMatrix))
|
inClass = class(data)
|
||||||
dtrain = DMatrix
|
# Matrix input (sparse dgCMatrix or dense matrix): a label is required here
# and is forwarded to xgb.DMatrix together with the data.
# inherits()/is.matrix() are used instead of comparing class(data) with ==,
# because class() can return more than one element (e.g. c("matrix", "array")),
# which is not a valid operand for ||.
if (inherits(data, "dgCMatrix") || is.matrix(data))
{
  if (is.null(label))
    stop('xgboost: need label when data is a matrix')
  # pass the function's own `label` argument (there is no `y` in this signature)
  dtrain = xgb.DMatrix(data, label=label)
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (is.null(x) && is.null(y))
|
if (!is.null(label))
|
||||||
{
|
warning('xgboost: label will be ignored.')
|
||||||
if (is.null(file))
|
if (inClass=='character')
|
||||||
stop('xgboost need input data, either R objects, local files or DMatrix object.')
|
dtrain = xgb.DMatrix(data)
|
||||||
dtrain = xgb.DMatrix(file)
|
else if (inClass=='xgb.DMatrix')
|
||||||
}
|
dtrain = data
|
||||||
else
|
else
|
||||||
dtrain = xgb.DMatrix(x, label=y)
|
stop('xgboost: Invalid input of data')
|
||||||
if (!is.null(margin))
|
|
||||||
{
|
|
||||||
succ <- xgb.setinfo(dtrain, "base_margin", margin)
|
|
||||||
if (!succ)
|
|
||||||
warning('Attemp to use margin failed.')
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
params = list(...)
|
if (verbose>1)
|
||||||
|
silent = 0
|
||||||
|
else
|
||||||
|
silent = 1
|
||||||
|
|
||||||
watchlist=list()
|
params = append(params, list(silent=silent))
|
||||||
if (verbose)
|
params = append(params, list(...))
|
||||||
{
|
|
||||||
if (!is.null(validation))
|
|
||||||
{
|
|
||||||
if (class(validation)!='xgb.DMatrix')
|
|
||||||
dtest = xgb.DMatrix(validation)
|
|
||||||
else
|
|
||||||
dtest = validation
|
|
||||||
watchlist = list(eval=dtest,train=dtrain)
|
|
||||||
}
|
|
||||||
|
|
||||||
else
|
if (verbose>0)
|
||||||
watchlist = list(train=dtrain)
|
watchlist = list(train=dtrain)
|
||||||
}
|
else
|
||||||
|
watchlist = list()
|
||||||
|
|
||||||
bst <- xgb.train(params, dtrain, nrounds, watchlist, obj, feval)
|
bst <- xgb.train(params, dtrain, nrounds, watchlist)
|
||||||
|
|
||||||
return(bst)
|
return(bst)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -51,20 +51,25 @@ dtrain = xgb.DMatrix(dense.x, label=y)
|
|||||||
############################
|
############################
|
||||||
|
|
||||||
# Test with DMatrix object
|
# Test with DMatrix object
|
||||||
bst = xgboost(DMatrix=dtrain, max_depth=2, eta=1, silent=1, objective='binary:logistic')
|
bst = xgboost(data=dtrain, max_depth=2, eta=1, objective='binary:logistic')
|
||||||
|
|
||||||
|
# Verbose = 0,1,2
|
||||||
|
bst = xgboost(data=dtrain, max_depth=2, eta=1, objective='binary:logistic',
|
||||||
|
verbose = 0)
|
||||||
|
bst = xgboost(data=dtrain, max_depth=2, eta=1, objective='binary:logistic',
|
||||||
|
verbose = 1)
|
||||||
|
bst = xgboost(data=dtrain, max_depth=2, eta=1, objective='binary:logistic',
|
||||||
|
verbose = 2)
|
||||||
|
|
||||||
# Test with local file
|
# Test with local file
|
||||||
bst = xgboost(file='agaricus.txt.train', max_depth=2, eta=1, silent=1, objective='binary:logistic')
|
bst = xgboost(data='agaricus.txt.train', max_depth=2, eta=1, objective='binary:logistic')
|
||||||
|
|
||||||
# Test with Sparse Matrix
|
# Test with Sparse Matrix
|
||||||
bst = xgboost(x = x, y = y, max_depth=2, eta=1, silent=1, objective='binary:logistic')
|
bst = xgboost(data = x, label = y, max_depth=2, eta=1, objective='binary:logistic')
|
||||||
|
|
||||||
# Test with dense Matrix
|
# Test with dense Matrix
|
||||||
bst = xgboost(x = dense.x, y = y, max_depth=2, eta=1, silent=1, objective='binary:logistic')
|
bst = xgboost(data = dense.x, label = y, max_depth=2, eta=1, objective='binary:logistic')
|
||||||
|
|
||||||
# Test with validation set
|
|
||||||
bst = xgboost(file='agaricus.txt.train', validation='agaricus.txt.test',
|
|
||||||
max_depth=2, eta=1, silent=1, objective='binary:logistic')
|
|
||||||
|
|
||||||
############################
|
############################
|
||||||
# Test predict
|
# Test predict
|
||||||
@ -102,17 +107,39 @@ pred = predict(bst, test.x)
|
|||||||
# save model to text file
|
# save model to text file
|
||||||
xgb.dump(bst, 'model.dump')
|
xgb.dump(bst, 'model.dump')
|
||||||
|
|
||||||
|
# save a DMatrix object to hard disk
|
||||||
|
xgb.DMatrix.save(dtrain,'dtrain.save')
|
||||||
|
|
||||||
|
# load a DMatrix object to R
|
||||||
|
dtrain = xgb.DMatrix('dtrain.save')
|
||||||
|
|
||||||
############################
|
############################
|
||||||
# Customized objective and evaluation function
|
# More flexible training function xgb.train
|
||||||
############################
|
############################
|
||||||
|
|
||||||
|
param = list(max_depth=2, eta=1, silent = 1, objective="binary:logistic")
|
||||||
|
watchlist <- list("eval"=dtest,"train"=dtrain)
|
||||||
|
|
||||||
|
# training xgboost model
|
||||||
|
bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)
|
||||||
|
|
||||||
|
############################
|
||||||
|
# customized loss function
|
||||||
|
############################
|
||||||
|
|
||||||
|
param <- list(max_depth = 2, eta = 1, silent =1)
|
||||||
|
|
||||||
|
# note: for customized objective function, we leave objective as default
|
||||||
|
# note: what we are getting is margin value in prediction
|
||||||
|
# you must know what you are doing
|
||||||
|
|
||||||
# user-defined objective function: given prediction, return gradient and second order gradient
|
# user-defined objective function: given prediction, return gradient and second order gradient
|
||||||
# this is loglikelihood loss
|
# this is loglikelihood loss
|
||||||
logregobj = function(preds, dtrain) {
|
logregobj <- function(preds, dtrain) {
|
||||||
labels = xgb.getinfo(dtrain, "label")
|
labels <- xgb.getinfo(dtrain, "label")
|
||||||
preds = 1.0 / (1.0 + exp(-preds))
|
preds <- 1.0 / (1.0 + exp(-preds))
|
||||||
grad = preds - labels
|
grad <- preds - labels
|
||||||
hess = preds * (1.0-preds)
|
hess <- preds * (1.0-preds)
|
||||||
return(list(grad=grad, hess=hess))
|
return(list(grad=grad, hess=hess))
|
||||||
}
|
}
|
||||||
# user defined evaluation function, return a list(metric="metric-name", value="metric-value")
|
# user defined evaluation function, return a list(metric="metric-name", value="metric-value")
|
||||||
@ -121,13 +148,14 @@ logregobj = function(preds, dtrain) {
|
|||||||
# for example, we are doing logistic loss, the prediction is score before logistic transformation
|
# for example, we are doing logistic loss, the prediction is score before logistic transformation
|
||||||
# the built-in evaluation error assumes input is after logistic transformation
|
# the built-in evaluation error assumes input is after logistic transformation
|
||||||
# Keep this in mind when you use the customization; you may need to write a customized evaluation function
|
# Keep this in mind when you use the customization; you may need to write a customized evaluation function
|
||||||
evalerror = function(preds, dtrain) {
|
evalerror <- function(preds, dtrain) {
|
||||||
labels = xgb.getinfo(dtrain, "label")
|
labels <- xgb.getinfo(dtrain, "label")
|
||||||
err = as.numeric(sum(labels != (preds > 0.0))) / length(labels)
|
err <- as.numeric(sum(labels != (preds > 0.0))) / length(labels)
|
||||||
return(list(metric="error", value=err))
|
return(list(metric="error", value=err))
|
||||||
}
|
}
|
||||||
|
|
||||||
bst = xgboost(x = x, y = y, max_depth=2, eta=1, silent=1, objective='binary:logistic',
|
# training with customized objective, we can also do step by step training
|
||||||
obj=logregobj, feval=evalerror)
|
# simply look at xgboost.py's implementation of train
|
||||||
|
bst <- xgb.train(param, dtrain, nround=2, watchlist, logregobj, evalerror)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user