refinement of R package

2014-08-27 12:57:37 -07:00 · 2014-08-27 12:57:37 -07:00 · d747172d37
commit d747172d37
parent 0fe5470a4f
6 changed files with 89 additions and 60 deletions
--- a/R-package/NAMESPACE
+++ b/R-package/NAMESPACE
@ -8,3 +8,4 @@ export(xgb.train)
 export(xgb.save)
 export(xgb.load)
 export(xgb.dump)
 export(xgb.DMatrix.save)
--- a/R-package/R/xgb.DMatrix.R
+++ b/R-package/R/xgb.DMatrix.R
@ -1,5 +1,5 @@
 # constructing DMatrix
-xgb.DMatrix <- function(data, missing=0.0, ...) {
+xgb.DMatrix <- function(data, info=list(), missing=0.0, ...) {
    if (typeof(data) == "character") {
        handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE), PACKAGE="xgboost")
    } else if(is.matrix(data)) {
@ -11,7 +11,7 @@ xgb.DMatrix <- function(data, missing=0.0, ...) {
    }
    dmat <- structure(handle, class="xgb.DMatrix")
-    info = list(...)
+    info = append(info,list(...))
    if (length(info)==0)
        return(dmat)
    for (i in 1:length(info)) {
--- a/R-package/R/xgb.DMatrix.save.R
+++ b/R-package/R/xgb.DMatrix.save.R
@ -0,0 +1,12 @@
 # Save an xgb.DMatrix object to a binary file.
 #
 # Arguments:
 #   handle  object of class "xgb.DMatrix" to be written
 #   fname   character; path of the output file
 #
 # Returns TRUE after the native save routine has been invoked. Signals an
 # error via stop() when fname is not character or when handle is not an
 # xgb.DMatrix.
 xgb.DMatrix.save <- function(handle, fname) {
    if (typeof(fname) != "character") {
        stop("xgb.DMatrix.save: fname must be character")
    }
    # inherits() also accepts objects carrying additional classes, where
    # class(handle) == "xgb.DMatrix" would produce a condition of length > 1
    if (!inherits(handle, "xgb.DMatrix")) {
        stop("xgb.DMatrix.save: the input must be xgb.DMatrix")
    }
    # NOTE(review): the third argument is forwarded unchanged as
    # as.integer(FALSE); presumably a "silent" flag -- confirm against the C API
    .Call("XGDMatrixSaveBinary_R", handle, fname, as.integer(FALSE), PACKAGE="xgboost")
    return(TRUE)
 }
--- a/R-package/R/xgb.save.R
+++ b/R-package/R/xgb.save.R
@ -7,10 +7,6 @@ xgb.save <- function(handle, fname) {
        .Call("XGBoosterSaveModel_R", handle, fname, PACKAGE="xgboost")
        return(TRUE)
    }
    if (class(handle) == "xgb.DMatrix") {
        .Call("XGDMatrixSaveBinary_R", handle, fname, as.integer(FALSE), PACKAGE="xgboost")
        return(TRUE)
    }
    stop("xgb.save: the input must be either xgb.DMatrix or xgb.Booster")
    return(FALSE)
 }
--- a/R-package/R/xgboost.R
+++ b/R-package/R/xgboost.R
@ -1,49 +1,41 @@
 # Main function for xgboost-package
-xgboost = function(x=NULL,y=NULL,DMatrix=NULL, file=NULL, validation=NULL, 
+xgboost = function(data=NULL, label = NULL, params=list(), nrounds=10, 
-                   nrounds=10, obj=NULL, feval=NULL, margin=NULL, verbose = T, ...)
+                   verbose = 1, ...)
 {
-    if (!is.null(DMatrix))
+    inClass = class(data)
-        dtrain = DMatrix
+    if (inClass=='dgCMatrix' || inClass=='matrix')
    {
        if (is.null(label))
            stop('xgboost: need label when data is a matrix')
        dtrain = xgb.DMatrix(data, label=y)
    }
    else
    {
-        if (is.null(x) && is.null(y))
+        if (!is.null(label))
-        {
+            warning('xgboost: label will be ignored.')
-            if (is.null(file))
+        if (inClass=='character')
-                stop('xgboost need input data, either R objects, local files or DMatrix object.')
+            dtrain = xgb.DMatrix(data)
-            dtrain = xgb.DMatrix(file)
+        else if (inClass=='xgb.DMatrix')
-        }
+            dtrain = data
        else
-            dtrain = xgb.DMatrix(x, label=y)
+            stop('xgboost: Invalid input of data')
        if (!is.null(margin))
        {
            succ <- xgb.setinfo(dtrain, "base_margin", margin)
            if (!succ)
                warning('Attemp to use margin failed.')
        }
    }
-    params = list(...)
+    if (verbose>1)
        silent = 0
    else
        silent = 1
-    watchlist=list()
+    params = append(params, list(silent=silent))
-    if (verbose)
+    params = append(params, list(...)) 
    {
        if (!is.null(validation))
        {
            if (class(validation)!='xgb.DMatrix')
                dtest = xgb.DMatrix(validation)
            else
                dtest = validation
            watchlist = list(eval=dtest,train=dtrain)
        }
-        else
+    if (verbose>0)
-            watchlist = list(train=dtrain)
+        watchlist = list(train=dtrain)
-    }
+    else
        watchlist = list()
-    bst <- xgb.train(params, dtrain, nrounds, watchlist, obj, feval)
+    bst <- xgb.train(params, dtrain, nrounds, watchlist)
    return(bst)
 }
--- a/R-package/inst/examples/demo-new.R
+++ b/R-package/inst/examples/demo-new.R
@ -51,20 +51,25 @@ dtrain = xgb.DMatrix(dense.x, label=y)
 ############################
 # Test with DMatrix object
-bst = xgboost(DMatrix=dtrain, max_depth=2, eta=1, silent=1, objective='binary:logistic')
+bst = xgboost(data=dtrain, max_depth=2, eta=1, objective='binary:logistic')
 # Verbose = 0,1,2
 bst = xgboost(data=dtrain, max_depth=2, eta=1, objective='binary:logistic', 
              verbose = 0)
 bst = xgboost(data=dtrain, max_depth=2, eta=1, objective='binary:logistic', 
              verbose = 1)
 bst = xgboost(data=dtrain, max_depth=2, eta=1, objective='binary:logistic', 
              verbose = 2)
 # Test with local file
-bst = xgboost(file='agaricus.txt.train', max_depth=2, eta=1, silent=1, objective='binary:logistic')
+bst = xgboost(data='agaricus.txt.train', max_depth=2, eta=1, objective='binary:logistic')
 # Test with Sparse Matrix
-bst = xgboost(x = x, y = y, max_depth=2, eta=1, silent=1, objective='binary:logistic')
+bst = xgboost(data = x, label = y, max_depth=2, eta=1, objective='binary:logistic')
 # Test with dense Matrix
-bst = xgboost(x = dense.x, y = y, max_depth=2, eta=1, silent=1, objective='binary:logistic')
+bst = xgboost(data = dense.x, label = y, max_depth=2, eta=1, objective='binary:logistic')
 # Test with validation set
 bst = xgboost(file='agaricus.txt.train', validation='agaricus.txt.test', 
              max_depth=2, eta=1, silent=1, objective='binary:logistic')
 ############################
 # Test predict
@ -102,17 +107,39 @@ pred = predict(bst, test.x)
 # save model to text file
 xgb.dump(bst, 'model.dump')
 # save a DMatrix object to hard disk
 xgb.DMatrix.save(dtrain,'dtrain.save')
 # load a DMatrix object to R
 dtrain = xgb.DMatrix('dtrain.save')
 ############################
-# Customized objective and evaluation function 
+# More flexible training function xgb.train
 ############################
 param = list(max_depth=2, eta=1, silent = 1, objective="binary:logistic")
 watchlist <- list("eval"=dtest,"train"=dtrain)
 # training xgboost model
 bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)
 ############################
 # customized loss function
 ############################
 param <- list(max_depth = 2, eta = 1, silent =1)
 # note: for customized objective function, we leave objective as default
 # note: what we are getting is margin value in prediction
 # you must know what you are doing
 # user-defined objective function: given the predictions, return the gradient and second-order gradient
 # this is loglikelihood loss
-logregobj = function(preds, dtrain) {
+logregobj <- function(preds, dtrain) {
-    labels = xgb.getinfo(dtrain, "label")
+    labels <- xgb.getinfo(dtrain, "label")
-    preds = 1.0 / (1.0 + exp(-preds))
+    preds <- 1.0 / (1.0 + exp(-preds))
-    grad = preds - labels
+    grad <- preds - labels
-    hess = preds * (1.0-preds)
+    hess <- preds * (1.0-preds)
    return(list(grad=grad, hess=hess))
 }
 # user defined evaluation function, return a list(metric="metric-name", value="metric-value")
@ -121,13 +148,14 @@ logregobj = function(preds, dtrain) {
 # for example, we are doing logistic loss, the prediction is score before logistic transformation
 # the built-in evaluation error assumes input is after logistic transformation
 # Keep this in mind when you use a customized objective; you may need to write a customized evaluation function
-evalerror = function(preds, dtrain) {
+evalerror <- function(preds, dtrain) {
-    labels = xgb.getinfo(dtrain, "label")
+    labels <- xgb.getinfo(dtrain, "label")
-    err = as.numeric(sum(labels != (preds > 0.0))) / length(labels)
+    err <- as.numeric(sum(labels != (preds > 0.0))) / length(labels)
    return(list(metric="error", value=err))
 }
-bst = xgboost(x = x, y = y, max_depth=2, eta=1, silent=1, objective='binary:logistic',
+# training with customized objective, we can also do step by step training
-              obj=logregobj, feval=evalerror)
+# simply look at xgboost.py's implementation of train
 bst <- xgb.train(param, dtrain, nround=2, watchlist, logregobj, evalerror)