Merge branch 'master' of ssh://github.com/tqchen/xgboost
commit 9eb32b9dd4
@ -1,12 +1,18 @@
importClassesFrom("Matrix", dgCMatrix, dgeMatrix)
export(xgboost)
export(xgb.DMatrix)
export(xgb.getinfo)
export(xgb.setinfo)
export(xgb.Booster)
export(xgb.train)
export(xgb.save)
export(xgb.predict)
export(xgb.dump)

# exportClasses(xgb.Boost)
exportMethods(predict)

# export(xgb.Booster)
# export(xgb.train)
# export(xgb.save)
# export(xgb.predict)
# export(xgb.dump)
16 R-package/R/predict.xgboost.R Normal file
@ -0,0 +1,16 @@
#' @export
setClass("xgb.Booster")

#' @export
setMethod("predict",
          signature = "xgb.Booster",
          definition = function(object, newdata, outputmargin = FALSE)
          {
            if (class(newdata) != "xgb.DMatrix") {
              newdata = xgb.DMatrix(newdata)
            }
            ret <- .Call("XGBoosterPredict_R", object, newdata,
                         as.integer(outputmargin), PACKAGE="xgboost")
            return(ret)
          })
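
# usage sketch (placeholder names, assuming a booster `bst` trained as in the
# demo and a feature matrix or file path `x`): the S4 method converts `x` to an
# xgb.DMatrix before calling the native predictor
#   preds <- predict(bst, x)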
222 R-package/R/xgb.Utils.R Normal file
@ -0,0 +1,222 @@
# depends on the Matrix package
.onLoad <- function(libname, pkgname) {
  library.dynam("xgboost", pkgname, libname);
}
.onUnload <- function(libpath) {
  library.dynam.unload("xgboost", libpath);
}

# construct a DMatrix from a file path, a dense matrix, or a sparse dgCMatrix
xgb.DMatrix <- function(data, info=list(), missing=0.0) {
  if (typeof(data) == "character") {
    handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE), PACKAGE="xgboost")
  } else if (is.matrix(data)) {
    handle <- .Call("XGDMatrixCreateFromMat_R", data, missing, PACKAGE="xgboost")
  } else if (class(data) == "dgCMatrix") {
    handle <- .Call("XGDMatrixCreateFromCSC_R", data@p, data@i, data@x, PACKAGE="xgboost")
  } else {
    stop(paste("xgb.DMatrix: does not support construction from", typeof(data)))
  }
  dmat <- structure(handle, class="xgb.DMatrix")
  if (length(info) != 0) {
    for (i in 1:length(info)) {
      p <- info[i]
      xgb.setinfo(dmat, names(p), p[[1]])
    }
  }
  return(dmat)
}
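
# usage sketch: the three supported input types, as exercised in the demo scripts
# (`x` and `y` are placeholder names for a feature matrix and a label vector)
#   dtrain <- xgb.DMatrix("agaricus.txt.train")              # libsvm or binary file
#   dtrain <- xgb.DMatrix(as.matrix(x), info=list(label=y))  # dense matrix
#   dtrain <- xgb.DMatrix(x, info=list(label=y))             # dgCMatrix (sparse)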
# get information from a DMatrix
xgb.getinfo <- function(dmat, name) {
  if (typeof(name) != "character") {
    stop("xgb.getinfo: name must be character")
  }
  if (class(dmat) != "xgb.DMatrix") {
    stop("xgb.getinfo: first argument dmat must be xgb.DMatrix");
  }
  if (name != "label" &&
      name != "weight" &&
      name != "base_margin") {
    stop(paste("xgb.getinfo: unknown info name", name))
  }
  ret <- .Call("XGDMatrixGetInfo_R", dmat, name, PACKAGE="xgboost")
  return(ret)
}
# set information into a DMatrix; this mutates the DMatrix
xgb.setinfo <- function(dmat, name, info) {
  if (class(dmat) != "xgb.DMatrix") {
    stop("xgb.setinfo: first argument dmat must be xgb.DMatrix");
  }
  if (name == "label") {
    .Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info), PACKAGE="xgboost")
    return(TRUE)
  }
  if (name == "weight") {
    .Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info), PACKAGE="xgboost")
    return(TRUE)
  }
  if (name == "base_margin") {
    .Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info), PACKAGE="xgboost")
    return(TRUE)
  }
  if (name == "group") {
    .Call("XGDMatrixSetInfo_R", dmat, name, as.integer(info), PACKAGE="xgboost")
    return(TRUE)
  }
  stop(paste("xgb.setinfo: unknown info name", name))
  return(FALSE)
}
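
# usage sketch (placeholder names): set a field and read it back
#   succ <- xgb.setinfo(dtrain, "label", y)
#   labels <- xgb.getinfo(dtrain, "label")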
# construct a Booster from a cachelist
xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
  if (typeof(cachelist) != "list") {
    stop("xgb.Booster: only accepts a list of DMatrix as cachelist")
  }
  for (dm in cachelist) {
    if (class(dm) != "xgb.DMatrix") {
      stop("xgb.Booster: only accepts a list of DMatrix as cachelist")
    }
  }
  handle <- .Call("XGBoosterCreate_R", cachelist, PACKAGE="xgboost")
  .Call("XGBoosterSetParam_R", handle, "seed", "0", PACKAGE="xgboost")
  if (length(params) != 0) {
    for (i in 1:length(params)) {
      p <- params[i]
      .Call("XGBoosterSetParam_R", handle, names(p), as.character(p), PACKAGE="xgboost")
    }
  }
  if (!is.null(modelfile)) {
    if (typeof(modelfile) != "character") {
      stop("xgb.Booster: modelfile must be character");
    }
    .Call("XGBoosterLoadModel_R", handle, modelfile, PACKAGE="xgboost")
  }
  return(structure(handle, class="xgb.Booster"))
}
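
# usage sketch: reload a previously saved model, as done in the demo
#   bst2 <- xgb.Booster(modelfile="xgb.model")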
# train a model with the given parameters
xgb.train <- function(params, dtrain, nrounds=10, watchlist=list(), obj=NULL, feval=NULL) {
  if (typeof(params) != "list") {
    stop("xgb.train: first argument params must be a list");
  }
  if (class(dtrain) != "xgb.DMatrix") {
    stop("xgb.train: second argument dtrain must be xgb.DMatrix");
  }
  bst <- xgb.Booster(params, append(watchlist, dtrain))
  for (i in 1:nrounds) {
    if (is.null(obj)) {
      succ <- xgb.iter.update(bst, dtrain, i-1)
    } else {
      pred <- xgb.predict(bst, dtrain)
      gpair <- obj(pred, dtrain)
      succ <- xgb.iter.boost(bst, dtrain, gpair)
    }
    if (length(watchlist) != 0) {
      if (is.null(feval)) {
        msg <- xgb.iter.eval(bst, watchlist, i-1)
        cat(msg); cat("\n")
      } else {
        cat("["); cat(i); cat("]");
        for (j in 1:length(watchlist)) {
          w <- watchlist[j]
          if (length(names(w)) == 0) {
            stop("xgb.train: a name tag must be present for every element in watchlist")
          }
          ret <- feval(xgb.predict(bst, w[[1]]), w[[1]])
          cat("\t"); cat(names(w)); cat("-"); cat(ret$metric);
          cat(":"); cat(ret$value)
        }
        cat("\n")
      }
    }
  }
  return(bst)
}
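
# usage sketch, taken from the demo: train for 2 rounds, printing the
# evaluation on each named watchlist entry every round
#   param <- list("bst:max_depth"=2, "bst:eta"=1, "silent"=1, "objective"="binary:logistic")
#   bst <- xgb.train(param, dtrain, nrounds=2, watchlist=list("eval"=dtest, "train"=dtrain))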
# save a model or a DMatrix to file
xgb.save <- function(handle, fname) {
  if (typeof(fname) != "character") {
    stop("xgb.save: fname must be character")
  }
  if (class(handle) == "xgb.Booster") {
    .Call("XGBoosterSaveModel_R", handle, fname, PACKAGE="xgboost")
    return(TRUE)
  }
  if (class(handle) == "xgb.DMatrix") {
    .Call("XGDMatrixSaveBinary_R", handle, fname, as.integer(FALSE), PACKAGE="xgboost")
    return(TRUE)
  }
  stop("xgb.save: the input must be either xgb.DMatrix or xgb.Booster")
  return(FALSE)
}
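
# usage sketch, from the demo: the same function saves either object type
#   succ <- xgb.save(bst, "xgb.model")       # booster model
#   succ <- xgb.save(dtest, "dtest.buffer")  # DMatrix binary buffer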
# predict
xgb.predict <- function(booster, dmat, outputmargin = FALSE) {
  if (class(booster) != "xgb.Booster") {
    stop("xgb.predict: first argument must be of type xgb.Booster")
  }
  if (class(dmat) != "xgb.DMatrix") {
    stop("xgb.predict: second argument must be of type xgb.DMatrix")
  }
  ret <- .Call("XGBoosterPredict_R", booster, dmat, as.integer(outputmargin), PACKAGE="xgboost")
  return(ret)
}

# dump model
xgb.dump <- function(booster, fname, fmap = "") {
  if (class(booster) != "xgb.Booster") {
    stop("xgb.dump: first argument must be of type xgb.Booster")
  }
  if (typeof(fname) != "character") {
    stop("xgb.dump: second argument must be of type character")
  }
  .Call("XGBoosterDumpModel_R", booster, fname, fmap, PACKAGE="xgboost")
  return(TRUE)
}
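
# usage sketch, from the demo: dump the trees as text, optionally with a feature map
#   xgb.dump(bst, "dump.raw.txt")
#   xgb.dump(bst, "dump.nice.txt", "featmap.txt")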
##--------------------------------------
# The following are low-level iteration functions;
# you do not need them unless you want step-by-step training.
#---------------------------------------
# iteratively update the booster with dtrain
xgb.iter.update <- function(booster, dtrain, iter) {
  if (class(booster) != "xgb.Booster") {
    stop("xgb.iter.update: first argument must be of type xgb.Booster")
  }
  if (class(dtrain) != "xgb.DMatrix") {
    stop("xgb.iter.update: second argument must be of type xgb.DMatrix")
  }
  .Call("XGBoosterUpdateOneIter_R", booster, as.integer(iter), dtrain, PACKAGE="xgboost")
  return(TRUE)
}
# iteratively update the booster with customized gradient statistics
xgb.iter.boost <- function(booster, dtrain, gpair) {
  if (class(booster) != "xgb.Booster") {
    stop("xgb.iter.boost: first argument must be of type xgb.Booster")
  }
  if (class(dtrain) != "xgb.DMatrix") {
    stop("xgb.iter.boost: second argument must be of type xgb.DMatrix")
  }
  .Call("XGBoosterBoostOneIter_R", booster, dtrain, gpair$grad, gpair$hess, PACKAGE="xgboost")
  return(TRUE)
}
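
# gpair is a list(grad=..., hess=...) as returned by a custom objective such as
# logregobj in the demo; one custom boosting step therefore looks like
#   pred <- xgb.predict(bst, dtrain)
#   succ <- xgb.iter.boost(bst, dtrain, logregobj(pred, dtrain))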
# evaluate one iteration on the watchlist
xgb.iter.eval <- function(booster, watchlist, iter) {
  if (class(booster) != "xgb.Booster") {
    stop("xgb.iter.eval: first argument must be of type xgb.Booster")
  }
  if (typeof(watchlist) != "list") {
    stop("xgb.iter.eval: only accepts a list of DMatrix as watchlist")
  }
  for (w in watchlist) {
    if (class(w) != "xgb.DMatrix") {
      stop("xgb.iter.eval: watchlist can only contain xgb.DMatrix")
    }
  }
  evnames <- list()
  if (length(watchlist) != 0) {
    for (i in 1:length(watchlist)) {
      w <- watchlist[i]
      if (length(names(w)) == 0) {
        stop("xgb.iter.eval: a name tag must be present for every element in watchlist")
      }
      evnames <- append(evnames, names(w))
    }
  }
  msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist, evnames, PACKAGE="xgboost")
  return(msg)
}
@ -1,222 +1,24 @@
# depends on the Matrix package
.onLoad <- function(libname, pkgname) {
  library.dynam("xgboost", pkgname, libname);
}
.onUnload <- function(libpath) {
  library.dynam.unload("xgboost", libpath);
}
# Main function for xgboost-package
xgboost = function(x=NULL, y=NULL, file=NULL, nrounds=10, params, watchlist=list(),
                   obj=NULL, feval=NULL, margin=NULL)
{
  if (is.null(x) && is.null(y))
  {
    if (is.null(file))
      stop('xgboost needs input data, either R objects or local files.')
    dtrain = xgb.DMatrix(file)
  }
  else
    dtrain = xgb.DMatrix(x, info=list(label=y))
  if (!is.null(margin))
  {
    succ <- xgb.setinfo(dtrain, "base_margin", margin)
    if (!succ)
      warning('Attempt to use margin failed.')
  }
  bst <- xgb.train(params, dtrain, nrounds, watchlist, obj, feval)
  return(bst)
}
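
# usage sketch, from the demo: train directly from R objects or from a local file
#   bst <- xgboost(x, y, params=param, watchlist=watchlist)
#   bst <- xgboost(file='agaricus.txt.train', params=param, watchlist=watchlist)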
@ -1,4 +1,10 @@
This is the subfolder for the experimental version of the R package.

Not yet ready.

Installation:

```r
require(devtools)
install_github('xgboost','tqchen',subdir='R-package')
```
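
A minimal usage sketch (assuming the demo data files shipped under
`inst/examples`, as used in `demo.R`):

```r
require(xgboost)
param <- list("bst:max_depth"=2, "bst:eta"=1, "silent"=1,
              "objective"="binary:logistic")
dtrain <- xgb.DMatrix("agaricus.txt.train")
bst <- xgb.train(param, dtrain, nrounds=2)
preds <- xgb.predict(bst, dtrain)
```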
127 R-package/inst/examples/demo-old.R Normal file
@ -0,0 +1,127 @@
# load xgboost library
require(xgboost)
require(methods)

# helper function to read libsvm format
# this is written inefficiently: it loads a dense matrix, then converts to sparse
# use it only for demo purposes
# adapted from https://github.com/zygmuntz/r-libsvm-format-read-write/blob/master/f_read.libsvm.r
read.libsvm <- function(fname, maxcol) {
  content <- readLines(fname)
  nline <- length(content)
  label <- numeric(nline)
  mat <- matrix(0, nline, maxcol+1)
  for (i in 1:nline) {
    arr <- as.vector(strsplit(content[i], " ")[[1]])
    label[i] <- as.numeric(arr[[1]])
    for (j in 2:length(arr)) {
      kv <- strsplit(arr[j], ":")[[1]]
      # add 1 to avoid a 0 index
      findex <- as.integer(kv[1]) + 1
      fvalue <- as.numeric(kv[2])
      mat[i,findex] <- fvalue
    }
  }
  mat <- as(mat, "sparseMatrix")
  return(list(label=label, data=mat))
}

# test code here
dtrain <- xgb.DMatrix("agaricus.txt.train")
dtest <- xgb.DMatrix("agaricus.txt.test")
param = list("bst:max_depth"=2, "bst:eta"=1, "silent"=1, "objective"="binary:logistic")
watchlist <- list("eval"=dtest, "train"=dtrain)
# training xgboost model
bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)
# make prediction
preds <- xgb.predict(bst, dtest)
labels <- xgb.getinfo(dtest, "label")
err <- as.numeric(sum(as.integer(preds > 0.5) != labels)) / length(labels)
# print error rate
print(paste("error=", err))

# dump model
xgb.dump(bst, "dump.raw.txt")
# dump model with feature map
xgb.dump(bst, "dump.nice.txt", "featmap.txt")

# save dmatrix into binary buffer
succ <- xgb.save(dtest, "dtest.buffer")
# save model into file
succ <- xgb.save(bst, "xgb.model")
# load model and data in
bst2 <- xgb.Booster(modelfile="xgb.model")
dtest2 <- xgb.DMatrix("dtest.buffer")
preds2 <- xgb.predict(bst2, dtest2)
# assert they are the same
stopifnot(sum(abs(preds2 - preds)) == 0)
###
# build DMatrix from a sparseMatrix
###
print('start running example of building DMatrix from R sparseMatrix')
csc <- read.libsvm("agaricus.txt.train", 126)
label <- csc$label
data <- csc$data
dtrain <- xgb.DMatrix(data, info=list(label=label))
watchlist <- list("eval"=dtest, "train"=dtrain)
bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)

###
# build DMatrix from a dense matrix
###
print('start running example of building DMatrix from R matrix')
mat = as.matrix(data)
dtrain <- xgb.DMatrix(mat, info=list(label=label))
watchlist <- list("eval"=dtest, "train"=dtrain)
bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)

###
# advanced: customized loss function
#
print("start running example of using a customized objective function")
# note: for a customized objective function, we leave objective as default
# note: what we get from prediction is the margin value
# you must know what you are doing
param <- list("bst:max_depth" = 2, "bst:eta" = 1, "silent" = 1)
# user-defined objective function: given the prediction, return gradient and second-order gradient
# this is log-likelihood loss
logregobj <- function(preds, dtrain) {
  labels <- xgb.getinfo(dtrain, "label")
  preds <- 1.0 / (1.0 + exp(-preds))
  grad <- preds - labels
  hess <- preds * (1.0 - preds)
  return(list(grad=grad, hess=hess))
}
# user-defined evaluation function, returns list(metric="metric-name", value="metric-value")
# NOTE: with a customized loss function, the default prediction value is the margin
# this may make built-in evaluation metrics not work properly
# for example, with logistic loss the prediction is the score before the logistic transformation
# the built-in evaluation error assumes the input is after the logistic transformation
# keep this in mind when you use the customization; you may need to write a customized evaluation function
evalerror <- function(preds, dtrain) {
  labels <- xgb.getinfo(dtrain, "label")
  err <- as.numeric(sum(labels != (preds > 0.0))) / length(labels)
  return(list(metric="error", value=err))
}

# training with a customized objective; we can also do step-by-step training
# simply look at xgboost.py's implementation of train
bst <- xgb.train(param, dtrain, nround=2, watchlist, logregobj, evalerror)

###
# advanced: start from an initial base prediction
#
print("start running example of starting from an initial prediction")
# specify parameters via a list; definitions are the same as in the C++ version
param = list("bst:max_depth"=2, "bst:eta"=1, "silent"=1, "objective"="binary:logistic")
# train xgboost for 1 round
bst <- xgb.train(param, dtrain, 1, watchlist)
# Note: we need the margin value instead of the transformed prediction in base_margin
# predicting with outputmargin=TRUE always gives margin values before the logistic transformation
ptrain <- xgb.predict(bst, dtrain, outputmargin=TRUE)
ptest <- xgb.predict(bst, dtest, outputmargin=TRUE)
succ <- xgb.setinfo(dtrain, "base_margin", ptrain)
succ <- xgb.setinfo(dtest, "base_margin", ptest)
print("this is the result of running from an initial prediction")
bst <- xgb.train(param, dtrain, 1, watchlist)
@ -1,96 +1,82 @@
# load xgboost library
require(xgboost)
require(methods)

# helper function to read libsvm format
# this is written inefficiently: it loads a dense matrix, then converts to sparse
# use it only for demo purposes
# adapted from https://github.com/zygmuntz/r-libsvm-format-read-write/blob/master/f_read.libsvm.r
read.libsvm = function(fname, maxcol) {
  content = readLines(fname)
  nline = length(content)
  label = numeric(nline)
  mat = matrix(0, nline, maxcol+1)
  for (i in 1:nline) {
    arr = as.vector(strsplit(content[i], " ")[[1]])
    label[i] = as.numeric(arr[[1]])
    for (j in 2:length(arr)) {
      kv = strsplit(arr[j], ":")[[1]]
      # add 1 to avoid a 0 index
      findex = as.integer(kv[1]) + 1
      fvalue = as.numeric(kv[2])
      mat[i,findex] = fvalue
    }
  }
  mat = as(mat, "sparseMatrix")
  return(list(label=label, data=mat))
}

# Parameter setting
dtrain <- xgb.DMatrix("agaricus.txt.train")
dtest <- xgb.DMatrix("agaricus.txt.test")
param = list("bst:max_depth"=2, "bst:eta"=1, "silent"=1, "objective"="binary:logistic")
watchlist = list("eval"=dtest, "train"=dtrain)

###########################
# Train from local file
###########################

# Training
bst = xgboost(file='agaricus.txt.train', params=param, watchlist=watchlist)
# Prediction
pred = predict(bst, 'agaricus.txt.test')
# Performance
labels = xgb.getinfo(dtest, "label")
err = as.numeric(sum(as.integer(pred > 0.5) != labels)) / length(labels)
print(paste("error=", err))

# dump model
xgb.dump(bst, "dump.raw.txt")
# dump model with feature map
xgb.dump(bst, "dump.nice.txt", "featmap.txt")

###########################
# Train from R object
###########################

csc = read.libsvm("agaricus.txt.train", 126)
y = csc$label
x = csc$data
# x as sparse Matrix
class(x)
# Training
bst = xgboost(x, y, params=param, watchlist=watchlist)
# Prediction
pred = predict(bst, 'agaricus.txt.test')
# Performance
labels = xgb.getinfo(dtest, "label")
err = as.numeric(sum(as.integer(pred > 0.5) != labels)) / length(labels)
print(paste("error=", err))

# Training with dense matrix
x = as.matrix(x)
bst = xgboost(x, y, params=param, watchlist=watchlist)

###########################
# Train with customization
###########################

###
# advanced: customized loss function
#
print("start running example of using a customized objective function")
# note: for a customized objective function, we leave objective as default
# note: what we get from prediction is the margin value
# you must know what you are doing
param <- list("bst:max_depth" = 2, "bst:eta" = 1, "silent" = 1)
# user-defined objective function: given the prediction, return gradient and second-order gradient
# this is log-likelihood loss
logregobj = function(preds, dtrain) {
  labels = xgb.getinfo(dtrain, "label")
  preds = 1.0 / (1.0 + exp(-preds))
  grad = preds - labels
  hess = preds * (1.0 - preds)
  return(list(grad=grad, hess=hess))
}
# user-defined evaluation function, returns list(metric="metric-name", value="metric-value")
@ -99,29 +85,18 @@ logregobj <- function(preds, dtrain) {
# for example, with logistic loss the prediction is the score before the logistic transformation
# the built-in evaluation error assumes the input is after the logistic transformation
# keep this in mind when you use the customization; you may need to write a customized evaluation function
evalerror = function(preds, dtrain) {
  labels = xgb.getinfo(dtrain, "label")
  err = as.numeric(sum(labels != (preds > 0.0))) / length(labels)
  return(list(metric="error", value=err))
}

# training with a customized objective
bst = xgboost(x, y, params=param, watchlist=watchlist, obj=logregobj, feval=evalerror)

############################
# Train with previous result
############################

bst = xgboost(x, y, params=param, watchlist=watchlist)
pred = predict(bst, 'agaricus.txt.train', outputmargin=TRUE)
bst2 = xgboost(x, y, params=param, watchlist=watchlist, margin=pred)