Merge branch 'master' of ssh://github.com/tqchen/xgboost

tqchen@graphlab.com 2014-08-26 10:24:04 -07:00
commit 9eb32b9dd4
7 changed files with 478 additions and 324 deletions


@@ -1,12 +1,18 @@
 importClassesFrom("Matrix", dgCMatrix, dgeMatrix)
+export(xgboost)
 export(xgb.DMatrix)
 export(xgb.getinfo)
 export(xgb.setinfo)
-export(xgb.Booster)
-export(xgb.train)
-export(xgb.save)
-export(xgb.predict)
-export(xgb.dump)
+# exportClasses(xgb.Boost)
+exportMethods(predict)
+# export(xgb.Booster)
+# export(xgb.train)
+# export(xgb.save)
+# export(xgb.predict)
+# export(xgb.dump)


@@ -0,0 +1,16 @@
#' @export
setClass("xgb.Booster")
#' @export
setMethod("predict",
          signature = "xgb.Booster",
          definition = function(object, newdata, outputmargin = FALSE) {
            if (class(newdata) != "xgb.DMatrix") {
              newdata = xgb.DMatrix(newdata)
            }
            ret <- .Call("XGBoosterPredict_R", object, newdata,
                         as.integer(outputmargin), PACKAGE = "xgboost")
            return(ret)
          })
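# A usage sketch (illustrative, with an assumed model file name): because the
# method routes raw input through xgb.DMatrix(), it accepts a dense matrix, a
# dgCMatrix, or a file path in addition to an xgb.DMatrix:
#   bst   <- xgb.Booster(modelfile = "xgb.model")
#   preds <- predict(bst, matrix(runif(8), nrow = 2, ncol = 4))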

R-package/R/xgb.Utils.R (new file)

@@ -0,0 +1,222 @@
# depends on the Matrix package
.onLoad <- function(libname, pkgname) {
  library.dynam("xgboost", pkgname, libname)
}
.onUnload <- function(libpath) {
  library.dynam.unload("xgboost", libpath)
}
# construct a DMatrix from a file, a dense matrix, or a dgCMatrix
xgb.DMatrix <- function(data, info = list(), missing = 0.0) {
  if (typeof(data) == "character") {
    handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE), PACKAGE = "xgboost")
  } else if (is.matrix(data)) {
    handle <- .Call("XGDMatrixCreateFromMat_R", data, missing, PACKAGE = "xgboost")
  } else if (class(data) == "dgCMatrix") {
    handle <- .Call("XGDMatrixCreateFromCSC_R", data@p, data@i, data@x, PACKAGE = "xgboost")
  } else {
    stop(paste("xgb.DMatrix: cannot construct a DMatrix from", typeof(data)))
  }
  dmat <- structure(handle, class = "xgb.DMatrix")
  if (length(info) != 0) {
    for (i in 1:length(info)) {
      p <- info[i]
      xgb.setinfo(dmat, names(p), p[[1]])
    }
  }
  return(dmat)
}
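# A usage sketch (illustrative; the toy data is made up): build a DMatrix from
# a dense matrix and attach labels in one call:
#   dtoy <- xgb.DMatrix(matrix(runif(40), 10, 4), info = list(label = rep(0:1, 5)))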
# get information from a DMatrix
xgb.getinfo <- function(dmat, name) {
  if (typeof(name) != "character") {
    stop("xgb.getinfo: name must be character")
  }
  if (class(dmat) != "xgb.DMatrix") {
    stop("xgb.getinfo: first argument dmat must be xgb.DMatrix")
  }
  if (name != "label" &&
      name != "weight" &&
      name != "base_margin") {
    stop(paste("xgb.getinfo: unknown info name", name))
  }
  ret <- .Call("XGDMatrixGetInfo_R", dmat, name, PACKAGE = "xgboost")
  return(ret)
}
# set information into a DMatrix; this mutates the DMatrix
xgb.setinfo <- function(dmat, name, info) {
  if (class(dmat) != "xgb.DMatrix") {
    stop("xgb.setinfo: first argument dmat must be xgb.DMatrix")
  }
  if (name == "label") {
    .Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info), PACKAGE = "xgboost")
    return(TRUE)
  }
  if (name == "weight") {
    .Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info), PACKAGE = "xgboost")
    return(TRUE)
  }
  if (name == "base_margin") {
    .Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info), PACKAGE = "xgboost")
    return(TRUE)
  }
  if (name == "group") {
    .Call("XGDMatrixSetInfo_R", dmat, name, as.integer(info), PACKAGE = "xgboost")
    return(TRUE)
  }
  stop(paste("xgb.setinfo: unknown info name", name))
  return(FALSE)
}
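# A usage sketch (illustrative, continuing the toy DMatrix above): a set/get
# round trip; "weight" expects one numeric entry per row:
#   succ <- xgb.setinfo(dtoy, "weight", rep(1.0, 10))
#   w    <- xgb.getinfo(dtoy, "weight")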
# construct a Booster from cachelist
xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
  if (typeof(cachelist) != "list") {
    stop("xgb.Booster: only accepts list of DMatrix as cachelist")
  }
  for (dm in cachelist) {
    if (class(dm) != "xgb.DMatrix") {
      stop("xgb.Booster: only accepts list of DMatrix as cachelist")
    }
  }
  handle <- .Call("XGBoosterCreate_R", cachelist, PACKAGE = "xgboost")
  .Call("XGBoosterSetParam_R", handle, "seed", "0", PACKAGE = "xgboost")
  if (length(params) != 0) {
    for (i in 1:length(params)) {
      p <- params[i]
      .Call("XGBoosterSetParam_R", handle, names(p), as.character(p), PACKAGE = "xgboost")
    }
  }
  if (!is.null(modelfile)) {
    if (typeof(modelfile) != "character") {
      stop("xgb.Booster: modelfile must be character")
    }
    .Call("XGBoosterLoadModel_R", handle, modelfile, PACKAGE = "xgboost")
  }
  return(structure(handle, class = "xgb.Booster"))
}
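# A usage sketch (illustrative, file name assumed): reload a previously saved
# model without any cached DMatrix:
#   bst <- xgb.Booster(modelfile = "xgb.model")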
# train a model for nrounds iterations using the given parameters
xgb.train <- function(params, dtrain, nrounds = 10, watchlist = list(), obj = NULL, feval = NULL) {
  if (typeof(params) != "list") {
    stop("xgb.train: first argument params must be list")
  }
  if (class(dtrain) != "xgb.DMatrix") {
    stop("xgb.train: second argument dtrain must be xgb.DMatrix")
  }
  bst <- xgb.Booster(params, append(watchlist, dtrain))
  for (i in 1:nrounds) {
    if (is.null(obj)) {
      succ <- xgb.iter.update(bst, dtrain, i - 1)
    } else {
      pred <- xgb.predict(bst, dtrain)
      gpair <- obj(pred, dtrain)
      succ <- xgb.iter.boost(bst, dtrain, gpair)
    }
    if (length(watchlist) != 0) {
      if (is.null(feval)) {
        msg <- xgb.iter.eval(bst, watchlist, i - 1)
        cat(msg); cat("\n")
      } else {
        cat("["); cat(i); cat("]")
        for (j in 1:length(watchlist)) {
          w <- watchlist[j]
          if (length(names(w)) == 0) {
            stop("xgb.train: a name tag must be present for every element in watchlist")
          }
          ret <- feval(xgb.predict(bst, w[[1]]), w[[1]])
          cat("\t"); cat(names(w)); cat("-"); cat(ret$metric)
          cat(":"); cat(ret$value)
        }
        cat("\n")
      }
    }
  }
  return(bst)
}
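# A usage sketch (illustrative; the agaricus files ship with the demo): two
# boosting rounds, with the watchlist evaluated and printed every round:
#   dtrain <- xgb.DMatrix("agaricus.txt.train")
#   param  <- list("bst:max_depth" = 2, "bst:eta" = 1, "objective" = "binary:logistic")
#   bst    <- xgb.train(param, dtrain, nrounds = 2, watchlist = list(train = dtrain))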
# save model or DMatrix to file
xgb.save <- function(handle, fname) {
  if (typeof(fname) != "character") {
    stop("xgb.save: fname must be character")
  }
  if (class(handle) == "xgb.Booster") {
    .Call("XGBoosterSaveModel_R", handle, fname, PACKAGE = "xgboost")
    return(TRUE)
  }
  if (class(handle) == "xgb.DMatrix") {
    .Call("XGDMatrixSaveBinary_R", handle, fname, as.integer(FALSE), PACKAGE = "xgboost")
    return(TRUE)
  }
  stop("xgb.save: the input must be either xgb.DMatrix or xgb.Booster")
  return(FALSE)
}
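# A usage sketch (illustrative, file names assumed): the same call handles both
# object kinds:
#   succ <- xgb.save(bst, "xgb.model")        # serialize the booster
#   succ <- xgb.save(dtrain, "dtrain.buffer") # binary DMatrix buffer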
# predict
xgb.predict <- function(booster, dmat, outputmargin = FALSE) {
  if (class(booster) != "xgb.Booster") {
    stop("xgb.predict: first argument must be type xgb.Booster")
  }
  if (class(dmat) != "xgb.DMatrix") {
    stop("xgb.predict: second argument must be type xgb.DMatrix")
  }
  ret <- .Call("XGBoosterPredict_R", booster, dmat, as.integer(outputmargin), PACKAGE = "xgboost")
  return(ret)
}
# dump model
xgb.dump <- function(booster, fname, fmap = "") {
  if (class(booster) != "xgb.Booster") {
    stop("xgb.dump: first argument must be type xgb.Booster")
  }
  if (typeof(fname) != "character") {
    stop("xgb.dump: second argument must be type character")
  }
  .Call("XGBoosterDumpModel_R", booster, fname, fmap, PACKAGE = "xgboost")
  return(TRUE)
}
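# A usage sketch (illustrative; featmap.txt comes from the demo folder): dump
# the trees as text; passing a feature map replaces feature indices with names:
#   xgb.dump(bst, "dump.raw.txt")
#   xgb.dump(bst, "dump.nice.txt", "featmap.txt")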
##--------------------------------------
# The following are low-level functions for single-iteration updates.
# They are not needed unless you want step-by-step control over training.
#---------------------------------------
# run one boosting iteration on dtrain
xgb.iter.update <- function(booster, dtrain, iter) {
  if (class(booster) != "xgb.Booster") {
    stop("xgb.iter.update: first argument must be type xgb.Booster")
  }
  if (class(dtrain) != "xgb.DMatrix") {
    stop("xgb.iter.update: second argument must be type xgb.DMatrix")
  }
  .Call("XGBoosterUpdateOneIter_R", booster, as.integer(iter), dtrain, PACKAGE = "xgboost")
  return(TRUE)
}
# run one boosting iteration with user-supplied gradient statistics
xgb.iter.boost <- function(booster, dtrain, gpair) {
  if (class(booster) != "xgb.Booster") {
    stop("xgb.iter.boost: first argument must be type xgb.Booster")
  }
  if (class(dtrain) != "xgb.DMatrix") {
    stop("xgb.iter.boost: second argument must be type xgb.DMatrix")
  }
  .Call("XGBoosterBoostOneIter_R", booster, dtrain, gpair$grad, gpair$hess, PACKAGE = "xgboost")
  return(TRUE)
}
# evaluate the booster on a watchlist at one iteration
xgb.iter.eval <- function(booster, watchlist, iter) {
  if (class(booster) != "xgb.Booster") {
    stop("xgb.iter.eval: first argument must be type xgb.Booster")
  }
  if (typeof(watchlist) != "list") {
    stop("xgb.iter.eval: only accepts list of DMatrix as watchlist")
  }
  for (w in watchlist) {
    if (class(w) != "xgb.DMatrix") {
      stop("xgb.iter.eval: watchlist can only contain xgb.DMatrix")
    }
  }
  evnames <- list()
  if (length(watchlist) != 0) {
    for (i in 1:length(watchlist)) {
      w <- watchlist[i]
      if (length(names(w)) == 0) {
        stop("xgb.iter.eval: a name tag must be present for every element in watchlist")
      }
      evnames <- append(evnames, names(w))
    }
  }
  msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist, evnames, PACKAGE = "xgboost")
  return(msg)
}
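# A usage sketch (illustrative, reusing names from the sketches above): this is
# essentially what xgb.train does internally, one iteration at a time:
#   bst <- xgb.Booster(param, list(dtrain))
#   for (i in 1:2) {
#     succ <- xgb.iter.update(bst, dtrain, i - 1)
#     cat(xgb.iter.eval(bst, list(train = dtrain), i - 1)); cat("\n")
#   }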


@@ -1,222 +1,24 @@
# (The previous contents of this file, the low-level functions now in
# R-package/R/xgb.Utils.R above, are removed; only the main wrapper remains.)

# Main function for xgboost-package
xgboost = function(x = NULL, y = NULL, file = NULL, nrounds = 10, params, watchlist = list(),
                   obj = NULL, feval = NULL, margin = NULL)
{
  if (is.null(x) && is.null(y)) {
    if (is.null(file))
      stop('xgboost needs input data, either R objects or local files.')
    dtrain = xgb.DMatrix(file)
  } else {
    dtrain = xgb.DMatrix(x, info = list(label = y))
  }
  if (!is.null(margin)) {
    succ <- xgb.setinfo(dtrain, "base_margin", margin)
    if (!succ)
      warning('Attempt to use margin failed.')
  }
  bst <- xgb.train(params, dtrain, nrounds, watchlist, obj, feval)
  return(bst)
}
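# A usage sketch (illustrative, reusing param from the demo below): the wrapper
# accepts either R objects or a file path, plus an optional starting margin:
#   bst <- xgboost(x, y, params = param)
#   bst <- xgboost(file = 'agaricus.txt.train', params = param)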


@@ -1,4 +1,10 @@
This is the subfolder for the experimental version of the R package.
Not yet ready.
Installation:
```r
require(devtools)
install_github('xgboost', 'tqchen', subdir = 'R-package')
```


@@ -0,0 +1,127 @@
# load xgboost library
require(xgboost)
require(methods)
# helper function to read libsvm format
# this is inefficient: it loads the data dense, then converts to sparse
# use this only for demo purposes
# adapted from https://github.com/zygmuntz/r-libsvm-format-read-write/blob/master/f_read.libsvm.r
read.libsvm <- function(fname, maxcol) {
  content <- readLines(fname)
  nline <- length(content)
  label <- numeric(nline)
  mat <- matrix(0, nline, maxcol + 1)
  for (i in 1:nline) {
    arr <- as.vector(strsplit(content[i], " ")[[1]])
    label[i] <- as.numeric(arr[[1]])
    for (j in 2:length(arr)) {
      kv <- strsplit(arr[j], ":")[[1]]
      # to avoid 0 index
      findex <- as.integer(kv[1]) + 1
      fvalue <- as.numeric(kv[2])
      mat[i, findex] <- fvalue
    }
  }
  mat <- as(mat, "sparseMatrix")
  return(list(label = label, data = mat))
}
# test code here
dtrain <- xgb.DMatrix("agaricus.txt.train")
dtest <- xgb.DMatrix("agaricus.txt.test")
param = list("bst:max_depth"=2, "bst:eta"=1, "silent"=1, "objective"="binary:logistic")
watchlist <- list("eval"=dtest,"train"=dtrain)
# training xgboost model
bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)
# make prediction
preds <- xgb.predict(bst, dtest)
labels <- xgb.getinfo(dtest, "label")
err <- as.numeric(sum(as.integer(preds > 0.5) != labels)) / length(labels)
# print error rate
print(paste("error=",err))
# dump model
xgb.dump(bst, "dump.raw.txt")
# dump model with feature map
xgb.dump(bst, "dump.nice.txt", "featmap.txt")
# save dmatrix into binary buffer
succ <- xgb.save(dtest, "dtest.buffer")
# save model into file
succ <- xgb.save(bst, "xgb.model")
# load model and data in
bst2 <- xgb.Booster(modelfile="xgb.model")
dtest2 <- xgb.DMatrix("dtest.buffer")
preds2 <- xgb.predict(bst2, dtest2)
# assert they are the same
stopifnot(sum(abs(preds2-preds)) == 0)
###
# build DMatrix from a sparseMatrix
###
print('start running example of building DMatrix from R sparseMatrix')
csc <- read.libsvm("agaricus.txt.train", 126)
label <- csc$label
data <- csc$data
dtrain <- xgb.DMatrix(data, info=list(label=label) )
watchlist <- list("eval"=dtest,"train"=dtrain)
bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)
###
# build DMatrix from a dense matrix
###
print('start running example of building DMatrix from R dense matrix')
mat = as.matrix(data)
dtrain <- xgb.DMatrix(mat, info=list(label=label) )
watchlist <- list("eval"=dtest,"train"=dtrain)
bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)
###
# advanced: customized loss function
#
print("start running example of using a customized objective function")
# note: for a customized objective function, we leave objective as default
# note: what we get from prediction is the margin value
# you must know what you are doing
param <- list("bst:max_depth" = 2, "bst:eta" = 1, "silent" = 1)
# user-defined objective function: given predictions, return gradient and second-order gradient
# this is log-likelihood loss
logregobj <- function(preds, dtrain) {
  labels <- xgb.getinfo(dtrain, "label")
  preds <- 1.0 / (1.0 + exp(-preds))
  grad <- preds - labels
  hess <- preds * (1.0 - preds)
  return(list(grad = grad, hess = hess))
}
# user-defined evaluation function; returns list(metric="metric-name", value="metric-value")
# NOTE: with a customized loss function, the default prediction value is the margin;
# this may make built-in evaluation metrics misbehave.
# For example, with logistic loss the prediction is the score before the logistic
# transformation, while the built-in evaluation error assumes it comes after.
# Keep this in mind; you may need to write a customized evaluation function.
evalerror <- function(preds, dtrain) {
  labels <- xgb.getinfo(dtrain, "label")
  err <- as.numeric(sum(labels != (preds > 0.0))) / length(labels)
  return(list(metric = "error", value = err))
}
# training with a customized objective; we can also do step-by-step training,
# simply look at xgboost.py's implementation of train
bst <- xgb.train(param, dtrain, nround = 2, watchlist, logregobj, evalerror)
###
# advanced: start from an initial base prediction
#
print("start running example to start from an initial prediction")
# specify parameters via list; definitions are the same as in the C++ version
param = list("bst:max_depth"=2, "bst:eta"=1, "silent"=1, "objective"="binary:logistic")
# train xgboost for 1 round
bst <- xgb.train( param, dtrain, 1, watchlist )
# Note: base_margin needs the margin value rather than the transformed prediction;
# predicting with outputmargin=TRUE always gives margin values before the logistic transformation
ptrain <- xgb.predict(bst, dtrain, outputmargin=TRUE)
ptest <- xgb.predict(bst, dtest, outputmargin=TRUE)
succ <- xgb.setinfo(dtrain, "base_margin", ptrain)
succ <- xgb.setinfo(dtest, "base_margin", ptest)
print ("this is result of running from initial prediction")
bst <- xgb.train( param, dtrain, 1, watchlist )


@@ -1,97 +1,83 @@
# (The removed side of this diff matched the demo file above; below is the
# rewritten demo, which uses the xgboost() wrapper and the predict method.)

require(xgboost)
# helper function to read libsvm format
# this is inefficient: it loads the data dense, then converts to sparse
# use this only for demo purposes
# adapted from https://github.com/zygmuntz/r-libsvm-format-read-write/blob/master/f_read.libsvm.r
read.libsvm = function(fname, maxcol) {
  content = readLines(fname)
  nline = length(content)
  label = numeric(nline)
  mat = matrix(0, nline, maxcol + 1)
  for (i in 1:nline) {
    arr = as.vector(strsplit(content[i], " ")[[1]])
    label[i] = as.numeric(arr[[1]])
    for (j in 2:length(arr)) {
      kv = strsplit(arr[j], ":")[[1]]
      # to avoid 0 index
      findex = as.integer(kv[1]) + 1
      fvalue = as.numeric(kv[2])
      mat[i, findex] = fvalue
    }
  }
  mat = as(mat, "sparseMatrix")
  return(list(label = label, data = mat))
}
# Parameter setting
dtrain <- xgb.DMatrix("agaricus.txt.train")
dtest <- xgb.DMatrix("agaricus.txt.test")
param = list("bst:max_depth" = 2, "bst:eta" = 1, "silent" = 1, "objective" = "binary:logistic")
watchlist = list("eval" = dtest, "train" = dtrain)

###########################
# Train from local file
###########################

# Training
bst = xgboost(file = 'agaricus.txt.train', params = param, watchlist = watchlist)
# Prediction
pred = predict(bst, 'agaricus.txt.test')
# Performance
labels = xgb.getinfo(dtest, "label")
err = as.numeric(sum(as.integer(pred > 0.5) != labels)) / length(labels)
print(paste("error=", err))

###########################
# Train from R object
###########################

csc = read.libsvm("agaricus.txt.train", 126)
y = csc$label
x = csc$data
# x as sparse Matrix
class(x)

# Training
bst = xgboost(x, y, params = param, watchlist = watchlist)
# Prediction
pred = predict(bst, 'agaricus.txt.test')
# Performance
labels = xgb.getinfo(dtest, "label")
err = as.numeric(sum(as.integer(pred > 0.5) != labels)) / length(labels)
print(paste("error=", err))

# Training with dense matrix
x = as.matrix(x)
bst = xgboost(x, y, params = param, watchlist = watchlist)

###########################
# Train with customization
###########################

# user-defined objective function: given predictions, return gradient and second-order gradient
# this is log-likelihood loss
logregobj = function(preds, dtrain) {
  labels = xgb.getinfo(dtrain, "label")
  preds = 1.0 / (1.0 + exp(-preds))
  grad = preds - labels
  hess = preds * (1.0 - preds)
  return(list(grad = grad, hess = hess))
}
# user-defined evaluation function; returns list(metric="metric-name", value="metric-value")
# NOTE: with a customized loss function, the default prediction value is the margin;
# this may make built-in evaluation metrics misbehave.
# For example, with logistic loss the prediction is the score before the logistic
# transformation, while the built-in evaluation error assumes it comes after.
# Keep this in mind; you may need to write a customized evaluation function.
evalerror = function(preds, dtrain) {
  labels = xgb.getinfo(dtrain, "label")
  err = as.numeric(sum(labels != (preds > 0.0))) / length(labels)
  return(list(metric = "error", value = err))
}
bst = xgboost(x, y, params = param, watchlist = watchlist, obj = logregobj, feval = evalerror)

############################
# Train with previous result
############################

bst = xgboost(x, y, params = param, watchlist = watchlist)
pred = predict(bst, 'agaricus.txt.train', outputmargin = TRUE)
bst2 = xgboost(x, y, params = param, watchlist = watchlist, margin = pred)