remove old R demo files
parent 247e0d5d78
commit a060a2e9a6
@@ -1,151 +0,0 @@
require(xgboost)
require(methods)

# Helper function to read the libsvm format. It is badly written: it loads the
# data as dense and then converts to sparse, so use it for demo purposes only.
# Adapted from
# https://github.com/zygmuntz/r-libsvm-format-read-write/blob/master/f_read.libsvm.r
read.libsvm <- function(fname, maxcol) {
    content <- readLines(fname)
    nline <- length(content)
    label <- numeric(nline)
    mat <- matrix(0, nline, maxcol + 1)
    for (i in 1:nline) {
        arr <- as.vector(strsplit(content[i], " ")[[1]])
        label[i] <- as.numeric(arr[[1]])
        for (j in 2:length(arr)) {
            kv <- strsplit(arr[j], ":")[[1]]
            # shift the feature index by one to avoid a 0 index in R
            findex <- as.integer(kv[1]) + 1
            fvalue <- as.numeric(kv[2])
            mat[i, findex] <- fvalue
        }
    }
    mat <- as(mat, "sparseMatrix")
    return(list(label = label, data = mat))
}
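
# For reference, each line of a libsvm-format file looks like (made-up example):
#   1 3:1 10:1 11:1
# i.e. a label followed by space-separated index:value pairs; the indices are
# 0-based, which is why findex adds 1 above.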

############################ Test xgb.DMatrix with local file, sparse matrix and dense matrix in R.

# Directly read in a local file
dtrain <- xgb.DMatrix("agaricus.txt.train")
class(dtrain)

# Read the file in R
csc <- read.libsvm("agaricus.txt.train", 126)
y <- csc$label
x <- csc$data

# x as a sparse matrix
class(x)
dtrain <- xgb.DMatrix(x, label = y)

# x as a dense matrix
dense.x <- as.matrix(x)
dtrain <- xgb.DMatrix(dense.x, label = y)

############################ Test xgboost with local file, sparse matrix and dense matrix in R.

# Test with a DMatrix object
bst <- xgboost(data = dtrain, max_depth = 2, eta = 1,
               objective = "binary:logistic")

# verbose = 0 (silent), 1 (per-round evaluation), 2 (additional information)
bst <- xgboost(data = dtrain, max_depth = 2, eta = 1,
               objective = "binary:logistic", verbose = 0)
bst <- xgboost(data = dtrain, max_depth = 2, eta = 1,
               objective = "binary:logistic", verbose = 1)
bst <- xgboost(data = dtrain, max_depth = 2, eta = 1,
               objective = "binary:logistic", verbose = 2)

# Test with a local file
bst <- xgboost(data = "agaricus.txt.train", max_depth = 2, eta = 1,
               objective = "binary:logistic")

# Test with a sparse matrix
bst <- xgboost(data = x, label = y, max_depth = 2, eta = 1,
               objective = "binary:logistic")

# Test with a dense matrix
bst <- xgboost(data = dense.x, label = y, max_depth = 2, eta = 1,
               objective = "binary:logistic")
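
# Note (added): the variants above all fit the same model; xgboost() accepts a
# local file path, a sparse dgCMatrix, or a dense base-R matrix for `data`.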

############################ Test predict

# Prediction with a DMatrix object
dtest <- xgb.DMatrix("agaricus.txt.test")
pred <- predict(bst, dtest)

# Prediction with a local test file
pred <- predict(bst, "agaricus.txt.test")

# Prediction with a sparse matrix
csc <- read.libsvm("agaricus.txt.test", 126)
test.y <- csc$label
test.x <- csc$data
pred <- predict(bst, test.x)

# Extract the labels with xgb.getinfo and compute the error rate
labels <- xgb.getinfo(dtest, "label")
err <- as.numeric(sum(as.integer(pred > 0.5) != labels)) / length(labels)
print(paste("error=", err))

############################ Save and load model to hard disk

# save the model to a binary local file
xgb.save(bst, "model.save")

# load the binary model back into R
bst <- xgb.load("model.save")
pred <- predict(bst, test.x)

# dump the model to a text file
xgb.dump(bst, "model.dump")

# save a DMatrix object to hard disk
xgb.DMatrix.save(dtrain, "dtrain.save")

# load a DMatrix object back into R
dtrain <- xgb.DMatrix("dtrain.save")
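
# Note (added): xgb.save() writes a binary model that xgb.load() can restore,
# while xgb.dump() writes a human-readable text description of the trees that
# is meant for inspection, not for reloading.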

############################ More flexible training function xgb.train

param <- list(max_depth = 2, eta = 1, silent = 1, objective = "binary:logistic")
watchlist <- list(eval = dtest, train = dtrain)

# train the xgboost model
bst <- xgb.train(param, dtrain, nround = 2, watchlist = watchlist)
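
# Note (added): with a watchlist, xgb.train() prints the evaluation metric on
# each named dataset (here `eval` and `train`) after every boosting round.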

############################ Customized loss function

param <- list(max_depth = 2, eta = 1, silent = 1)

# Note: for a customized objective function we leave `objective` unset, and the
# predictions passed in are the raw margin values, so you must know what you
# are doing.

# User-defined objective function: given predictions, return the gradient and
# the second-order gradient. This is the log-likelihood loss.
logregobj <- function(preds, dtrain) {
    labels <- xgb.getinfo(dtrain, "label")
    preds <- 1 / (1 + exp(-preds))
    grad <- preds - labels
    hess <- preds * (1 - preds)
    return(list(grad = grad, hess = hess))
}
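
# Derivation check (added note): for log loss l = -[y*log(p) + (1-y)*log(1-p)]
# with p = 1/(1 + exp(-m)) and margin m, dl/dm = p - y and d2l/dm2 = p*(1 - p),
# matching the grad and hess returned above.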

# User-defined evaluation function: return list(metric = "metric-name",
# value = metric-value). NOTE: with a customized loss function the default
# prediction value is the margin, which can make built-in evaluation metrics
# misbehave. For example, with logistic loss the prediction is the score before
# the logistic transformation, while the built-in error metric assumes input
# after the transformation. Keep this in mind when customizing; you may need
# to write a customized evaluation function as well.
evalerror <- function(preds, dtrain) {
    labels <- xgb.getinfo(dtrain, "label")
    err <- as.numeric(sum(labels != (preds > 0))) / length(labels)
    return(list(metric = "error", value = err))
}
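
# Note (added): preds here are margins, so the cutoff preds > 0 is equivalent
# to a predicted probability above 0.5 after the logistic transformation.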

# Training with a customized objective; we can also train step by step. See
# xgboost.py's implementation of train for the details.
bst <- xgb.train(param, dtrain, nround = 2, watchlist, logregobj, evalerror)
@@ -1,127 +0,0 @@
# load the xgboost library
require(xgboost)
require(methods)

# Helper function to read the libsvm format. It is badly written: it loads the
# data as dense and then converts to sparse, so use it for demo purposes only.
# Adapted from https://github.com/zygmuntz/r-libsvm-format-read-write/blob/master/f_read.libsvm.r
read.libsvm <- function(fname, maxcol) {
    content <- readLines(fname)
    nline <- length(content)
    label <- numeric(nline)
    mat <- matrix(0, nline, maxcol + 1)
    for (i in 1:nline) {
        arr <- as.vector(strsplit(content[i], " ")[[1]])
        label[i] <- as.numeric(arr[[1]])
        for (j in 2:length(arr)) {
            kv <- strsplit(arr[j], ":")[[1]]
            # shift the feature index by one to avoid a 0 index in R
            findex <- as.integer(kv[1]) + 1
            fvalue <- as.numeric(kv[2])
            mat[i, findex] <- fvalue
        }
    }
    mat <- as(mat, "sparseMatrix")
    return(list(label = label, data = mat))
}

# test code here
dtrain <- xgb.DMatrix("agaricus.txt.train")
dtest <- xgb.DMatrix("agaricus.txt.test")
param <- list("bst:max_depth" = 2, "bst:eta" = 1, "silent" = 1, "objective" = "binary:logistic")
watchlist <- list("eval" = dtest, "train" = dtrain)
# train the xgboost model
bst <- xgb.train(param, dtrain, nround = 2, watchlist = watchlist)
# make predictions
preds <- xgb.predict(bst, dtest)
labels <- xgb.getinfo(dtest, "label")
err <- as.numeric(sum(as.integer(preds > 0.5) != labels)) / length(labels)
# print the error rate
print(paste("error=", err))

# dump the model
xgb.dump(bst, "dump.raw.txt")
# dump the model with a feature map
xgb.dump(bst, "dump.nice.txt", "featmap.txt")

# save the DMatrix into a binary buffer
succ <- xgb.save(dtest, "dtest.buffer")
# save the model into a file
succ <- xgb.save(bst, "xgb.model")
# load the model and data back in
bst2 <- xgb.Booster(modelfile = "xgb.model")
dtest2 <- xgb.DMatrix("dtest.buffer")
preds2 <- xgb.predict(bst2, dtest2)
# assert that the predictions are identical
stopifnot(sum(abs(preds2 - preds)) == 0)
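
# Note (added): the binary model file and DMatrix buffer should round-trip
# losslessly, so the reloaded pair is expected to reproduce the original
# predictions exactly; that is why a strict equality assertion is used above.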

###
# build a DMatrix from a sparseMatrix
###
print("start running example of building a DMatrix from an R sparseMatrix")
csc <- read.libsvm("agaricus.txt.train", 126)
label <- csc$label
data <- csc$data
dtrain <- xgb.DMatrix(data, info = list(label = label))
watchlist <- list("eval" = dtest, "train" = dtrain)
bst <- xgb.train(param, dtrain, nround = 2, watchlist = watchlist)

###
# build a DMatrix from a dense matrix
###
print("start running example of building a DMatrix from an R matrix")
mat <- as.matrix(data)
dtrain <- xgb.DMatrix(mat, info = list(label = label))
watchlist <- list("eval" = dtest, "train" = dtrain)
bst <- xgb.train(param, dtrain, nround = 2, watchlist = watchlist)

###
# advanced: customized loss function
###
print("start running example of using a customized objective function")
# note: for a customized objective function, we leave `objective` as default
# note: what we get in `preds` is the margin value of the prediction;
# you must know what you are doing
param <- list("bst:max_depth" = 2, "bst:eta" = 1, "silent" = 1)
# user-defined objective function: given predictions, return the gradient and
# the second-order gradient; this is the log-likelihood loss
logregobj <- function(preds, dtrain) {
    labels <- xgb.getinfo(dtrain, "label")
    preds <- 1.0 / (1.0 + exp(-preds))
    grad <- preds - labels
    hess <- preds * (1.0 - preds)
    return(list(grad = grad, hess = hess))
}
# user-defined evaluation function: return list(metric = "metric-name", value = metric-value)
# NOTE: with a customized loss function the default prediction value is the margin,
# which may make built-in evaluation metrics misbehave; for example, with logistic
# loss the prediction is the score before the logistic transformation, while the
# built-in evaluation error assumes input after the logistic transformation.
# Keep this in mind when customizing; you may need a customized evaluation function.
evalerror <- function(preds, dtrain) {
    labels <- xgb.getinfo(dtrain, "label")
    err <- as.numeric(sum(labels != (preds > 0.0))) / length(labels)
    return(list(metric = "error", value = err))
}

# training with a customized objective; we can also train step by step,
# simply look at xgboost.py's implementation of train
bst <- xgb.train(param, dtrain, nround = 2, watchlist, logregobj, evalerror)

###
# advanced: start from an initial base prediction
###
print("start running example of starting from an initial prediction")
# specify parameters via a list; definitions are the same as in the C++ version
param <- list("bst:max_depth" = 2, "bst:eta" = 1, "silent" = 1, "objective" = "binary:logistic")
# train xgboost for 1 round
bst <- xgb.train(param, dtrain, 1, watchlist)
# Note: we need the margin value, not the transformed prediction, for base_margin;
# predicting with outputmargin = TRUE always gives the margin values before the
# logistic transformation
ptrain <- xgb.predict(bst, dtrain, outputmargin = TRUE)
ptest <- xgb.predict(bst, dtest, outputmargin = TRUE)
succ <- xgb.setinfo(dtrain, "base_margin", ptrain)
succ <- xgb.setinfo(dtest, "base_margin", ptest)
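
# Note (added): base_margin seeds the starting margin of each instance, so the
# next xgb.train call continues boosting from the round-1 predictions instead
# of starting from zero.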
print("this is the result of running from the initial prediction")
bst <- xgb.train(param, dtrain, 1, watchlist)
@@ -24,45 +24,104 @@ read.libsvm <- function(fname, maxcol) {
     return(list(label = label, data = mat))
 }
 
-# Parameter setting
+############################ Test xgb.DMatrix with local file, sparse matrix and dense matrix in R.
 
+# Directly read in a local file
 dtrain <- xgb.DMatrix("agaricus.txt.train")
-dtest <- xgb.DMatrix("agaricus.txt.test")
-param <- list(`bst:max_depth` = 2, `bst:eta` = 1, silent = 1, objective = "binary:logistic")
-watchlist <- list(eval = dtest, train = dtrain)
-
-########################### Train from local file
-
-# Training
-bst <- xgboost(file = "agaricus.txt.train", params = param, watchlist = watchlist)
-# Prediction
-pred <- predict(bst, "agaricus.txt.test")
-# Performance
-labels <- xgb.getinfo(dtest, "label")
-err <- as.numeric(sum(as.integer(pred > 0.5) != labels)) / length(labels)
-print(paste("error=", err))
-
-########################### Train from R object
+class(dtrain)
 
+# Read the file in R
 csc <- read.libsvm("agaricus.txt.train", 126)
 y <- csc$label
 x <- csc$data
 
 # x as a sparse matrix
 class(x)
+dtrain <- xgb.DMatrix(x, label = y)
 
-# Training
-bst <- xgboost(x, y, params = param, watchlist = watchlist)
-# Prediction
+# x as a dense matrix
+dense.x <- as.matrix(x)
+dtrain <- xgb.DMatrix(dense.x, label = y)
+
+############################ Test xgboost with local file, sparse matrix and dense matrix in R.
+
+# Test with a DMatrix object
+bst <- xgboost(data = dtrain, max_depth = 2, eta = 1,
+               objective = "binary:logistic")
+
+# verbose = 0, 1, 2
+bst <- xgboost(data = dtrain, max_depth = 2, eta = 1,
+               objective = "binary:logistic", verbose = 0)
+bst <- xgboost(data = dtrain, max_depth = 2, eta = 1,
+               objective = "binary:logistic", verbose = 1)
+bst <- xgboost(data = dtrain, max_depth = 2, eta = 1,
+               objective = "binary:logistic", verbose = 2)
+
+# Test with a local file
+bst <- xgboost(data = "agaricus.txt.train", max_depth = 2, eta = 1,
+               objective = "binary:logistic")
+
+# Test with a sparse matrix
+bst <- xgboost(data = x, label = y, max_depth = 2, eta = 1,
+               objective = "binary:logistic")
+
+# Test with a dense matrix
+bst <- xgboost(data = dense.x, label = y, max_depth = 2, eta = 1,
+               objective = "binary:logistic")
+
+############################ Test predict
+
+# Prediction with a DMatrix object
+dtest <- xgb.DMatrix("agaricus.txt.test")
+pred <- predict(bst, dtest)
+
+# Prediction with a local test file
 pred <- predict(bst, "agaricus.txt.test")
-# Performance
+
+# Prediction with a sparse matrix
+csc <- read.libsvm("agaricus.txt.test", 126)
+test.y <- csc$label
+test.x <- csc$data
+pred <- predict(bst, test.x)
+
+# Extract the labels with xgb.getinfo
 labels <- xgb.getinfo(dtest, "label")
 err <- as.numeric(sum(as.integer(pred > 0.5) != labels)) / length(labels)
 print(paste("error=", err))
 
-# Training with dense matrix
-x <- as.matrix(x)
-bst <- xgboost(x, y, params = param, watchlist = watchlist)
-
-########################### Train with customization
+############################ Save and load model to hard disk
+
+# save the model to a binary local file
+xgb.save(bst, "model.save")
+
+# load the binary model back into R
+bst <- xgb.load("model.save")
+pred <- predict(bst, test.x)
+
+# dump the model to a text file
+xgb.dump(bst, "model.dump")
+
+# save a DMatrix object to hard disk
+xgb.DMatrix.save(dtrain, "dtrain.save")
+
+# load a DMatrix object back into R
+dtrain <- xgb.DMatrix("dtrain.save")
+
+############################ More flexible training function xgb.train
+
+param <- list(max_depth = 2, eta = 1, silent = 1, objective = "binary:logistic")
+watchlist <- list(eval = dtest, train = dtrain)
+
+# train the xgboost model
+bst <- xgb.train(param, dtrain, nround = 2, watchlist = watchlist)
+
+############################ Customized loss function
+
+param <- list(max_depth = 2, eta = 1, silent = 1)
+
+# note: for a customized objective function we leave `objective` unset; what we
+# get in `preds` is the margin value, so you must know what you are doing
 
 # user-defined objective function: given predictions, return the gradient and
 # the second-order gradient; this is the log-likelihood loss
@@ -85,10 +144,8 @@ evalerror <- function(preds, dtrain) {
     return(list(metric = "error", value = err))
 }
 
-bst <- xgboost(x, y, params = param, watchlist = watchlist, obj = logregobj, feval = evalerror)
-
-############################ Train with previous result
-
-bst <- xgboost(x, y, params = param, watchlist = watchlist)
-pred <- predict(bst, "agaricus.txt.train", outputmargin = TRUE)
-bst2 <- xgboost(x, y, params = param, watchlist = watchlist, margin = pred)
+# training with a customized objective; we can also train step by step, simply
+# look at xgboost.py's implementation of train
+bst <- xgb.train(param, dtrain, nround = 2, watchlist, logregobj, evalerror)