remove old R demo files

unknown 2014-08-27 13:16:16 -07:00
parent 247e0d5d78
commit a060a2e9a6
3 changed files with 88 additions and 309 deletions

View File

@@ -1,151 +0,0 @@
require(xgboost)
require(methods)
# Helper function to read the libsvm format. It is badly written: it loads the data as
# dense and then converts to sparse, so use it for demo purposes only. Adapted from
# https://github.com/zygmuntz/r-libsvm-format-read-write/blob/master/f_read.libsvm.r
read.libsvm <- function(fname, maxcol) {
content <- readLines(fname)
nline <- length(content)
label <- numeric(nline)
mat <- matrix(0, nline, maxcol + 1)
for (i in 1:nline) {
arr <- as.vector(strsplit(content[i], " ")[[1]])
label[i] <- as.numeric(arr[[1]])
for (j in 2:length(arr)) {
kv <- strsplit(arr[j], ":")[[1]]
# libsvm indices are 0-based; shift by 1 to avoid a 0 index in R
findex <- as.integer(kv[1]) + 1
fvalue <- as.numeric(kv[2])
mat[i, findex] <- fvalue
}
}
mat <- as(mat, "sparseMatrix")
return(list(label = label, data = mat))
}
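# For reference, each libsvm line looks like "1 3:1 10:1 12:1": the label first, then
# index:value pairs with 0-based indices. A minimal sanity check of the parser on a tiny
# made-up file (an illustrative sketch, not part of the original demo):
tmp <- tempfile()
writeLines(c("1 0:1.5 2:3", "0 1:2"), tmp)
d <- read.libsvm(tmp, 3)
stopifnot(d$label == c(1, 0), d$data[1, 1] == 1.5)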
############################ Test xgb.DMatrix with local file, sparse matrix and dense matrix in R.
# Directly read in local file
dtrain <- xgb.DMatrix("agaricus.txt.train")
class(dtrain)
# read file in R
csc <- read.libsvm("agaricus.txt.train", 126)
y <- csc$label
x <- csc$data
# x as Sparse Matrix
class(x)
dtrain <- xgb.DMatrix(x, label = y)
# x as dense matrix
dense.x <- as.matrix(x)
dtrain <- xgb.DMatrix(dense.x, label = y)
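# As a quick consistency sketch: each DMatrix construction above should carry the same
# labels, which we can read back with xgb.getinfo (used again further below).
stopifnot(all(xgb.getinfo(dtrain, "label") == y))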
############################ Test xgboost with local file, sparse matrix and dense matrix in R.
# Test with DMatrix object
bst <- xgboost(data = dtrain, max_depth = 2, eta = 1,
objective = "binary:logistic")
# verbose can be 0 (silent), 1 (print evaluation), or 2 (print evaluation and tree information)
bst <- xgboost(data = dtrain, max_depth = 2, eta = 1,
objective = "binary:logistic", verbose = 0)
bst <- xgboost(data = dtrain, max_depth = 2, eta = 1,
objective = "binary:logistic", verbose = 1)
bst <- xgboost(data = dtrain, max_depth = 2, eta = 1,
objective = "binary:logistic", verbose = 2)
# Test with local file
bst <- xgboost(data = "agaricus.txt.train", max_depth = 2, eta = 1,
objective = "binary:logistic")
# Test with Sparse Matrix
bst <- xgboost(data = x, label = y, max_depth = 2, eta = 1,
objective = "binary:logistic")
# Test with dense Matrix
bst <- xgboost(data = dense.x, label = y, max_depth = 2, eta = 1,
objective = "binary:logistic")
############################ Test predict
# Prediction with DMatrix object
dtest <- xgb.DMatrix("agaricus.txt.test")
pred <- predict(bst, dtest)
# Prediction with local test file
pred <- predict(bst, "agaricus.txt.test")
# Prediction with Sparse Matrix
csc <- read.libsvm("agaricus.txt.test", 126)
test.y <- csc$label
test.x <- csc$data
pred <- predict(bst, test.x)
# Extract labels with xgb.getinfo
labels <- xgb.getinfo(dtest, "label")
err <- as.numeric(sum(as.integer(pred > 0.5) != labels))/length(labels)
print(paste("error=", err))
############################ Save and load model to hard disk
# save model to binary local file
xgb.save(bst, "model.save")
# load binary model to R
bst <- xgb.load("model.save")
pred <- predict(bst, test.x)
# save model to text file
xgb.dump(bst, "model.dump")
# save a DMatrix object to hard disk
xgb.DMatrix.save(dtrain, "dtrain.save")
# load a DMatrix object to R
dtrain <- xgb.DMatrix("dtrain.save")
############################ More flexible training function xgb.train
param <- list(max_depth = 2, eta = 1, silent = 1, objective = "binary:logistic")
watchlist <- list(eval = dtest, train = dtrain)
# training xgboost model
bst <- xgb.train(param, dtrain, nround = 2, watchlist = watchlist)
############################ customized loss function
param <- list(max_depth = 2, eta = 1, silent = 1)
# Note: for a customized objective function we leave 'objective' at its default, and what
# we get as the prediction is the margin value, so you must know what you are doing.
# User-defined objective function: given the predictions, return the gradient and the
# second-order gradient. This one is the log-likelihood (logistic) loss.
logregobj <- function(preds, dtrain) {
labels <- xgb.getinfo(dtrain, "label")
preds <- 1/(1 + exp(-preds))
grad <- preds - labels
hess <- preds * (1 - preds)
return(list(grad = grad, hess = hess))
}
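# Why these formulas (a sketch, not part of the original demo): for logistic loss with
# p = 1/(1 + exp(-margin)), the gradient w.r.t. the margin is p - y and the hessian is
# p * (1 - p). Quick numeric check at margin 0, where p must be 0.5:
p <- 1 / (1 + exp(0))
stopifnot(p == 0.5, p * (1 - p) == 0.25)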
# User-defined evaluation function: return list(metric = 'metric-name', value = 'metric-value').
# NOTE: with a customized objective the default prediction value is the margin, which may
# keep the built-in evaluation metrics from working properly. For example, with logistic
# loss the prediction is the score before the logistic transformation, while the built-in
# error metric assumes input after that transformation. Keep this in mind when customizing;
# you may need to write a customized evaluation function.
evalerror <- function(preds, dtrain) {
labels <- xgb.getinfo(dtrain, "label")
err <- as.numeric(sum(labels != (preds > 0)))/length(labels)
return(list(metric = "error", value = err))
}
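# A quick sanity check of evalerror (sketch): with all margins at zero, (preds > 0) is
# all FALSE, so the returned error is simply the fraction of positive labels.
chk <- evalerror(rep(0, length(xgb.getinfo(dtrain, "label"))), dtrain)
print(chk)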
# Training with a customized objective. We can also train step by step; see the
# implementation of train in xgboost.py for an example.
bst <- xgb.train(param, dtrain, nround = 2, watchlist, logregobj, evalerror)

View File

@@ -1,127 +0,0 @@
# load xgboost library
require(xgboost)
require(methods)
# helper function to read the libsvm format
# it is badly written: it loads the data as dense and converts to sparse
# use it for demo purposes only
# adapted from https://github.com/zygmuntz/r-libsvm-format-read-write/blob/master/f_read.libsvm.r
read.libsvm <- function(fname, maxcol) {
content <- readLines(fname)
nline <- length(content)
label <- numeric(nline)
mat <- matrix(0, nline, maxcol+1)
for (i in 1:nline) {
arr <- as.vector(strsplit(content[i], " ")[[1]])
label[i] <- as.numeric(arr[[1]])
for (j in 2:length(arr)) {
kv <- strsplit(arr[j], ":")[[1]]
# libsvm indices are 0-based; shift by 1 to avoid a 0 index in R
findex <- as.integer(kv[1]) + 1
fvalue <- as.numeric(kv[2])
mat[i,findex] <- fvalue
}
}
mat <- as(mat, "sparseMatrix")
return(list(label=label, data=mat))
}
# test code here
dtrain <- xgb.DMatrix("agaricus.txt.train")
dtest <- xgb.DMatrix("agaricus.txt.test")
param <- list("bst:max_depth" = 2, "bst:eta" = 1, "silent" = 1, "objective" = "binary:logistic")
watchlist <- list("eval" = dtest, "train" = dtrain)
# training xgboost model
bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)
# make prediction
preds <- xgb.predict(bst, dtest)
labels <- xgb.getinfo(dtest, "label")
err <- as.numeric(sum(as.integer(preds > 0.5) != labels)) / length(labels)
# print error rate
print(paste("error=", err))
# dump model
xgb.dump(bst, "dump.raw.txt")
# dump model with feature map
xgb.dump(bst, "dump.nice.txt", "featmap.txt")
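# featmap.txt maps feature indices to readable names; as far as I recall the xgboost
# featmap format, each line is "<index> <name> <type>" with type i (indicator),
# q (quantity) or int (integer) -- treat this as an assumption and check the xgboost docs.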
# save dmatrix into binary buffer
succ <- xgb.save(dtest, "dtest.buffer")
# save model into file
succ <- xgb.save(bst, "xgb.model")
# load model and data in
bst2 <- xgb.Booster(modelfile="xgb.model")
dtest2 <- xgb.DMatrix("dtest.buffer")
preds2 <- xgb.predict(bst2, dtest2)
# assert they are the same
stopifnot(sum(abs(preds2-preds)) == 0)
###
# build dmatrix from sparseMatrix
###
print("start running example of building a DMatrix from an R sparseMatrix")
csc <- read.libsvm("agaricus.txt.train", 126)
label <- csc$label
data <- csc$data
dtrain <- xgb.DMatrix(data, info = list(label = label))
watchlist <- list("eval"=dtest,"train"=dtrain)
bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)
###
# build dmatrix from dense matrix
###
print("start running example of building a DMatrix from a dense R matrix")
mat <- as.matrix(data)
dtrain <- xgb.DMatrix(mat, info = list(label = label))
watchlist <- list("eval"=dtest,"train"=dtrain)
bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)
###
# advanced: customized loss function
###
print("start running example of using a customized objective function")
# note: for a customized objective function we leave 'objective' at its default
# note: what we get as the prediction is the margin value
# you must know what you are doing
param <- list("bst:max_depth" = 2, "bst:eta" = 1, "silent" =1)
# user-defined objective function: given the prediction, return the gradient and the
# second-order gradient; this one is the log-likelihood (logistic) loss
logregobj <- function(preds, dtrain) {
labels <- xgb.getinfo(dtrain, "label")
preds <- 1.0 / (1.0 + exp(-preds))
grad <- preds - labels
hess <- preds * (1.0-preds)
return(list(grad=grad, hess=hess))
}
# user-defined evaluation function: return list(metric = "metric-name", value = "metric-value")
# NOTE: with a customized objective the default prediction value is the margin,
# which may keep the built-in evaluation metrics from working properly.
# For example, with logistic loss the prediction is the score before the logistic
# transformation, while the built-in error metric assumes input after that transformation.
# Keep this in mind when customizing; you may need to write a customized evaluation function.
evalerror <- function(preds, dtrain) {
labels <- xgb.getinfo(dtrain, "label")
err <- as.numeric(sum(labels != (preds > 0.0))) / length(labels)
return(list(metric="error", value=err))
}
# training with a customized objective; we can also train step by step,
# see the implementation of train in xgboost.py
bst <- xgb.train(param, dtrain, nround=2, watchlist, logregobj, evalerror)
###
# advanced: start from an initial base prediction
###
print("start running example of starting from an initial prediction")
# specify parameters via a list; the definitions are the same as in the C++ version
param <- list("bst:max_depth" = 2, "bst:eta" = 1, "silent" = 1, "objective" = "binary:logistic")
# train xgboost for 1 round
bst <- xgb.train(param, dtrain, 1, watchlist)
# Note: base_margin needs the margin value, not the transformed prediction.
# Predicting with outputmargin=TRUE always returns margin values before the logistic transformation.
ptrain <- xgb.predict(bst, dtrain, outputmargin=TRUE)
ptest <- xgb.predict(bst, dtest, outputmargin=TRUE)
succ <- xgb.setinfo(dtrain, "base_margin", ptrain)
succ <- xgb.setinfo(dtest, "base_margin", ptest)
print ("this is result of running from initial prediction")
bst <- xgb.train( param, dtrain, 1, watchlist )
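# To see what the base margin bought us (a sketch, reusing the error computation from
# above): predict on dtest and recompute the error after this continued round.
preds <- xgb.predict(bst, dtest)
labels <- xgb.getinfo(dtest, "label")
err <- as.numeric(sum(as.integer(preds > 0.5) != labels)) / length(labels)
print(paste("error after continuing from the base margin =", err))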

View File

@@ -24,45 +24,104 @@ read.libsvm <- function(fname, maxcol) {
return(list(label = label, data = mat))
}
# Parameter setting
############################ Test xgb.DMatrix with local file, sparse matrix and dense matrix in R.
# Directly read in local file
dtrain <- xgb.DMatrix("agaricus.txt.train")
dtest <- xgb.DMatrix("agaricus.txt.test")
param <- list(`bst:max_depth` = 2, `bst:eta` = 1, silent = 1, objective = "binary:logistic")
watchlist <- list(eval = dtest, train = dtrain)
########################### Train from local file
# Training
bst <- xgboost(file = "agaricus.txt.train", params = param, watchlist = watchlist)
# Prediction
pred <- predict(bst, "agaricus.txt.test")
# Performance
labels <- xgb.getinfo(dtest, "label")
err <- as.numeric(sum(as.integer(pred > 0.5) != labels))/length(labels)
print(paste("error=", err))
########################### Train from R object
class(dtrain)
# read file in R
csc <- read.libsvm("agaricus.txt.train", 126)
y <- csc$label
x <- csc$data
# x as Sparse Matrix
class(x)
dtrain <- xgb.DMatrix(x, label = y)
# Training
bst <- xgboost(x, y, params = param, watchlist = watchlist)
# Prediction
# x as dense matrix
dense.x <- as.matrix(x)
dtrain <- xgb.DMatrix(dense.x, label = y)
############################ Test xgboost with local file, sparse matrix and dense matrix in R.
# Test with DMatrix object
bst <- xgboost(data = dtrain, max_depth = 2, eta = 1,
objective = "binary:logistic")
# verbose can be 0 (silent), 1 (print evaluation), or 2 (print evaluation and tree information)
bst <- xgboost(data = dtrain, max_depth = 2, eta = 1,
objective = "binary:logistic", verbose = 0)
bst <- xgboost(data = dtrain, max_depth = 2, eta = 1,
objective = "binary:logistic", verbose = 1)
bst <- xgboost(data = dtrain, max_depth = 2, eta = 1,
objective = "binary:logistic", verbose = 2)
# Test with local file
bst <- xgboost(data = "agaricus.txt.train", max_depth = 2, eta = 1,
objective = "binary:logistic")
# Test with Sparse Matrix
bst <- xgboost(data = x, label = y, max_depth = 2, eta = 1,
objective = "binary:logistic")
# Test with dense Matrix
bst <- xgboost(data = dense.x, label = y, max_depth = 2, eta = 1,
objective = "binary:logistic")
############################ Test predict
# Prediction with DMatrix object
dtest <- xgb.DMatrix("agaricus.txt.test")
pred <- predict(bst, dtest)
# Prediction with local test file
pred <- predict(bst, "agaricus.txt.test")
# Performance
# Prediction with Sparse Matrix
csc <- read.libsvm("agaricus.txt.test", 126)
test.y <- csc$label
test.x <- csc$data
pred <- predict(bst, test.x)
# Extract labels with xgb.getinfo
labels <- xgb.getinfo(dtest, "label")
err <- as.numeric(sum(as.integer(pred > 0.5) != labels))/length(labels)
print(paste("error=", err))
# Training with dense matrix
x <- as.matrix(x)
bst <- xgboost(x, y, params = param, watchlist = watchlist)
############################ Save and load model to hard disk
########################### Train with customization
# save model to binary local file
xgb.save(bst, "model.save")
# load binary model to R
bst <- xgb.load("model.save")
pred <- predict(bst, test.x)
# save model to text file
xgb.dump(bst, "model.dump")
# save a DMatrix object to hard disk
xgb.DMatrix.save(dtrain, "dtrain.save")
# load a DMatrix object to R
dtrain <- xgb.DMatrix("dtrain.save")
############################ More flexible training function xgb.train
param <- list(max_depth = 2, eta = 1, silent = 1, objective = "binary:logistic")
watchlist <- list(eval = dtest, train = dtrain)
# training xgboost model
bst <- xgb.train(param, dtrain, nround = 2, watchlist = watchlist)
############################ customized loss function
param <- list(max_depth = 2, eta = 1, silent = 1)
# Note: for a customized objective function we leave 'objective' at its default, and what
# we get as the prediction is the margin value, so you must know what you are doing.
# User-defined objective function: given the predictions, return the gradient and the
# second-order gradient. This one is the log-likelihood (logistic) loss.
@@ -85,10 +144,8 @@ evalerror <- function(preds, dtrain) {
return(list(metric = "error", value = err))
}
bst <- xgboost(x, y, params = param, watchlist = watchlist, obj = logregobj, feval = evalerror)
# Training with a customized objective. We can also train step by step; see the
# implementation of train in xgboost.py for an example.
bst <- xgb.train(param, dtrain, nround = 2, watchlist, logregobj, evalerror)
############################ Train with previous result
bst <- xgboost(x, y, params = param, watchlist = watchlist)
pred <- predict(bst, "agaricus.txt.train", outputmargin = TRUE)
bst2 <- xgboost(x, y, params = param, watchlist = watchlist, margin = pred)
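# A sketch of checking the continued model (reuses y from above; not part of the original
# demo): predict on the training file again and recompute the error after the extra round.
pred2 <- predict(bst2, "agaricus.txt.train")
err2 <- as.numeric(sum(as.integer(pred2 > 0.5) != y)) / length(y)
print(paste("train error after continued training =", err2))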