tidy code by formatR

2014-08-27 13:15:28 -07:00 · 2014-08-27 13:15:28 -07:00 · 247e0d5d78
commit 247e0d5d78
parent 4dcc7d7303
2 changed files with 152 additions and 171 deletions
--- a/R-package/inst/examples/demo-new.R
+++ b/R-package/inst/examples/demo-new.R
@ -1,161 +1,151 @@
 require(xgboost)
 require(methods)

-# helper function to read libsvm format
-# this is very badly written, load in dense, and convert to sparse
-# use this only for demo purpose
-# adopted from https://github.com/zygmuntz/r-libsvm-format-read-write/blob/master/f_read.libsvm.r
-read.libsvm = function(fname, maxcol) {
-    content = readLines(fname)
-    nline = length(content)
-    label = numeric(nline)
-    mat = matrix(0, nline, maxcol+1)
-    for (i in 1:nline) {
-        arr = as.vector(strsplit(content[i], " ")[[1]])
-        label[i] = as.numeric(arr[[1]])
-        for (j in 2:length(arr)) {
-            kv = strsplit(arr[j], ":")[[1]]
-            # to avoid 0 index
-            findex = as.integer(kv[1]) + 1
-            fvalue = as.numeric(kv[2])
-            mat[i,findex] = fvalue
-        }
+# helper function to read libsvm format this is very badly written, load in dense, and convert to sparse
+# use this only for demo purpose adopted from
+# https://github.com/zygmuntz/r-libsvm-format-read-write/blob/master/f_read.libsvm.r
+read.libsvm <- function(fname, maxcol) {
+  content <- readLines(fname)
+  nline <- length(content)
+  label <- numeric(nline)
+  mat <- matrix(0, nline, maxcol + 1)
+  for (i in 1:nline) {
+    arr <- as.vector(strsplit(content[i], " ")[[1]])
+    label[i] <- as.numeric(arr[[1]])
+    for (j in 2:length(arr)) {
+      kv <- strsplit(arr[j], ":")[[1]]
+      # to avoid 0 index
+      findex <- as.integer(kv[1]) + 1
+      fvalue <- as.numeric(kv[2])
+      mat[i, findex] <- fvalue
    }
-    mat = as(mat, "sparseMatrix")
-    return(list(label=label, data=mat))
+  }
+  mat <- as(mat, "sparseMatrix")
+  return(list(label = label, data = mat))
 }

-############################
-# Test xgb.DMatrix with local file, sparse matrix and dense matrix in R.
-############################
+############################ Test xgb.DMatrix with local file, sparse matrix and dense matrix in R.

 # Directly read in local file
-dtrain = xgb.DMatrix('agaricus.txt.train')
+dtrain <- xgb.DMatrix("agaricus.txt.train")
 class(dtrain)

 # read file in R
-csc = read.libsvm("agaricus.txt.train", 126)
-y = csc$label
-x = csc$data
+csc <- read.libsvm("agaricus.txt.train", 126)
+y <- csc$label
+x <- csc$data

 # x as Sparse Matrix
 class(x)
-dtrain = xgb.DMatrix(x, label=y)
+dtrain <- xgb.DMatrix(x, label = y)

 # x as dense matrix
-dense.x = as.matrix(x)
-dtrain = xgb.DMatrix(dense.x, label=y)
+dense.x <- as.matrix(x)
+dtrain <- xgb.DMatrix(dense.x, label = y)

-############################
-# Test xgboost with local file, sparse matrix and dense matrix in R.
-############################
+############################ Test xgboost with local file, sparse matrix and dense matrix in R.

 # Test with DMatrix object
-bst = xgboost(data=dtrain, max_depth=2, eta=1, objective='binary:logistic')
+bst <- xgboost(data = dtrain, max_depth = 2, eta = 1, 
+               objective = "binary:logistic")

 # Verbose = 0,1,2
-bst = xgboost(data=dtrain, max_depth=2, eta=1, objective='binary:logistic', 
-              verbose = 0)
-bst = xgboost(data=dtrain, max_depth=2, eta=1, objective='binary:logistic', 
-              verbose = 1)
-bst = xgboost(data=dtrain, max_depth=2, eta=1, objective='binary:logistic', 
-              verbose = 2)
+bst <- xgboost(data = dtrain, max_depth = 2, eta = 1, 
+               objective = "binary:logistic", verbose = 0)
+bst <- xgboost(data = dtrain, max_depth = 2, eta = 1, 
+               objective = "binary:logistic", verbose = 1)
+bst <- xgboost(data = dtrain, max_depth = 2, eta = 1, 
+               objective = "binary:logistic", verbose = 2)

 # Test with local file
-bst = xgboost(data='agaricus.txt.train', max_depth=2, eta=1, objective='binary:logistic')
+bst <- xgboost(data = "agaricus.txt.train", max_depth = 2, eta = 1,
+               objective = "binary:logistic")

 # Test with Sparse Matrix
-bst = xgboost(data = x, label = y, max_depth=2, eta=1, objective='binary:logistic')
+bst <- xgboost(data = x, label = y, max_depth = 2, eta = 1, 
+               objective = "binary:logistic")

 # Test with dense Matrix
-bst = xgboost(data = dense.x, label = y, max_depth=2, eta=1, objective='binary:logistic')
+bst <- xgboost(data = dense.x, label = y, max_depth = 2, eta = 1, 
+               objective = "binary:logistic")


-############################
-# Test predict
-############################
+############################ Test predict

 # Prediction with DMatrix object
-dtest = xgb.DMatrix('agaricus.txt.test')
-pred = predict(bst, dtest)
+dtest <- xgb.DMatrix("agaricus.txt.test")
+pred <- predict(bst, dtest)

 # Prediction with local test file
-pred = predict(bst, 'agaricus.txt.test')
+pred <- predict(bst, "agaricus.txt.test")

 # Prediction with Sparse Matrix
-csc = read.libsvm("agaricus.txt.test", 126)
-test.y = csc$label
-test.x = csc$data
-pred = predict(bst, test.x)
+csc <- read.libsvm("agaricus.txt.test", 126)
+test.y <- csc$label
+test.x <- csc$data
+pred <- predict(bst, test.x)

 # Extrac label with xgb.getinfo
-labels = xgb.getinfo(dtest, "label")
-err = as.numeric(sum(as.integer(pred > 0.5) != labels)) / length(labels)
-print(paste("error=",err))
+labels <- xgb.getinfo(dtest, "label")
+err <- as.numeric(sum(as.integer(pred > 0.5) != labels))/length(labels)
+print(paste("error=", err))

-############################
-# Save and load model to hard disk
-############################
+############################ Save and load model to hard disk

 # save model to binary local file
-xgb.save(bst, 'model.save')
+xgb.save(bst, "model.save")

 # load binary model to R
-bst = xgb.load('model.save')
-pred = predict(bst, test.x)
+bst <- xgb.load("model.save")
+pred <- predict(bst, test.x)

 # save model to text file
-xgb.dump(bst, 'model.dump')
+xgb.dump(bst, "model.dump")

 # save a DMatrix object to hard disk
-xgb.DMatrix.save(dtrain,'dtrain.save')
+xgb.DMatrix.save(dtrain, "dtrain.save")

 # load a DMatrix object to R
-dtrain = xgb.DMatrix('dtrain.save')
+dtrain <- xgb.DMatrix("dtrain.save")

-############################
-# More flexible training function xgb.train
-############################
+############################ More flexible training function xgb.train

-param = list(max_depth=2, eta=1, silent = 1, objective="binary:logistic")
-watchlist <- list("eval"=dtest,"train"=dtrain)
+param <- list(max_depth = 2, eta = 1, silent = 1, objective = "binary:logistic")
+watchlist <- list(eval = dtest, train = dtrain)

 # training xgboost model
-bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)
+bst <- xgb.train(param, dtrain, nround = 2, watchlist = watchlist)

-############################
-# cutomsized loss function
-############################
+############################ cutomsized loss function

-param <- list(max_depth = 2, eta = 1, silent =1)
+param <- list(max_depth = 2, eta = 1, silent = 1)

-# note: for customized objective function, we leave objective as default
-# note: what we are getting is margin value in prediction
-# you must know what you are doing
+# note: for customized objective function, we leave objective as default note: what we are getting is
+# margin value in prediction you must know what you are doing

-# user define objective function, given prediction, return gradient and second order gradient
-# this is loglikelihood loss
+# user define objective function, given prediction, return gradient and second order gradient this is
+# loglikelihood loss
 logregobj <- function(preds, dtrain) {
-    labels <- xgb.getinfo(dtrain, "label")
-    preds <- 1.0 / (1.0 + exp(-preds))
-    grad <- preds - labels
-    hess <- preds * (1.0-preds)
-    return(list(grad=grad, hess=hess))
+  labels <- xgb.getinfo(dtrain, "label")
+  preds <- 1/(1 + exp(-preds))
+  grad <- preds - labels
+  hess <- preds * (1 - preds)
+  return(list(grad = grad, hess = hess))
 }
-# user defined evaluation function, return a list(metric="metric-name", value="metric-value")
-# NOTE: when you do customized loss function, the default prediction value is margin
-# this may make buildin evalution metric not function properly
-# for example, we are doing logistic loss, the prediction is score before logistic transformation
-# the buildin evaluation error assumes input is after logistic transformation
-# Take this in mind when you use the customization, and maybe you need write customized evaluation function
+# user defined evaluation function, return a list(metric='metric-name', value='metric-value') NOTE: when
+# you do customized loss function, the default prediction value is margin this may make buildin
+# evalution metric not function properly for example, we are doing logistic loss, the prediction is
+# score before logistic transformation the buildin evaluation error assumes input is after logistic
+# transformation Take this in mind when you use the customization, and maybe you need write customized
+# evaluation function
 evalerror <- function(preds, dtrain) {
-    labels <- xgb.getinfo(dtrain, "label")
-    err <- as.numeric(sum(labels != (preds > 0.0))) / length(labels)
-    return(list(metric="error", value=err))
+  labels <- xgb.getinfo(dtrain, "label")
+  err <- as.numeric(sum(labels != (preds > 0)))/length(labels)
+  return(list(metric = "error", value = err))
 }

-# training with customized objective, we can also do step by step training
-# simply look at xgboost.py"s implementation of train
-bst <- xgb.train(param, dtrain, nround=2, watchlist, logregobj, evalerror)
-
+# training with customized objective, we can also do step by step training simply look at xgboost.py's
+# implementation of train
+bst <- xgb.train(param, dtrain, nround = 2, watchlist, logregobj, evalerror)

+ 
--- a/R-package/inst/examples/demo.R
+++ b/R-package/inst/examples/demo.R
@ -1,103 +1,94 @@
 require(xgboost)
 require(methods)

-# helper function to read libsvm format
-# this is very badly written, load in dense, and convert to sparse
-# use this only for demo purpose
-# adopted from https://github.com/zygmuntz/r-libsvm-format-read-write/blob/master/f_read.libsvm.r
-read.libsvm = function(fname, maxcol) {
-    content = readLines(fname)
-    nline = length(content)
-    label = numeric(nline)
-    mat = matrix(0, nline, maxcol+1)
-    for (i in 1:nline) {
-        arr = as.vector(strsplit(content[i], " ")[[1]])
-        label[i] = as.numeric(arr[[1]])
-        for (j in 2:length(arr)) {
-            kv = strsplit(arr[j], ":")[[1]]
-            # to avoid 0 index
-            findex = as.integer(kv[1]) + 1
-            fvalue = as.numeric(kv[2])
-            mat[i,findex] = fvalue
-        }
+# helper function to read libsvm format this is very badly written, load in dense, and convert to sparse
+# use this only for demo purpose adopted from
+# https://github.com/zygmuntz/r-libsvm-format-read-write/blob/master/f_read.libsvm.r
+read.libsvm <- function(fname, maxcol) {
+  content <- readLines(fname)
+  nline <- length(content)
+  label <- numeric(nline)
+  mat <- matrix(0, nline, maxcol + 1)
+  for (i in 1:nline) {
+    arr <- as.vector(strsplit(content[i], " ")[[1]])
+    label[i] <- as.numeric(arr[[1]])
+    for (j in 2:length(arr)) {
+      kv <- strsplit(arr[j], ":")[[1]]
+      # to avoid 0 index
+      findex <- as.integer(kv[1]) + 1
+      fvalue <- as.numeric(kv[2])
+      mat[i, findex] <- fvalue
    }
-    mat = as(mat, "sparseMatrix")
-    return(list(label=label, data=mat))
+  }
+  mat <- as(mat, "sparseMatrix")
+  return(list(label = label, data = mat))
 }

 # Parameter setting
 dtrain <- xgb.DMatrix("agaricus.txt.train")
 dtest <- xgb.DMatrix("agaricus.txt.test")
-param = list("bst:max_depth"=2, "bst:eta"=1, "silent"=1, "objective"="binary:logistic")
-watchlist = list("eval"=dtest,"train"=dtrain)
+param <- list(`bst:max_depth` = 2, `bst:eta` = 1, silent = 1, objective = "binary:logistic")
+watchlist <- list(eval = dtest, train = dtrain)

-###########################
-# Train from local file
-###########################
+########################### Train from local file

 # Training
-bst = xgboost(file='agaricus.txt.train',params=param,watchlist=watchlist)
+bst <- xgboost(file = "agaricus.txt.train", params = param, watchlist = watchlist)
 # Prediction
-pred = predict(bst, 'agaricus.txt.test')
+pred <- predict(bst, "agaricus.txt.test")
 # Performance
-labels = xgb.getinfo(dtest, "label")
-err = as.numeric(sum(as.integer(pred > 0.5) != labels)) / length(labels)
-print(paste("error=",err))
+labels <- xgb.getinfo(dtest, "label")
+err <- as.numeric(sum(as.integer(pred > 0.5) != labels))/length(labels)
+print(paste("error=", err))

-###########################
-# Train from R object
-###########################
+########################### Train from R object

-csc = read.libsvm("agaricus.txt.train", 126)
-y = csc$label
-x = csc$data
+csc <- read.libsvm("agaricus.txt.train", 126)
+y <- csc$label
+x <- csc$data
 # x as Sparse Matrix
 class(x)

 # Training
-bst = xgboost(x,y,params=param,watchlist=watchlist)
+bst <- xgboost(x, y, params = param, watchlist = watchlist)
 # Prediction
-pred = predict(bst, 'agaricus.txt.test')
+pred <- predict(bst, "agaricus.txt.test")
 # Performance
-labels = xgb.getinfo(dtest, "label")
-err = as.numeric(sum(as.integer(pred > 0.5) != labels)) / length(labels)
-print(paste("error=",err))
+labels <- xgb.getinfo(dtest, "label")
+err <- as.numeric(sum(as.integer(pred > 0.5) != labels))/length(labels)
+print(paste("error=", err))

 # Training with dense matrix
-x = as.matrix(x)
-bst = xgboost(x,y,params=param,watchlist=watchlist)
+x <- as.matrix(x)
+bst <- xgboost(x, y, params = param, watchlist = watchlist)

-###########################
-# Train with customization
-###########################
+########################### Train with customization

-# user define objective function, given prediction, return gradient and second order gradient
-# this is loglikelihood loss
-logregobj = function(preds, dtrain) {
-    labels = xgb.getinfo(dtrain, "label")
-    preds = 1.0 / (1.0 + exp(-preds))
-    grad = preds - labels
-    hess = preds * (1.0-preds)
-    return(list(grad=grad, hess=hess))
+# user define objective function, given prediction, return gradient and second order gradient this is
+# loglikelihood loss
+logregobj <- function(preds, dtrain) {
+  labels <- xgb.getinfo(dtrain, "label")
+  preds <- 1/(1 + exp(-preds))
+  grad <- preds - labels
+  hess <- preds * (1 - preds)
+  return(list(grad = grad, hess = hess))
 }
-# user defined evaluation function, return a list(metric="metric-name", value="metric-value")
-# NOTE: when you do customized loss function, the default prediction value is margin
-# this may make buildin evalution metric not function properly
-# for example, we are doing logistic loss, the prediction is score before logistic transformation
-# the buildin evaluation error assumes input is after logistic transformation
-# Take this in mind when you use the customization, and maybe you need write customized evaluation function
-evalerror = function(preds, dtrain) {
-    labels = xgb.getinfo(dtrain, "label")
-    err = as.numeric(sum(labels != (preds > 0.0))) / length(labels)
-    return(list(metric="error", value=err))
+# user defined evaluation function, return a list(metric='metric-name', value='metric-value') NOTE: when
+# you do customized loss function, the default prediction value is margin this may make buildin
+# evalution metric not function properly for example, we are doing logistic loss, the prediction is
+# score before logistic transformation the buildin evaluation error assumes input is after logistic
+# transformation Take this in mind when you use the customization, and maybe you need write customized
+# evaluation function
+evalerror <- function(preds, dtrain) {
+  labels <- xgb.getinfo(dtrain, "label")
+  err <- as.numeric(sum(labels != (preds > 0)))/length(labels)
+  return(list(metric = "error", value = err))
 }

-bst = xgboost(x,y,params=param,watchlist=watchlist,obj=logregobj, feval=evalerror)
+bst <- xgboost(x, y, params = param, watchlist = watchlist, obj = logregobj, feval = evalerror)

-############################
-# Train with previous result
-############################
+############################ Train with previous result

-bst = xgboost(x,y,params=param,watchlist=watchlist)
-pred = predict(bst, 'agaricus.txt.train', outputmargin=TRUE)
-bst2 = xgboost(x,y,params=param,watchlist=watchlist,margin=pred)
+bst <- xgboost(x, y, params = param, watchlist = watchlist)
+pred <- predict(bst, "agaricus.txt.train", outputmargin = TRUE)
+bst2 <- xgboost(x, y, params = param, watchlist = watchlist, margin = pred)