From 262108cf3b6b23a7bd4f0666aacd31a8f0c95ccc Mon Sep 17 00:00:00 2001 From: hetong Date: Tue, 26 Aug 2014 10:02:13 -0700 Subject: [PATCH] modify demo filenames --- R-package/inst/examples/demo-Rinterface.R | 102 ------------- R-package/inst/examples/demo-old.R | 127 ++++++++++++++++ R-package/inst/examples/demo.R | 169 +++++++++------------- 3 files changed, 199 insertions(+), 199 deletions(-) delete mode 100644 R-package/inst/examples/demo-Rinterface.R create mode 100644 R-package/inst/examples/demo-old.R diff --git a/R-package/inst/examples/demo-Rinterface.R b/R-package/inst/examples/demo-Rinterface.R deleted file mode 100644 index 8b85c4cc0..000000000 --- a/R-package/inst/examples/demo-Rinterface.R +++ /dev/null @@ -1,102 +0,0 @@ -require(xgboost) - -# helper function to read libsvm format -# this is very badly written, load in dense, and convert to sparse -# use this only for demo purpose -# adopted from https://github.com/zygmuntz/r-libsvm-format-read-write/blob/master/f_read.libsvm.r -read.libsvm = function(fname, maxcol) { - content = readLines(fname) - nline = length(content) - label = numeric(nline) - mat = matrix(0, nline, maxcol+1) - for (i in 1:nline) { - arr = as.vector(strsplit(content[i], " ")[[1]]) - label[i] = as.numeric(arr[[1]]) - for (j in 2:length(arr)) { - kv = strsplit(arr[j], ":")[[1]] - # to avoid 0 index - findex = as.integer(kv[1]) + 1 - fvalue = as.numeric(kv[2]) - mat[i,findex] = fvalue - } - } - mat = as(mat, "sparseMatrix") - return(list(label=label, data=mat)) -} - -# Parameter setting -dtrain <- xgb.DMatrix("agaricus.txt.train") -dtest <- xgb.DMatrix("agaricus.txt.test") -param = list("bst:max_depth"=2, "bst:eta"=1, "silent"=1, "objective"="binary:logistic") -watchlist = list("eval"=dtest,"train"=dtrain) - -########################### -# Train from local file -########################### - -# Training -bst = xgboost(file='agaricus.txt.train',params=param,watchlist=watchlist) -# Prediction -pred = predict(bst, 'agaricus.txt.test') -# Performance -labels = xgb.getinfo(dtest, "label") -err = as.numeric(sum(as.integer(pred > 0.5) != labels)) / length(labels) -print(paste("error=",err)) - -########################### -# Train from R object -########################### - -csc = read.libsvm("agaricus.txt.train", 126) -y = csc$label -x = csc$data -# x as Sparse Matrix -class(x) - -# Training -bst = xgboost(x,y,params=param,watchlist=watchlist) -# Prediction -pred = predict(bst, 'agaricus.txt.test') -# Performance -labels = xgb.getinfo(dtest, "label") -err = as.numeric(sum(as.integer(pred > 0.5) != labels)) / length(labels) -print(paste("error=",err)) - -# Training with dense matrix -x = as.matrix(x) -bst = xgboost(x,y,params=param,watchlist=watchlist) - -########################### -# Train with customization -########################### - -# user define objective function, given prediction, return gradient and second order gradient -# this is loglikelihood loss -logregobj = function(preds, dtrain) { - labels = xgb.getinfo(dtrain, "label") - preds = 1.0 / (1.0 + exp(-preds)) - grad = preds - labels - hess = preds * (1.0-preds) - return(list(grad=grad, hess=hess)) -} -# user defined evaluation function, return a list(metric="metric-name", value="metric-value") -# NOTE: when you do customized loss function, the default prediction value is margin -# this may make buildin evalution metric not function properly -# for example, we are doing logistic loss, the prediction is score before logistic transformation -# the buildin evaluation error assumes input is after 
logistic transformation -# Take this in mind when you use the customization, and maybe you need write customized evaluation function -evalerror = function(preds, dtrain) { - labels = xgb.getinfo(dtrain, "label") - err = as.numeric(sum(labels != (preds > 0.0))) / length(labels) - return(list(metric="error", value=err)) -} - -bst = xgboost(x,y,params=param,watchlist=watchlist,obj=logregobj, feval=evalerror) - -############################ -# Train with previous result -############################ - -bst = xgboost(x,y,params=param,watchlist=watchlist) -pred = predict(bst, 'agaricus.txt.train', outputmargin=TRUE) -bst2 = xgboost(x,y,params=param,watchlist=watchlist,margin=pred) diff --git a/R-package/inst/examples/demo-old.R b/R-package/inst/examples/demo-old.R new file mode 100644 index 000000000..6332ba37d --- /dev/null +++ b/R-package/inst/examples/demo-old.R @@ -0,0 +1,127 @@ +# load xgboost library +require(xgboost) +require(methods) + +# helper function to read libsvm format +# this is very badly written, load in dense, and convert to sparse +# use this only for demo purpose +# adopted from https://github.com/zygmuntz/r-libsvm-format-read-write/blob/master/f_read.libsvm.r +read.libsvm <- function(fname, maxcol) { + content <- readLines(fname) + nline <- length(content) + label <- numeric(nline) + mat <- matrix(0, nline, maxcol+1) + for (i in 1:nline) { + arr <- as.vector(strsplit(content[i], " ")[[1]]) + label[i] <- as.numeric(arr[[1]]) + for (j in 2:length(arr)) { + kv <- strsplit(arr[j], ":")[[1]] + # to avoid 0 index + findex <- as.integer(kv[1]) + 1 + fvalue <- as.numeric(kv[2]) + mat[i,findex] <- fvalue + } + } + mat <- as(mat, "sparseMatrix") + return(list(label=label, data=mat)) +} + +# test code here +dtrain <- xgb.DMatrix("agaricus.txt.train") +dtest <- xgb.DMatrix("agaricus.txt.test") +param = list("bst:max_depth"=2, "bst:eta"=1, "silent"=1, "objective"="binary:logistic") +watchlist <- list("eval"=dtest,"train"=dtrain) +# training xgboost model +bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist) +# make prediction +preds <- xgb.predict(bst, dtest) +labels <- xgb.getinfo(dtest, "label") +err <- as.numeric(sum(as.integer(preds > 0.5) != labels)) / length(labels) +# print error rate +print(paste("error=",err)) + +# dump model +xgb.dump(bst, "dump.raw.txt") +# dump model with feature map +xgb.dump(bst, "dump.nice.txt", "featmap.txt") + +# save dmatrix into binary buffer +succ <- xgb.save(dtest, "dtest.buffer") +# save model into file +succ <- xgb.save(bst, "xgb.model") +# load model and data in +bst2 <- xgb.Booster(modelfile="xgb.model") +dtest2 <- xgb.DMatrix("dtest.buffer") +preds2 <- xgb.predict(bst2, dtest2) +# assert they are the same +stopifnot(sum(abs(preds2-preds)) == 0) + +### +# build dmatrix from sparseMatrix +### +print ('start running example of build DMatrix from R.sparseMatrix') +csc <- read.libsvm("agaricus.txt.train", 126) +label <- csc$label +data <- csc$data +dtrain <- xgb.DMatrix(data, info=list(label=label) ) +watchlist <- list("eval"=dtest,"train"=dtrain) +bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist) + +### +# build dmatrix from dense matrix +### +print ('start running example of build DMatrix from R.Matrix') +mat = as.matrix(data) +dtrain <- xgb.DMatrix(mat, info=list(label=label) ) +watchlist <- list("eval"=dtest,"train"=dtrain) +bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist) + +### +# advanced: cutomsized loss function +# +print("start running example to used cutomized objective function") +# note: for 
customized objective function, we leave objective as default +# note: what we are getting is margin value in prediction +# you must know what you are doing +param <- list("bst:max_depth" = 2, "bst:eta" = 1, "silent" =1) +# user define objective function, given prediction, return gradient and second order gradient +# this is loglikelihood loss +logregobj <- function(preds, dtrain) { + labels <- xgb.getinfo(dtrain, "label") + preds <- 1.0 / (1.0 + exp(-preds)) + grad <- preds - labels + hess <- preds * (1.0-preds) + return(list(grad=grad, hess=hess)) +} +# user defined evaluation function, return a list(metric="metric-name", value="metric-value") +# NOTE: when you do customized loss function, the default prediction value is margin +# this may make buildin evalution metric not function properly +# for example, we are doing logistic loss, the prediction is score before logistic transformation +# the buildin evaluation error assumes input is after logistic transformation +# Take this in mind when you use the customization, and maybe you need write customized evaluation function +evalerror <- function(preds, dtrain) { + labels <- xgb.getinfo(dtrain, "label") + err <- as.numeric(sum(labels != (preds > 0.0))) / length(labels) + return(list(metric="error", value=err)) +} + +# training with customized objective, we can also do step by step training +# simply look at xgboost.py"s implementation of train +bst <- xgb.train(param, dtrain, nround=2, watchlist, logregobj, evalerror) + +### +# advanced: start from a initial base prediction +# +print ("start running example to start from a initial prediction") +# specify parameters via map, definition are same as c++ version +param = list("bst:max_depth"=2, "bst:eta"=1, "silent"=1, "objective"="binary:logistic") +# train xgboost for 1 round +bst <- xgb.train( param, dtrain, 1, watchlist ) +# Note: we need the margin value instead of transformed prediction in set_base_margin +# do predict with output_margin=True, will always give you margin values before logistic transformation +ptrain <- xgb.predict(bst, dtrain, outputmargin=TRUE) +ptest <- xgb.predict(bst, dtest, outputmargin=TRUE) +succ <- xgb.setinfo(dtrain, "base_margin", ptrain) +succ <- xgb.setinfo(dtest, "base_margin", ptest) +print ("this is result of running from initial prediction") +bst <- xgb.train( param, dtrain, 1, watchlist ) diff --git a/R-package/inst/examples/demo.R b/R-package/inst/examples/demo.R index 6332ba37d..8b85c4cc0 100644 --- a/R-package/inst/examples/demo.R +++ b/R-package/inst/examples/demo.R @@ -1,97 +1,83 @@ -# load xgboost library require(xgboost) -require(methods) # helper function to read libsvm format # this is very badly written, load in dense, and convert to sparse # use this only for demo purpose # adopted from https://github.com/zygmuntz/r-libsvm-format-read-write/blob/master/f_read.libsvm.r -read.libsvm <- function(fname, maxcol) { - content <- readLines(fname) - nline <- length(content) - label <- numeric(nline) - mat <- matrix(0, nline, maxcol+1) - for (i in 1:nline) { - arr <- as.vector(strsplit(content[i], " ")[[1]]) - label[i] <- as.numeric(arr[[1]]) - for (j in 2:length(arr)) { - kv <- strsplit(arr[j], ":")[[1]] - # to avoid 0 index - findex <- as.integer(kv[1]) + 1 - fvalue <- as.numeric(kv[2]) - mat[i,findex] <- fvalue +read.libsvm = function(fname, maxcol) { + content = readLines(fname) + nline = length(content) + label = numeric(nline) + mat = matrix(0, nline, maxcol+1) + for (i in 1:nline) { + arr = as.vector(strsplit(content[i], " ")[[1]]) + label[i] = 
as.numeric(arr[[1]]) + for (j in 2:length(arr)) { + kv = strsplit(arr[j], ":")[[1]] + # to avoid 0 index + findex = as.integer(kv[1]) + 1 + fvalue = as.numeric(kv[2]) + mat[i,findex] = fvalue + } } - } - mat <- as(mat, "sparseMatrix") - return(list(label=label, data=mat)) + mat = as(mat, "sparseMatrix") + return(list(label=label, data=mat)) } -# test code here +# Parameter setting dtrain <- xgb.DMatrix("agaricus.txt.train") dtest <- xgb.DMatrix("agaricus.txt.test") param = list("bst:max_depth"=2, "bst:eta"=1, "silent"=1, "objective"="binary:logistic") -watchlist <- list("eval"=dtest,"train"=dtrain) -# training xgboost model -bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist) -# make prediction -preds <- xgb.predict(bst, dtest) -labels <- xgb.getinfo(dtest, "label") -err <- as.numeric(sum(as.integer(preds > 0.5) != labels)) / length(labels) -# print error rate +watchlist = list("eval"=dtest,"train"=dtrain) + +########################### +# Train from local file +########################### + +# Training +bst = xgboost(file='agaricus.txt.train',params=param,watchlist=watchlist) +# Prediction +pred = predict(bst, 'agaricus.txt.test') +# Performance +labels = xgb.getinfo(dtest, "label") +err = as.numeric(sum(as.integer(pred > 0.5) != labels)) / length(labels) print(paste("error=",err)) -# dump model -xgb.dump(bst, "dump.raw.txt") -# dump model with feature map -xgb.dump(bst, "dump.nice.txt", "featmap.txt") +########################### +# Train from R object +########################### -# save dmatrix into binary buffer -succ <- xgb.save(dtest, "dtest.buffer") -# save model into file -succ <- xgb.save(bst, "xgb.model") -# load model and data in -bst2 <- xgb.Booster(modelfile="xgb.model") -dtest2 <- xgb.DMatrix("dtest.buffer") -preds2 <- xgb.predict(bst2, dtest2) -# assert they are the same -stopifnot(sum(abs(preds2-preds)) == 0) +csc = read.libsvm("agaricus.txt.train", 126) +y = csc$label +x = csc$data +# x as Sparse Matrix +class(x) -### -# build dmatrix from sparseMatrix -### -print ('start running example of build DMatrix from R.sparseMatrix') -csc <- read.libsvm("agaricus.txt.train", 126) -label <- csc$label -data <- csc$data -dtrain <- xgb.DMatrix(data, info=list(label=label) ) -watchlist <- list("eval"=dtest,"train"=dtrain) -bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist) +# Training +bst = xgboost(x,y,params=param,watchlist=watchlist) +# Prediction +pred = predict(bst, 'agaricus.txt.test') +# Performance +labels = xgb.getinfo(dtest, "label") +err = as.numeric(sum(as.integer(pred > 0.5) != labels)) / length(labels) +print(paste("error=",err)) -### -# build dmatrix from dense matrix -### -print ('start running example of build DMatrix from R.Matrix') -mat = as.matrix(data) -dtrain <- xgb.DMatrix(mat, info=list(label=label) ) -watchlist <- list("eval"=dtest,"train"=dtrain) -bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist) +# Training with dense matrix +x = as.matrix(x) +bst = xgboost(x,y,params=param,watchlist=watchlist) + +########################### +# Train with customization +########################### -### -# advanced: cutomsized loss function -# -print("start running example to used cutomized objective function") -# note: for customized objective function, we leave objective as default -# note: what we are getting is margin value in prediction -# you must know what you are doing -param <- list("bst:max_depth" = 2, "bst:eta" = 1, "silent" =1) # user define objective function, given prediction, return gradient and second order gradient # this 
is loglikelihood loss -logregobj <- function(preds, dtrain) { - labels <- xgb.getinfo(dtrain, "label") - preds <- 1.0 / (1.0 + exp(-preds)) - grad <- preds - labels - hess <- preds * (1.0-preds) - return(list(grad=grad, hess=hess)) +logregobj = function(preds, dtrain) { + labels = xgb.getinfo(dtrain, "label") + preds = 1.0 / (1.0 + exp(-preds)) + grad = preds - labels + hess = preds * (1.0-preds) + return(list(grad=grad, hess=hess)) } # user defined evaluation function, return a list(metric="metric-name", value="metric-value") # NOTE: when you do customized loss function, the default prediction value is margin @@ -99,29 +85,18 @@ logregobj <- function(preds, dtrain) { # for example, we are doing logistic loss, the prediction is score before logistic transformation # the buildin evaluation error assumes input is after logistic transformation # Take this in mind when you use the customization, and maybe you need write customized evaluation function -evalerror <- function(preds, dtrain) { - labels <- xgb.getinfo(dtrain, "label") - err <- as.numeric(sum(labels != (preds > 0.0))) / length(labels) - return(list(metric="error", value=err)) +evalerror = function(preds, dtrain) { + labels = xgb.getinfo(dtrain, "label") + err = as.numeric(sum(labels != (preds > 0.0))) / length(labels) + return(list(metric="error", value=err)) } -# training with customized objective, we can also do step by step training -# simply look at xgboost.py"s implementation of train -bst <- xgb.train(param, dtrain, nround=2, watchlist, logregobj, evalerror) +bst = xgboost(x,y,params=param,watchlist=watchlist,obj=logregobj, feval=evalerror) -### -# advanced: start from a initial base prediction -# -print ("start running example to start from a initial prediction") -# specify parameters via map, definition are same as c++ version -param = list("bst:max_depth"=2, "bst:eta"=1, "silent"=1, "objective"="binary:logistic") -# train xgboost for 1 round -bst <- xgb.train( param, dtrain, 1, watchlist ) -# Note: we need the margin value instead of transformed prediction in set_base_margin -# do predict with output_margin=True, will always give you margin values before logistic transformation -ptrain <- xgb.predict(bst, dtrain, outputmargin=TRUE) -ptest <- xgb.predict(bst, dtest, outputmargin=TRUE) -succ <- xgb.setinfo(dtrain, "base_margin", ptrain) -succ <- xgb.setinfo(dtest, "base_margin", ptest) -print ("this is result of running from initial prediction") -bst <- xgb.train( param, dtrain, 1, watchlist ) +############################ +# Train with previous result +############################ + +bst = xgboost(x,y,params=param,watchlist=watchlist) +pred = predict(bst, 'agaricus.txt.train', outputmargin=TRUE) +bst2 = xgboost(x,y,params=param,watchlist=watchlist,margin=pred)
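
A note on the read.libsvm() helper that all three demo scripts share: its own comments flag it as wasteful, since it fills a dense matrix and only then converts to sparse. Below is a minimal sketch of a sparse-friendly variant that assembles the matrix in triplet form with Matrix::sparseMatrix(). The function name read.libsvm.sparse is illustrative only and is not part of the xgboost package or of this patch; it assumes the Matrix package is installed and that feature indices are 0-based, as in the agaricus files used by the demos.

# sketch: read libsvm-format data directly into a sparse matrix
require(Matrix)

read.libsvm.sparse <- function(fname, maxcol) {
  content <- readLines(fname)
  nline   <- length(content)
  label   <- numeric(nline)
  # collect triplet (row, col, value) pieces per input line
  ri <- list(); ci <- list(); xv <- list()
  for (i in 1:nline) {
    arr <- strsplit(content[i], " ")[[1]]
    label[i] <- as.numeric(arr[1])
    if (length(arr) > 1) {
      kv <- strsplit(arr[-1], ":")
      # shift 0-based libsvm indices to 1-based R columns
      ci[[i]] <- sapply(kv, function(p) as.integer(p[1]) + 1)
      xv[[i]] <- sapply(kv, function(p) as.numeric(p[2]))
      ri[[i]] <- rep(i, length(kv))
    }
  }
  mat <- sparseMatrix(i = unlist(ri), j = unlist(ci), x = unlist(xv),
                      dims = c(nline, maxcol + 1))
  return(list(label = label, data = mat))
}

It is a drop-in replacement where the demos call read.libsvm, e.g. csc <- read.libsvm.sparse("agaricus.txt.train", 126), and avoids allocating the dense nline x (maxcol+1) matrix.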