From a060a2e9a68dfd487f76f16ced75c19456c2cd7b Mon Sep 17 00:00:00 2001
From: unknown
Date: Wed, 27 Aug 2014 13:16:16 -0700
Subject: [PATCH] remove old R demo files

---
 R-package/inst/examples/demo-new.R | 151 -----------------------------
 R-package/inst/examples/demo-old.R | 127 ------------------------
 R-package/inst/examples/demo.R     | 119 +++++++++++++++++------
 3 files changed, 88 insertions(+), 309 deletions(-)
 delete mode 100644 R-package/inst/examples/demo-new.R
 delete mode 100644 R-package/inst/examples/demo-old.R

diff --git a/R-package/inst/examples/demo-new.R b/R-package/inst/examples/demo-new.R
deleted file mode 100644
index c84bddd63..000000000
--- a/R-package/inst/examples/demo-new.R
+++ /dev/null
@@ -1,151 +0,0 @@
-require(xgboost)
-require(methods)
-
-# helper function to read libsvm format this is very badly written, load in dense, and convert to sparse
-# use this only for demo purpose adopted from
-# https://github.com/zygmuntz/r-libsvm-format-read-write/blob/master/f_read.libsvm.r
-read.libsvm <- function(fname, maxcol) {
-    content <- readLines(fname)
-    nline <- length(content)
-    label <- numeric(nline)
-    mat <- matrix(0, nline, maxcol + 1)
-    for (i in 1:nline) {
-        arr <- as.vector(strsplit(content[i], " ")[[1]])
-        label[i] <- as.numeric(arr[[1]])
-        for (j in 2:length(arr)) {
-            kv <- strsplit(arr[j], ":")[[1]]
-            # to avoid 0 index
-            findex <- as.integer(kv[1]) + 1
-            fvalue <- as.numeric(kv[2])
-            mat[i, findex] <- fvalue
-        }
-    }
-    mat <- as(mat, "sparseMatrix")
-    return(list(label = label, data = mat))
-}
-
-############################ Test xgb.DMatrix with local file, sparse matrix and dense matrix in R.
-
-# Directly read in local file
-dtrain <- xgb.DMatrix("agaricus.txt.train")
-class(dtrain)
-
-# read file in R
-csc <- read.libsvm("agaricus.txt.train", 126)
-y <- csc$label
-x <- csc$data
-
-# x as Sparse Matrix
-class(x)
-dtrain <- xgb.DMatrix(x, label = y)
-
-# x as dense matrix
-dense.x <- as.matrix(x)
-dtrain <- xgb.DMatrix(dense.x, label = y)
-
-############################ Test xgboost with local file, sparse matrix and dense matrix in R.
-
-# Test with DMatrix object
-bst <- xgboost(data = dtrain, max_depth = 2, eta = 1,
-    objective = "binary:logistic")
-
-# Verbose = 0,1,2
-bst <- xgboost(data = dtrain, max_depth = 2, eta = 1,
-    objective = "binary:logistic", verbose = 0)
-bst <- xgboost(data = dtrain, max_depth = 2, eta = 1,
-    objective = "binary:logistic", verbose = 1)
-bst <- xgboost(data = dtrain, max_depth = 2, eta = 1,
-    objective = "binary:logistic", verbose = 2)
-
-# Test with local file
-bst <- xgboost(data = "agaricus.txt.train", max_depth = 2, eta = 1,
-    objective = "binary:logistic")
-
-# Test with Sparse Matrix
-bst <- xgboost(data = x, label = y, max_depth = 2, eta = 1,
-    objective = "binary:logistic")
-
-# Test with dense Matrix
-bst <- xgboost(data = dense.x, label = y, max_depth = 2, eta = 1,
-    objective = "binary:logistic")
-
-
-############################ Test predict
-
-# Prediction with DMatrix object
-dtest <- xgb.DMatrix("agaricus.txt.test")
-pred <- predict(bst, dtest)
-
-# Prediction with local test file
-pred <- predict(bst, "agaricus.txt.test")
-
-# Prediction with Sparse Matrix
-csc <- read.libsvm("agaricus.txt.test", 126)
-test.y <- csc$label
-test.x <- csc$data
-pred <- predict(bst, test.x)
-
-# Extrac label with xgb.getinfo
-labels <- xgb.getinfo(dtest, "label")
-err <- as.numeric(sum(as.integer(pred > 0.5) != labels))/length(labels)
-print(paste("error=", err))
-
-############################ Save and load model to hard disk
-
-# save model to binary local file
-xgb.save(bst, "model.save")
-
-# load binary model to R
-bst <- xgb.load("model.save")
-pred <- predict(bst, test.x)
-
-# save model to text file
-xgb.dump(bst, "model.dump")
-
-# save a DMatrix object to hard disk
-xgb.DMatrix.save(dtrain, "dtrain.save")
-
-# load a DMatrix object to R
-dtrain <- xgb.DMatrix("dtrain.save")
-
-############################ More flexible training function xgb.train
-
-param <- list(max_depth = 2, eta = 1, silent = 1, objective = "binary:logistic")
-watchlist <- list(eval = dtest, train = dtrain)
-
-# training xgboost model
-bst <- xgb.train(param, dtrain, nround = 2, watchlist = watchlist)
-
-############################ cutomsized loss function
-
-param <- list(max_depth = 2, eta = 1, silent = 1)
-
-# note: for customized objective function, we leave objective as default note: what we are getting is
-# margin value in prediction you must know what you are doing
-
-# user define objective function, given prediction, return gradient and second order gradient this is
-# loglikelihood loss
-logregobj <- function(preds, dtrain) {
-    labels <- xgb.getinfo(dtrain, "label")
-    preds <- 1/(1 + exp(-preds))
-    grad <- preds - labels
-    hess <- preds * (1 - preds)
-    return(list(grad = grad, hess = hess))
-}
-# user defined evaluation function, return a list(metric='metric-name', value='metric-value') NOTE: when
-# you do customized loss function, the default prediction value is margin this may make buildin
-# evalution metric not function properly for example, we are doing logistic loss, the prediction is
-# score before logistic transformation the buildin evaluation error assumes input is after logistic
-# transformation Take this in mind when you use the customization, and maybe you need write customized
-# evaluation function
-evalerror <- function(preds, dtrain) {
-    labels <- xgb.getinfo(dtrain, "label")
-    err <- as.numeric(sum(labels != (preds > 0)))/length(labels)
-    return(list(metric = "error", value = err))
-}
-
-# training with customized objective, we can also do step by step training simply look at xgboost.py's
-# implementation of train
-bst <- xgb.train(param, dtrain, nround = 2, watchlist, logregobj, evalerror)
-
-
diff --git a/R-package/inst/examples/demo-old.R b/R-package/inst/examples/demo-old.R
deleted file mode 100644
index 6332ba37d..000000000
--- a/R-package/inst/examples/demo-old.R
+++ /dev/null
@@ -1,127 +0,0 @@
-# load xgboost library
-require(xgboost)
-require(methods)
-
-# helper function to read libsvm format
-# this is very badly written, load in dense, and convert to sparse
-# use this only for demo purpose
-# adopted from https://github.com/zygmuntz/r-libsvm-format-read-write/blob/master/f_read.libsvm.r
-read.libsvm <- function(fname, maxcol) {
-    content <- readLines(fname)
-    nline <- length(content)
-    label <- numeric(nline)
-    mat <- matrix(0, nline, maxcol+1)
-    for (i in 1:nline) {
-        arr <- as.vector(strsplit(content[i], " ")[[1]])
-        label[i] <- as.numeric(arr[[1]])
-        for (j in 2:length(arr)) {
-            kv <- strsplit(arr[j], ":")[[1]]
-            # to avoid 0 index
-            findex <- as.integer(kv[1]) + 1
-            fvalue <- as.numeric(kv[2])
-            mat[i,findex] <- fvalue
-        }
-    }
-    mat <- as(mat, "sparseMatrix")
-    return(list(label=label, data=mat))
-}
-
-# test code here
-dtrain <- xgb.DMatrix("agaricus.txt.train")
-dtest <- xgb.DMatrix("agaricus.txt.test")
-param = list("bst:max_depth"=2, "bst:eta"=1, "silent"=1, "objective"="binary:logistic")
-watchlist <- list("eval"=dtest,"train"=dtrain)
-# training xgboost model
-bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)
-# make prediction
-preds <- xgb.predict(bst, dtest)
-labels <- xgb.getinfo(dtest, "label")
-err <- as.numeric(sum(as.integer(preds > 0.5) != labels)) / length(labels)
-# print error rate
-print(paste("error=",err))
-
-# dump model
-xgb.dump(bst, "dump.raw.txt")
-# dump model with feature map
-xgb.dump(bst, "dump.nice.txt", "featmap.txt")
-
-# save dmatrix into binary buffer
-succ <- xgb.save(dtest, "dtest.buffer")
-# save model into file
-succ <- xgb.save(bst, "xgb.model")
-# load model and data in
-bst2 <- xgb.Booster(modelfile="xgb.model")
-dtest2 <- xgb.DMatrix("dtest.buffer")
-preds2 <- xgb.predict(bst2, dtest2)
-# assert they are the same
-stopifnot(sum(abs(preds2-preds)) == 0)
-
-###
-# build dmatrix from sparseMatrix
-###
-print ('start running example of build DMatrix from R.sparseMatrix')
-csc <- read.libsvm("agaricus.txt.train", 126)
-label <- csc$label
-data <- csc$data
-dtrain <- xgb.DMatrix(data, info=list(label=label) )
-watchlist <- list("eval"=dtest,"train"=dtrain)
-bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)
-
-###
-# build dmatrix from dense matrix
-###
-print ('start running example of build DMatrix from R.Matrix')
-mat = as.matrix(data)
-dtrain <- xgb.DMatrix(mat, info=list(label=label) )
-watchlist <- list("eval"=dtest,"train"=dtrain)
-bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)
-
-###
-# advanced: cutomsized loss function
-#
-print("start running example to used cutomized objective function")
-# note: for customized objective function, we leave objective as default
-# note: what we are getting is margin value in prediction
-# you must know what you are doing
-param <- list("bst:max_depth" = 2, "bst:eta" = 1, "silent" =1)
-# user define objective function, given prediction, return gradient and second order gradient
-# this is loglikelihood loss
-logregobj <- function(preds, dtrain) {
-    labels <- xgb.getinfo(dtrain, "label")
-    preds <- 1.0 / (1.0 + exp(-preds))
-    grad <- preds - labels
-    hess <- preds * (1.0-preds)
-    return(list(grad=grad, hess=hess))
-}
-# user defined evaluation function, return a list(metric="metric-name", value="metric-value")
-# NOTE: when you do customized loss function, the default prediction value is margin
-# this may make buildin evalution metric not function properly
-# for example, we are doing logistic loss, the prediction is score before logistic transformation
-# the buildin evaluation error assumes input is after logistic transformation
-# Take this in mind when you use the customization, and maybe you need write customized evaluation function
-evalerror <- function(preds, dtrain) {
-    labels <- xgb.getinfo(dtrain, "label")
-    err <- as.numeric(sum(labels != (preds > 0.0))) / length(labels)
-    return(list(metric="error", value=err))
-}
-
-# training with customized objective, we can also do step by step training
-# simply look at xgboost.py"s implementation of train
-bst <- xgb.train(param, dtrain, nround=2, watchlist, logregobj, evalerror)
-
-###
-# advanced: start from a initial base prediction
-#
-print ("start running example to start from a initial prediction")
-# specify parameters via map, definition are same as c++ version
-param = list("bst:max_depth"=2, "bst:eta"=1, "silent"=1, "objective"="binary:logistic")
-# train xgboost for 1 round
-bst <- xgb.train( param, dtrain, 1, watchlist )
-# Note: we need the margin value instead of transformed prediction in set_base_margin
-# do predict with output_margin=True, will always give you margin values before logistic transformation
-ptrain <- xgb.predict(bst, dtrain, outputmargin=TRUE)
-ptest <- xgb.predict(bst, dtest, outputmargin=TRUE)
-succ <- xgb.setinfo(dtrain, "base_margin", ptrain)
-succ <- xgb.setinfo(dtest, "base_margin", ptest)
-print ("this is result of running from initial prediction")
-bst <- xgb.train( param, dtrain, 1, watchlist )
diff --git a/R-package/inst/examples/demo.R b/R-package/inst/examples/demo.R
index 5424622aa..c84bddd63 100644
--- a/R-package/inst/examples/demo.R
+++ b/R-package/inst/examples/demo.R
@@ -24,45 +24,104 @@ read.libsvm <- function(fname, maxcol) {
     return(list(label = label, data = mat))
 }
 
-# Parameter setting
+############################ Test xgb.DMatrix with local file, sparse matrix and dense matrix in R.
+
+# Directly read in local file
 dtrain <- xgb.DMatrix("agaricus.txt.train")
-dtest <- xgb.DMatrix("agaricus.txt.test")
-param <- list(`bst:max_depth` = 2, `bst:eta` = 1, silent = 1, objective = "binary:logistic")
-watchlist <- list(eval = dtest, train = dtrain)
-
-########################### Train from local file
-
-# Training
-bst <- xgboost(file = "agaricus.txt.train", params = param, watchlist = watchlist)
-# Prediction
-pred <- predict(bst, "agaricus.txt.test")
-# Performance
-labels <- xgb.getinfo(dtest, "label")
-err <- as.numeric(sum(as.integer(pred > 0.5) != labels))/length(labels)
-print(paste("error=", err))
-
-########################### Train from R object
+class(dtrain)
 
+# read file in R
 csc <- read.libsvm("agaricus.txt.train", 126)
 y <- csc$label
 x <- csc$data
+
 # x as Sparse Matrix
 class(x)
+dtrain <- xgb.DMatrix(x, label = y)
 
-# Training
-bst <- xgboost(x, y, params = param, watchlist = watchlist)
-# Prediction
+# x as dense matrix
+dense.x <- as.matrix(x)
+dtrain <- xgb.DMatrix(dense.x, label = y)
+
+############################ Test xgboost with local file, sparse matrix and dense matrix in R.
+
+# Test with DMatrix object
+bst <- xgboost(data = dtrain, max_depth = 2, eta = 1,
+    objective = "binary:logistic")
+
+# Verbose = 0,1,2
+bst <- xgboost(data = dtrain, max_depth = 2, eta = 1,
+    objective = "binary:logistic", verbose = 0)
+bst <- xgboost(data = dtrain, max_depth = 2, eta = 1,
+    objective = "binary:logistic", verbose = 1)
+bst <- xgboost(data = dtrain, max_depth = 2, eta = 1,
+    objective = "binary:logistic", verbose = 2)
+
+# Test with local file
+bst <- xgboost(data = "agaricus.txt.train", max_depth = 2, eta = 1,
+    objective = "binary:logistic")
+
+# Test with Sparse Matrix
+bst <- xgboost(data = x, label = y, max_depth = 2, eta = 1,
+    objective = "binary:logistic")
+
+# Test with dense Matrix
+bst <- xgboost(data = dense.x, label = y, max_depth = 2, eta = 1,
+    objective = "binary:logistic")
+
+
+############################ Test predict
+
+# Prediction with DMatrix object
+dtest <- xgb.DMatrix("agaricus.txt.test")
+pred <- predict(bst, dtest)
+
+# Prediction with local test file
 pred <- predict(bst, "agaricus.txt.test")
-# Performance
+
+# Prediction with Sparse Matrix
+csc <- read.libsvm("agaricus.txt.test", 126)
+test.y <- csc$label
+test.x <- csc$data
+pred <- predict(bst, test.x)
+
+# Extrac label with xgb.getinfo
 labels <- xgb.getinfo(dtest, "label")
 err <- as.numeric(sum(as.integer(pred > 0.5) != labels))/length(labels)
 print(paste("error=", err))
 
-# Training with dense matrix
-x <- as.matrix(x)
-bst <- xgboost(x, y, params = param, watchlist = watchlist)
+############################ Save and load model to hard disk
 
-########################### Train with customization
+# save model to binary local file
+xgb.save(bst, "model.save")
+
+# load binary model to R
+bst <- xgb.load("model.save")
+pred <- predict(bst, test.x)
+
+# save model to text file
+xgb.dump(bst, "model.dump")
+
+# save a DMatrix object to hard disk
+xgb.DMatrix.save(dtrain, "dtrain.save")
+
+# load a DMatrix object to R
+dtrain <- xgb.DMatrix("dtrain.save")
+
+############################ More flexible training function xgb.train
+
+param <- list(max_depth = 2, eta = 1, silent = 1, objective = "binary:logistic")
+watchlist <- list(eval = dtest, train = dtrain)
+
+# training xgboost model
+bst <- xgb.train(param, dtrain, nround = 2, watchlist = watchlist)
+
+############################ cutomsized loss function
+
+param <- list(max_depth = 2, eta = 1, silent = 1)
+
+# note: for customized objective function, we leave objective as default note: what we are getting is
+# margin value in prediction you must know what you are doing
 
 # user define objective function, given prediction, return gradient and second order gradient this is
 # loglikelihood loss
@@ -85,10 +144,8 @@ evalerror <- function(preds, dtrain) {
     return(list(metric = "error", value = err))
 }
 
-bst <- xgboost(x, y, params = param, watchlist = watchlist, obj = logregobj, feval = evalerror)
+# training with customized objective, we can also do step by step training simply look at xgboost.py's
+# implementation of train
+bst <- xgb.train(param, dtrain, nround = 2, watchlist, logregobj, evalerror)
 
-############################ Train with previous result
-
-bst <- xgboost(x, y, params = param, watchlist = watchlist)
-pred <- predict(bst, "agaricus.txt.train", outputmargin = TRUE)
-bst2 <- xgboost(x, y, params = param, watchlist = watchlist, margin = pred)
+