remove old R demo files

unknown 2014-08-27 13:16:16 -07:00
parent 247e0d5d78
commit a060a2e9a6
3 changed files with 88 additions and 309 deletions

View File

@@ -1,151 +0,0 @@
require(xgboost)
require(methods)
# Helper function to read the libsvm format. It is badly written: it loads the data as
# dense and then converts to sparse, so use it for demo purposes only. Adapted from
# https://github.com/zygmuntz/r-libsvm-format-read-write/blob/master/f_read.libsvm.r
read.libsvm <- function(fname, maxcol) {
content <- readLines(fname)
nline <- length(content)
label <- numeric(nline)
mat <- matrix(0, nline, maxcol + 1)
for (i in 1:nline) {
arr <- as.vector(strsplit(content[i], " ")[[1]])
label[i] <- as.numeric(arr[[1]])
for (j in 2:length(arr)) {
kv <- strsplit(arr[j], ":")[[1]]
# libsvm indices are 0-based; shift by 1 to avoid a 0 index in R
findex <- as.integer(kv[1]) + 1
fvalue <- as.numeric(kv[2])
mat[i, findex] <- fvalue
}
}
mat <- as(mat, "sparseMatrix")
return(list(label = label, data = mat))
}
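# For reference, each libsvm line looks like "1 3:1 10:1 12:1": the label first, then
# index:value pairs with 0-based indices. A minimal sanity check of the parser on a tiny
# made-up file (an illustrative sketch, not part of the original demo):
tmp <- tempfile()
writeLines(c("1 0:1.5 2:3", "0 1:2"), tmp)
d <- read.libsvm(tmp, 3)
stopifnot(d$label == c(1, 0), d$data[1, 1] == 1.5)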
############################ Test xgb.DMatrix with local file, sparse matrix and dense matrix in R.
# Directly read in local file
dtrain <- xgb.DMatrix("agaricus.txt.train")
class(dtrain)
# read file in R
csc <- read.libsvm("agaricus.txt.train", 126)
y <- csc$label
x <- csc$data
# x as Sparse Matrix
class(x)
dtrain <- xgb.DMatrix(x, label = y)
# x as dense matrix
dense.x <- as.matrix(x)
dtrain <- xgb.DMatrix(dense.x, label = y)
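# As a quick consistency sketch: each DMatrix construction above should carry the same
# labels, which we can read back with xgb.getinfo (used again further below).
stopifnot(all(xgb.getinfo(dtrain, "label") == y))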
############################ Test xgboost with local file, sparse matrix and dense matrix in R.
# Test with DMatrix object
bst <- xgboost(data = dtrain, max_depth = 2, eta = 1,
objective = "binary:logistic")
# verbose can be 0 (silent), 1 (print evaluation), or 2 (print evaluation and tree information)
bst <- xgboost(data = dtrain, max_depth = 2, eta = 1,
objective = "binary:logistic", verbose = 0)
bst <- xgboost(data = dtrain, max_depth = 2, eta = 1,
objective = "binary:logistic", verbose = 1)
bst <- xgboost(data = dtrain, max_depth = 2, eta = 1,
objective = "binary:logistic", verbose = 2)
# Test with local file
bst <- xgboost(data = "agaricus.txt.train", max_depth = 2, eta = 1,
objective = "binary:logistic")
# Test with Sparse Matrix
bst <- xgboost(data = x, label = y, max_depth = 2, eta = 1,
objective = "binary:logistic")
# Test with dense Matrix
bst <- xgboost(data = dense.x, label = y, max_depth = 2, eta = 1,
objective = "binary:logistic")
############################ Test predict
# Prediction with DMatrix object
dtest <- xgb.DMatrix("agaricus.txt.test")
pred <- predict(bst, dtest)
# Prediction with local test file
pred <- predict(bst, "agaricus.txt.test")
# Prediction with Sparse Matrix
csc <- read.libsvm("agaricus.txt.test", 126)
test.y <- csc$label
test.x <- csc$data
pred <- predict(bst, test.x)
# Extract labels with xgb.getinfo
labels <- xgb.getinfo(dtest, "label")
err <- as.numeric(sum(as.integer(pred > 0.5) != labels))/length(labels)
print(paste("error=", err))
############################ Save and load model to hard disk
# save model to binary local file
xgb.save(bst, "model.save")
# load binary model to R
bst <- xgb.load("model.save")
pred <- predict(bst, test.x)
# save model to text file
xgb.dump(bst, "model.dump")
# save a DMatrix object to hard disk
xgb.DMatrix.save(dtrain, "dtrain.save")
# load a DMatrix object to R
dtrain <- xgb.DMatrix("dtrain.save")
############################ More flexible training function xgb.train
param <- list(max_depth = 2, eta = 1, silent = 1, objective = "binary:logistic")
watchlist <- list(eval = dtest, train = dtrain)
# training xgboost model
bst <- xgb.train(param, dtrain, nround = 2, watchlist = watchlist)
############################ customized loss function
param <- list(max_depth = 2, eta = 1, silent = 1)
# Note: for a customized objective function we leave 'objective' at its default, and what
# we get as the prediction is the margin value, so you must know what you are doing.
# User-defined objective function: given the predictions, return the gradient and the
# second-order gradient. This one is the log-likelihood (logistic) loss.
logregobj <- function(preds, dtrain) {
labels <- xgb.getinfo(dtrain, "label")
preds <- 1/(1 + exp(-preds))
grad <- preds - labels
hess <- preds * (1 - preds)
return(list(grad = grad, hess = hess))
}
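# Why these formulas (a sketch, not part of the original demo): for logistic loss with
# p = 1/(1 + exp(-margin)), the gradient w.r.t. the margin is p - y and the hessian is
# p * (1 - p). Quick numeric check at margin 0, where p must be 0.5:
p <- 1 / (1 + exp(0))
stopifnot(p == 0.5, p * (1 - p) == 0.25)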
# User-defined evaluation function: return list(metric = 'metric-name', value = 'metric-value').
# NOTE: with a customized objective the default prediction value is the margin, which may
# keep the built-in evaluation metrics from working properly. For example, with logistic
# loss the prediction is the score before the logistic transformation, while the built-in
# error metric assumes input after that transformation. Keep this in mind when customizing;
# you may need to write a customized evaluation function.
evalerror <- function(preds, dtrain) {
labels <- xgb.getinfo(dtrain, "label")
err <- as.numeric(sum(labels != (preds > 0)))/length(labels)
return(list(metric = "error", value = err))
}
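# A quick sanity check of evalerror (sketch): with all margins at zero, (preds > 0) is
# all FALSE, so the returned error is simply the fraction of positive labels.
chk <- evalerror(rep(0, length(xgb.getinfo(dtrain, "label"))), dtrain)
print(chk)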
# Training with a customized objective. We can also train step by step; see the
# implementation of train in xgboost.py for an example.
bst <- xgb.train(param, dtrain, nround = 2, watchlist, logregobj, evalerror)

View File

@@ -1,127 +0,0 @@
# load xgboost library
require(xgboost)
require(methods)
# helper function to read the libsvm format
# it is badly written: it loads the data as dense and converts to sparse
# use it for demo purposes only
# adapted from https://github.com/zygmuntz/r-libsvm-format-read-write/blob/master/f_read.libsvm.r
read.libsvm <- function(fname, maxcol) {
content <- readLines(fname)
nline <- length(content)
label <- numeric(nline)
mat <- matrix(0, nline, maxcol+1)
for (i in 1:nline) {
arr <- as.vector(strsplit(content[i], " ")[[1]])
label[i] <- as.numeric(arr[[1]])
for (j in 2:length(arr)) {
kv <- strsplit(arr[j], ":")[[1]]
# libsvm indices are 0-based; shift by 1 to avoid a 0 index in R
findex <- as.integer(kv[1]) + 1
fvalue <- as.numeric(kv[2])
mat[i,findex] <- fvalue
}
}
mat <- as(mat, "sparseMatrix")
return(list(label=label, data=mat))
}
# test code here
dtrain <- xgb.DMatrix("agaricus.txt.train")
dtest <- xgb.DMatrix("agaricus.txt.test")
param <- list("bst:max_depth" = 2, "bst:eta" = 1, "silent" = 1, "objective" = "binary:logistic")
watchlist <- list("eval" = dtest, "train" = dtrain)
# training xgboost model
bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)
# make prediction
preds <- xgb.predict(bst, dtest)
labels <- xgb.getinfo(dtest, "label")
err <- as.numeric(sum(as.integer(preds > 0.5) != labels)) / length(labels)
# print error rate
print(paste("error=", err))
# dump model
xgb.dump(bst, "dump.raw.txt")
# dump model with feature map
xgb.dump(bst, "dump.nice.txt", "featmap.txt")
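# featmap.txt maps feature indices to readable names; as far as I recall the xgboost
# featmap format, each line is "<index> <name> <type>" with type i (indicator),
# q (quantity) or int (integer) -- treat this as an assumption and check the xgboost docs.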
# save dmatrix into binary buffer
succ <- xgb.save(dtest, "dtest.buffer")
# save model into file
succ <- xgb.save(bst, "xgb.model")
# load model and data in
bst2 <- xgb.Booster(modelfile="xgb.model")
dtest2 <- xgb.DMatrix("dtest.buffer")
preds2 <- xgb.predict(bst2, dtest2)
# assert they are the same
stopifnot(sum(abs(preds2-preds)) == 0)
###
# build dmatrix from sparseMatrix
###
print("start running example of building a DMatrix from an R sparseMatrix")
csc <- read.libsvm("agaricus.txt.train", 126)
label <- csc$label
data <- csc$data
dtrain <- xgb.DMatrix(data, info = list(label = label))
watchlist <- list("eval"=dtest,"train"=dtrain)
bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)
###
# build dmatrix from dense matrix
###
print("start running example of building a DMatrix from a dense R matrix")
mat <- as.matrix(data)
dtrain <- xgb.DMatrix(mat, info = list(label = label))
watchlist <- list("eval"=dtest,"train"=dtrain)
bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)
###
# advanced: customized loss function
###
print("start running example of using a customized objective function")
# note: for a customized objective function we leave 'objective' at its default
# note: what we get as the prediction is the margin value
# you must know what you are doing
param <- list("bst:max_depth" = 2, "bst:eta" = 1, "silent" =1)
# user-defined objective function: given the prediction, return the gradient and the
# second-order gradient; this one is the log-likelihood (logistic) loss
logregobj <- function(preds, dtrain) {
labels <- xgb.getinfo(dtrain, "label")
preds <- 1.0 / (1.0 + exp(-preds))
grad <- preds - labels
hess <- preds * (1.0-preds)
return(list(grad=grad, hess=hess))
}
# user-defined evaluation function: return list(metric = "metric-name", value = "metric-value")
# NOTE: with a customized objective the default prediction value is the margin,
# which may keep the built-in evaluation metrics from working properly.
# For example, with logistic loss the prediction is the score before the logistic
# transformation, while the built-in error metric assumes input after that transformation.
# Keep this in mind when customizing; you may need to write a customized evaluation function.
evalerror <- function(preds, dtrain) {
labels <- xgb.getinfo(dtrain, "label")
err <- as.numeric(sum(labels != (preds > 0.0))) / length(labels)
return(list(metric="error", value=err))
}
# training with a customized objective; we can also train step by step,
# see the implementation of train in xgboost.py
bst <- xgb.train(param, dtrain, nround=2, watchlist, logregobj, evalerror)
###
# advanced: start from an initial base prediction
###
print("start running example of starting from an initial prediction")
# specify parameters via a list; the definitions are the same as in the C++ version
param <- list("bst:max_depth" = 2, "bst:eta" = 1, "silent" = 1, "objective" = "binary:logistic")
# train xgboost for 1 round
bst <- xgb.train(param, dtrain, 1, watchlist)
# Note: base_margin needs the margin value, not the transformed prediction.
# Predicting with outputmargin=TRUE always returns margin values before the logistic transformation.
ptrain <- xgb.predict(bst, dtrain, outputmargin=TRUE)
ptest <- xgb.predict(bst, dtest, outputmargin=TRUE)
succ <- xgb.setinfo(dtrain, "base_margin", ptrain)
succ <- xgb.setinfo(dtest, "base_margin", ptest)
print ("this is result of running from initial prediction")
bst <- xgb.train( param, dtrain, 1, watchlist )
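# To see what the base margin bought us (a sketch, reusing the error computation from
# above): predict on dtest and recompute the error after this continued round.
preds <- xgb.predict(bst, dtest)
labels <- xgb.getinfo(dtest, "label")
err <- as.numeric(sum(as.integer(preds > 0.5) != labels)) / length(labels)
print(paste("error after continuing from the base margin =", err))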

View File

@@ -24,45 +24,104 @@ read.libsvm <- function(fname, maxcol) {
return(list(label = label, data = mat))
}
# Parameter setting
############################ Test xgb.DMatrix with local file, sparse matrix and dense matrix in R.
# Directly read in local file
dtrain <- xgb.DMatrix("agaricus.txt.train")
dtest <- xgb.DMatrix("agaricus.txt.test")
param <- list(`bst:max_depth` = 2, `bst:eta` = 1, silent = 1, objective = "binary:logistic")
watchlist <- list(eval = dtest, train = dtrain)
########################### Train from local file
# Training
bst <- xgboost(file = "agaricus.txt.train", params = param, watchlist = watchlist)
# Prediction
pred <- predict(bst, "agaricus.txt.test")
# Performance
labels <- xgb.getinfo(dtest, "label")
err <- as.numeric(sum(as.integer(pred > 0.5) != labels))/length(labels)
print(paste("error=", err))
########################### Train from R object
class(dtrain)
# read file in R
csc <- read.libsvm("agaricus.txt.train", 126)
y <- csc$label
x <- csc$data
# x as Sparse Matrix
class(x)
dtrain <- xgb.DMatrix(x, label = y)
# Training
bst <- xgboost(x, y, params = param, watchlist = watchlist)
# Prediction
# x as dense matrix
dense.x <- as.matrix(x)
dtrain <- xgb.DMatrix(dense.x, label = y)
############################ Test xgboost with local file, sparse matrix and dense matrix in R.
# Test with DMatrix object
bst <- xgboost(data = dtrain, max_depth = 2, eta = 1,
objective = "binary:logistic")
# verbose can be 0 (silent), 1 (print evaluation), or 2 (print evaluation and tree information)
bst <- xgboost(data = dtrain, max_depth = 2, eta = 1,
objective = "binary:logistic", verbose = 0)
bst <- xgboost(data = dtrain, max_depth = 2, eta = 1,
objective = "binary:logistic", verbose = 1)
bst <- xgboost(data = dtrain, max_depth = 2, eta = 1,
objective = "binary:logistic", verbose = 2)
# Test with local file
bst <- xgboost(data = "agaricus.txt.train", max_depth = 2, eta = 1,
objective = "binary:logistic")
# Test with Sparse Matrix
bst <- xgboost(data = x, label = y, max_depth = 2, eta = 1,
objective = "binary:logistic")
# Test with dense Matrix
bst <- xgboost(data = dense.x, label = y, max_depth = 2, eta = 1,
objective = "binary:logistic")
############################ Test predict
# Prediction with DMatrix object
dtest <- xgb.DMatrix("agaricus.txt.test")
pred <- predict(bst, dtest)
# Prediction with local test file
pred <- predict(bst, "agaricus.txt.test")
# Performance
# Prediction with Sparse Matrix
csc <- read.libsvm("agaricus.txt.test", 126)
test.y <- csc$label
test.x <- csc$data
pred <- predict(bst, test.x)
# Extract labels with xgb.getinfo
labels <- xgb.getinfo(dtest, "label")
err <- as.numeric(sum(as.integer(pred > 0.5) != labels))/length(labels)
print(paste("error=", err))
# Training with dense matrix
x <- as.matrix(x)
bst <- xgboost(x, y, params = param, watchlist = watchlist)
############################ Save and load model to hard disk
########################### Train with customization
# save model to binary local file
xgb.save(bst, "model.save")
# load binary model to R
bst <- xgb.load("model.save")
pred <- predict(bst, test.x)
# save model to text file
xgb.dump(bst, "model.dump")
# save a DMatrix object to hard disk
xgb.DMatrix.save(dtrain, "dtrain.save")
# load a DMatrix object to R
dtrain <- xgb.DMatrix("dtrain.save")
############################ More flexible training function xgb.train
param <- list(max_depth = 2, eta = 1, silent = 1, objective = "binary:logistic")
watchlist <- list(eval = dtest, train = dtrain)
# training xgboost model
bst <- xgb.train(param, dtrain, nround = 2, watchlist = watchlist)
############################ customized loss function
param <- list(max_depth = 2, eta = 1, silent = 1)
# Note: for a customized objective function we leave 'objective' at its default, and what
# we get as the prediction is the margin value, so you must know what you are doing.
# User-defined objective function: given the predictions, return the gradient and the
# second-order gradient. This one is the log-likelihood (logistic) loss.
@@ -85,10 +144,8 @@ evalerror <- function(preds, dtrain) {
return(list(metric = "error", value = err))
}
bst <- xgboost(x, y, params = param, watchlist = watchlist, obj = logregobj, feval = evalerror)
# Training with a customized objective. We can also train step by step; see the
# implementation of train in xgboost.py for an example.
bst <- xgb.train(param, dtrain, nround = 2, watchlist, logregobj, evalerror)
############################ Train with previous result
bst <- xgboost(x, y, params = param, watchlist = watchlist)
pred <- predict(bst, "agaricus.txt.train", outputmargin = TRUE)
bst2 <- xgboost(x, y, params = param, watchlist = watchlist, margin = pred)
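# A sketch of checking the continued model (reuses y from above; not part of the original
# demo): predict on the training file again and recompute the error after the extra round.
pred2 <- predict(bst2, "agaricus.txt.train")
err2 <- as.numeric(sum(as.integer(pred2 > 0.5) != y)) / length(y)
print(paste("train error after continued training =", err2))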