modify demo filenames
parent d9f363632a
commit 262108cf3b
@@ -1,102 +0,0 @@
-require(xgboost)
-
-# helper function to read libsvm format
-# this is very badly written, load in dense, and convert to sparse
-# use this only for demo purposes
-# adapted from https://github.com/zygmuntz/r-libsvm-format-read-write/blob/master/f_read.libsvm.r
-read.libsvm = function(fname, maxcol) {
-  content = readLines(fname)
-  nline = length(content)
-  label = numeric(nline)
-  mat = matrix(0, nline, maxcol+1)
-  for (i in 1:nline) {
-    arr = as.vector(strsplit(content[i], " ")[[1]])
-    label[i] = as.numeric(arr[[1]])
-    for (j in 2:length(arr)) {
-      kv = strsplit(arr[j], ":")[[1]]
-      # to avoid 0 index
-      findex = as.integer(kv[1]) + 1
-      fvalue = as.numeric(kv[2])
-      mat[i,findex] = fvalue
-    }
-  }
-  mat = as(mat, "sparseMatrix")
-  return(list(label=label, data=mat))
-}
-
-# Parameter setting
-dtrain <- xgb.DMatrix("agaricus.txt.train")
-dtest <- xgb.DMatrix("agaricus.txt.test")
-param = list("bst:max_depth"=2, "bst:eta"=1, "silent"=1, "objective"="binary:logistic")
-watchlist = list("eval"=dtest,"train"=dtrain)
-
-###########################
-# Train from local file
-###########################
-
-# Training
-bst = xgboost(file='agaricus.txt.train',params=param,watchlist=watchlist)
-# Prediction
-pred = predict(bst, 'agaricus.txt.test')
-# Performance
-labels = xgb.getinfo(dtest, "label")
-err = as.numeric(sum(as.integer(pred > 0.5) != labels)) / length(labels)
-print(paste("error=",err))
-
-###########################
-# Train from R object
-###########################
-
-csc = read.libsvm("agaricus.txt.train", 126)
-y = csc$label
-x = csc$data
-# x as Sparse Matrix
-class(x)
-
-# Training
-bst = xgboost(x,y,params=param,watchlist=watchlist)
-# Prediction
-pred = predict(bst, 'agaricus.txt.test')
-# Performance
-labels = xgb.getinfo(dtest, "label")
-err = as.numeric(sum(as.integer(pred > 0.5) != labels)) / length(labels)
-print(paste("error=",err))
-
-# Training with dense matrix
-x = as.matrix(x)
-bst = xgboost(x,y,params=param,watchlist=watchlist)
-
-###########################
-# Train with customization
-###########################
-
-# user defined objective function: given prediction, return gradient and second order gradient
-# this is log-likelihood loss
-logregobj = function(preds, dtrain) {
-  labels = xgb.getinfo(dtrain, "label")
-  preds = 1.0 / (1.0 + exp(-preds))
-  grad = preds - labels
-  hess = preds * (1.0-preds)
-  return(list(grad=grad, hess=hess))
-}
-# user defined evaluation function, return a list(metric="metric-name", value="metric-value")
-# NOTE: when you use a customized loss function, the default prediction value is the margin
-# this may make built-in evaluation metrics not function properly
-# for example, with logistic loss the prediction is the score before the logistic transformation
-# the built-in evaluation error assumes the input is after the logistic transformation
-# keep this in mind when you use the customization; you may need to write a customized evaluation function
-evalerror = function(preds, dtrain) {
-  labels = xgb.getinfo(dtrain, "label")
-  err = as.numeric(sum(labels != (preds > 0.0))) / length(labels)
-  return(list(metric="error", value=err))
-}
-
-bst = xgboost(x,y,params=param,watchlist=watchlist,obj=logregobj, feval=evalerror)
-
-############################
-# Train with previous result
-############################
-
-bst = xgboost(x,y,params=param,watchlist=watchlist)
-pred = predict(bst, 'agaricus.txt.train', outputmargin=TRUE)
-bst2 = xgboost(x,y,params=param,watchlist=watchlist,margin=pred)
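A side note on the `read.libsvm` helper deleted above (the demo itself calls it "very badly written"): it materializes the data as a dense matrix before converting to sparse, and its inner loop `2:length(arr)` counts backwards (2:1) on a line that carries only a label. A guarded, self-contained sketch in plain R, not part of the commit; it assumes only base R plus the Matrix package (which provides the "sparseMatrix" class targeted by `as()`), and `read.libsvm2` and the temp-file data are invented for illustration:

    require(Matrix)   # provides the "sparseMatrix" class used below

    tmp <- tempfile()
    writeLines(c("1 0:1.5 3:2.0",
                 "0 2:0.5",
                 "1"), tmp)    # third line carries a label but no features

    read.libsvm2 <- function(fname, maxcol) {
      content <- readLines(fname)
      label   <- numeric(length(content))
      mat     <- matrix(0, length(content), maxcol + 1)
      for (i in seq_along(content)) {
        arr      <- strsplit(content[i], " ")[[1]]
        label[i] <- as.numeric(arr[1])
        # seq_len() yields an empty sequence on label-only lines,
        # where 2:length(arr) would count backwards
        for (j in seq_len(length(arr) - 1) + 1) {
          kv <- strsplit(arr[j], ":")[[1]]
          mat[i, as.integer(kv[1]) + 1] <- as.numeric(kv[2])  # shift to 1-based
        }
      }
      list(label = label, data = as(mat, "sparseMatrix"))
    }

    res <- read.libsvm2(tmp, 3)
    stopifnot(identical(res$label, c(1, 0, 1)), all(dim(res$data) == c(3, 4)))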
R-package/inst/examples/demo-old.R  (new file, 127 lines)
@@ -0,0 +1,127 @@
+# load xgboost library
+require(xgboost)
+require(methods)
+
+# helper function to read libsvm format
+# this is very badly written, load in dense, and convert to sparse
+# use this only for demo purposes
+# adapted from https://github.com/zygmuntz/r-libsvm-format-read-write/blob/master/f_read.libsvm.r
+read.libsvm <- function(fname, maxcol) {
+  content <- readLines(fname)
+  nline <- length(content)
+  label <- numeric(nline)
+  mat <- matrix(0, nline, maxcol+1)
+  for (i in 1:nline) {
+    arr <- as.vector(strsplit(content[i], " ")[[1]])
+    label[i] <- as.numeric(arr[[1]])
+    for (j in 2:length(arr)) {
+      kv <- strsplit(arr[j], ":")[[1]]
+      # to avoid 0 index
+      findex <- as.integer(kv[1]) + 1
+      fvalue <- as.numeric(kv[2])
+      mat[i,findex] <- fvalue
+    }
+  }
+  mat <- as(mat, "sparseMatrix")
+  return(list(label=label, data=mat))
+}
+
+# test code here
+dtrain <- xgb.DMatrix("agaricus.txt.train")
+dtest <- xgb.DMatrix("agaricus.txt.test")
+param = list("bst:max_depth"=2, "bst:eta"=1, "silent"=1, "objective"="binary:logistic")
+watchlist <- list("eval"=dtest,"train"=dtrain)
+# training xgboost model
+bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)
+# make prediction
+preds <- xgb.predict(bst, dtest)
+labels <- xgb.getinfo(dtest, "label")
+err <- as.numeric(sum(as.integer(preds > 0.5) != labels)) / length(labels)
+# print error rate
+print(paste("error=",err))
+
+# dump model
+xgb.dump(bst, "dump.raw.txt")
+# dump model with feature map
+xgb.dump(bst, "dump.nice.txt", "featmap.txt")
+
+# save dmatrix into binary buffer
+succ <- xgb.save(dtest, "dtest.buffer")
+# save model into file
+succ <- xgb.save(bst, "xgb.model")
+# load model and data in
+bst2 <- xgb.Booster(modelfile="xgb.model")
+dtest2 <- xgb.DMatrix("dtest.buffer")
+preds2 <- xgb.predict(bst2, dtest2)
+# assert they are the same
+stopifnot(sum(abs(preds2-preds)) == 0)
+
+###
+# build dmatrix from sparseMatrix
+###
+print('start running example of build DMatrix from R.sparseMatrix')
+csc <- read.libsvm("agaricus.txt.train", 126)
+label <- csc$label
+data <- csc$data
+dtrain <- xgb.DMatrix(data, info=list(label=label))
+watchlist <- list("eval"=dtest,"train"=dtrain)
+bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)
+
+###
+# build dmatrix from dense matrix
+###
+print('start running example of build DMatrix from R.Matrix')
+mat = as.matrix(data)
+dtrain <- xgb.DMatrix(mat, info=list(label=label))
+watchlist <- list("eval"=dtest,"train"=dtrain)
+bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)
+
+###
+# advanced: customized loss function
+#
+print("start running example to use customized objective function")
+# note: for customized objective function, we leave objective as default
+# note: what we are getting is margin value in prediction
+# you must know what you are doing
+param <- list("bst:max_depth" = 2, "bst:eta" = 1, "silent" = 1)
+# user defined objective function: given prediction, return gradient and second order gradient
+# this is log-likelihood loss
+logregobj <- function(preds, dtrain) {
+  labels <- xgb.getinfo(dtrain, "label")
+  preds <- 1.0 / (1.0 + exp(-preds))
+  grad <- preds - labels
+  hess <- preds * (1.0-preds)
+  return(list(grad=grad, hess=hess))
+}
+# user defined evaluation function, return a list(metric="metric-name", value="metric-value")
+# NOTE: when you use a customized loss function, the default prediction value is the margin
+# this may make built-in evaluation metrics not function properly
+# for example, with logistic loss the prediction is the score before the logistic transformation
+# the built-in evaluation error assumes the input is after the logistic transformation
+# keep this in mind when you use the customization; you may need to write a customized evaluation function
+evalerror <- function(preds, dtrain) {
+  labels <- xgb.getinfo(dtrain, "label")
+  err <- as.numeric(sum(labels != (preds > 0.0))) / length(labels)
+  return(list(metric="error", value=err))
+}
+
+# training with customized objective; we can also do step-by-step training
+# simply look at xgboost.py's implementation of train
+bst <- xgb.train(param, dtrain, nround=2, watchlist, logregobj, evalerror)
+
+###
+# advanced: start from an initial base prediction
+#
+print("start running example to start from an initial prediction")
+# specify parameters via map, definitions are the same as the C++ version
+param = list("bst:max_depth"=2, "bst:eta"=1, "silent"=1, "objective"="binary:logistic")
+# train xgboost for 1 round
+bst <- xgb.train(param, dtrain, 1, watchlist)
+# Note: we need the margin value instead of the transformed prediction in set_base_margin
+# predicting with outputmargin=TRUE always gives margin values before the logistic transformation
+ptrain <- xgb.predict(bst, dtrain, outputmargin=TRUE)
+ptest <- xgb.predict(bst, dtest, outputmargin=TRUE)
+succ <- xgb.setinfo(dtrain, "base_margin", ptrain)
+succ <- xgb.setinfo(dtest, "base_margin", ptest)
+print("this is result of running from initial prediction")
+bst <- xgb.train(param, dtrain, 1, watchlist)
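A note on `logregobj` in demo-old.R above: it returns `grad = preds - labels` and `hess = preds * (1 - preds)` after applying the sigmoid, which are exactly the first and second derivatives of the negative log-likelihood with respect to the raw margin. A self-contained finite-difference check in plain R, not part of the file; every name below is a local helper:

    sigmoid <- function(f) 1 / (1 + exp(-f))
    logloss <- function(f, y) -(y * log(sigmoid(f)) + (1 - y) * log(1 - sigmoid(f)))

    f <- 0.7; y <- 1; eps <- 1e-5
    # central differences approximate d(logloss)/df and d2(logloss)/df2
    grad_num <- (logloss(f + eps, y) - logloss(f - eps, y)) / (2 * eps)
    hess_num <- (logloss(f + eps, y) - 2 * logloss(f, y) + logloss(f - eps, y)) / eps^2

    p <- sigmoid(f)
    stopifnot(abs(grad_num - (p - y)) < 1e-6,      # matches grad = p - y
              abs(hess_num - p * (1 - p)) < 1e-4)  # matches hess = p * (1 - p)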
@@ -1,97 +1,83 @@
-# load xgboost library
 require(xgboost)
-require(methods)
 
 # helper function to read libsvm format
 # this is very badly written, load in dense, and convert to sparse
 # use this only for demo purposes
 # adapted from https://github.com/zygmuntz/r-libsvm-format-read-write/blob/master/f_read.libsvm.r
-read.libsvm <- function(fname, maxcol) {
-  content <- readLines(fname)
-  nline <- length(content)
-  label <- numeric(nline)
-  mat <- matrix(0, nline, maxcol+1)
+read.libsvm = function(fname, maxcol) {
+  content = readLines(fname)
+  nline = length(content)
+  label = numeric(nline)
+  mat = matrix(0, nline, maxcol+1)
   for (i in 1:nline) {
-    arr <- as.vector(strsplit(content[i], " ")[[1]])
-    label[i] <- as.numeric(arr[[1]])
+    arr = as.vector(strsplit(content[i], " ")[[1]])
+    label[i] = as.numeric(arr[[1]])
     for (j in 2:length(arr)) {
-      kv <- strsplit(arr[j], ":")[[1]]
+      kv = strsplit(arr[j], ":")[[1]]
       # to avoid 0 index
-      findex <- as.integer(kv[1]) + 1
-      fvalue <- as.numeric(kv[2])
-      mat[i,findex] <- fvalue
+      findex = as.integer(kv[1]) + 1
+      fvalue = as.numeric(kv[2])
+      mat[i,findex] = fvalue
    }
  }
-  mat <- as(mat, "sparseMatrix")
+  mat = as(mat, "sparseMatrix")
   return(list(label=label, data=mat))
 }
 
-# test code here
+# Parameter setting
 dtrain <- xgb.DMatrix("agaricus.txt.train")
 dtest <- xgb.DMatrix("agaricus.txt.test")
 param = list("bst:max_depth"=2, "bst:eta"=1, "silent"=1, "objective"="binary:logistic")
-watchlist <- list("eval"=dtest,"train"=dtrain)
-# training xgboost model
-bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)
-# make prediction
-preds <- xgb.predict(bst, dtest)
-labels <- xgb.getinfo(dtest, "label")
-err <- as.numeric(sum(as.integer(preds > 0.5) != labels)) / length(labels)
-# print error rate
+watchlist = list("eval"=dtest,"train"=dtrain)
+
+###########################
+# Train from local file
+###########################
+
+# Training
+bst = xgboost(file='agaricus.txt.train',params=param,watchlist=watchlist)
+# Prediction
+pred = predict(bst, 'agaricus.txt.test')
+# Performance
+labels = xgb.getinfo(dtest, "label")
+err = as.numeric(sum(as.integer(pred > 0.5) != labels)) / length(labels)
 print(paste("error=",err))
 
-# dump model
-xgb.dump(bst, "dump.raw.txt")
-# dump model with feature map
-xgb.dump(bst, "dump.nice.txt", "featmap.txt")
-
-# save dmatrix into binary buffer
-succ <- xgb.save(dtest, "dtest.buffer")
-# save model into file
-succ <- xgb.save(bst, "xgb.model")
-# load model and data in
-bst2 <- xgb.Booster(modelfile="xgb.model")
-dtest2 <- xgb.DMatrix("dtest.buffer")
-preds2 <- xgb.predict(bst2, dtest2)
-# assert they are the same
-stopifnot(sum(abs(preds2-preds)) == 0)
-
-###
-# build dmatrix from sparseMatrix
-###
-print('start running example of build DMatrix from R.sparseMatrix')
-csc <- read.libsvm("agaricus.txt.train", 126)
-label <- csc$label
-data <- csc$data
-dtrain <- xgb.DMatrix(data, info=list(label=label))
-watchlist <- list("eval"=dtest,"train"=dtrain)
-bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)
-
-###
-# build dmatrix from dense matrix
-###
-print('start running example of build DMatrix from R.Matrix')
-mat = as.matrix(data)
-dtrain <- xgb.DMatrix(mat, info=list(label=label))
-watchlist <- list("eval"=dtest,"train"=dtrain)
-bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)
-
-###
-# advanced: customized loss function
-#
-print("start running example to use customized objective function")
-# note: for customized objective function, we leave objective as default
-# note: what we are getting is margin value in prediction
-# you must know what you are doing
-param <- list("bst:max_depth" = 2, "bst:eta" = 1, "silent" = 1)
+###########################
+# Train from R object
+###########################
+
+csc = read.libsvm("agaricus.txt.train", 126)
+y = csc$label
+x = csc$data
+# x as Sparse Matrix
+class(x)
+
+# Training
+bst = xgboost(x,y,params=param,watchlist=watchlist)
+# Prediction
+pred = predict(bst, 'agaricus.txt.test')
+# Performance
+labels = xgb.getinfo(dtest, "label")
+err = as.numeric(sum(as.integer(pred > 0.5) != labels)) / length(labels)
+print(paste("error=",err))
+
+# Training with dense matrix
+x = as.matrix(x)
+bst = xgboost(x,y,params=param,watchlist=watchlist)
+
+###########################
+# Train with customization
+###########################
+
 # user defined objective function: given prediction, return gradient and second order gradient
 # this is log-likelihood loss
-logregobj <- function(preds, dtrain) {
-  labels <- xgb.getinfo(dtrain, "label")
-  preds <- 1.0 / (1.0 + exp(-preds))
-  grad <- preds - labels
-  hess <- preds * (1.0-preds)
+logregobj = function(preds, dtrain) {
+  labels = xgb.getinfo(dtrain, "label")
+  preds = 1.0 / (1.0 + exp(-preds))
+  grad = preds - labels
+  hess = preds * (1.0-preds)
   return(list(grad=grad, hess=hess))
 }
 # user defined evaluation function, return a list(metric="metric-name", value="metric-value")
 # NOTE: when you use a customized loss function, the default prediction value is the margin
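A note on `evalerror` in the hunk below: it thresholds the raw margin at 0.0, while the earlier demos threshold the probability at 0.5. The two rules agree because the logistic transform is monotone and maps a margin of 0 to a probability of exactly 0.5. A one-line check in plain R, not part of the diff; `sigmoid` is a local helper, not an xgboost function:

    sigmoid <- function(f) 1 / (1 + exp(-f))
    margins <- c(-2, -0.1, 0, 0.3, 1.5)
    # margin > 0 and probability > 0.5 pick out exactly the same cases
    stopifnot(all((margins > 0) == (sigmoid(margins) > 0.5)))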
@@ -99,29 +85,18 @@ logregobj <- function(preds, dtrain) {
 # for example, with logistic loss the prediction is the score before the logistic transformation
 # the built-in evaluation error assumes the input is after the logistic transformation
 # keep this in mind when you use the customization; you may need to write a customized evaluation function
-evalerror <- function(preds, dtrain) {
-  labels <- xgb.getinfo(dtrain, "label")
-  err <- as.numeric(sum(labels != (preds > 0.0))) / length(labels)
+evalerror = function(preds, dtrain) {
+  labels = xgb.getinfo(dtrain, "label")
+  err = as.numeric(sum(labels != (preds > 0.0))) / length(labels)
   return(list(metric="error", value=err))
 }
 
-# training with customized objective; we can also do step-by-step training
-# simply look at xgboost.py's implementation of train
-bst <- xgb.train(param, dtrain, nround=2, watchlist, logregobj, evalerror)
+bst = xgboost(x,y,params=param,watchlist=watchlist,obj=logregobj, feval=evalerror)
 
-###
-# advanced: start from an initial base prediction
-#
-print("start running example to start from an initial prediction")
-# specify parameters via map, definitions are the same as the C++ version
-param = list("bst:max_depth"=2, "bst:eta"=1, "silent"=1, "objective"="binary:logistic")
-# train xgboost for 1 round
-bst <- xgb.train(param, dtrain, 1, watchlist)
-# Note: we need the margin value instead of the transformed prediction in set_base_margin
-# predicting with outputmargin=TRUE always gives margin values before the logistic transformation
-ptrain <- xgb.predict(bst, dtrain, outputmargin=TRUE)
-ptest <- xgb.predict(bst, dtest, outputmargin=TRUE)
-succ <- xgb.setinfo(dtrain, "base_margin", ptrain)
-succ <- xgb.setinfo(dtest, "base_margin", ptest)
-print("this is result of running from initial prediction")
-bst <- xgb.train(param, dtrain, 1, watchlist)
+############################
+# Train with previous result
+############################
+
+bst = xgboost(x,y,params=param,watchlist=watchlist)
+pred = predict(bst, 'agaricus.txt.train', outputmargin=TRUE)
+bst2 = xgboost(x,y,params=param,watchlist=watchlist,margin=pred)
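Both versions of the demo end with training continuation: predict with `outputmargin=TRUE` to obtain raw scores, then hand them back to the next training run (`margin=pred` in the new demo, `base_margin` via `xgb.setinfo` in the old one), so the second booster starts from those scores rather than from zero. The underlying fact is that boosting stages add in margin space, with the logistic transform applied once at the end. A toy illustration in plain R, not part of the diff; the numbers are invented:

    sigmoid <- function(f) 1 / (1 + exp(-f))

    base_margin <- c(-1.2, 0.4, 2.0)   # raw scores from a first training run
    stage2      <- c( 0.3, 0.1, -0.5)  # corrections learned on top of that base

    prob <- sigmoid(base_margin + stage2)  # margins add; transform applied once
    print(round(prob, 3))                  # 0.289 0.622 0.818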