custom eval

hetong 2014-09-06 00:16:55 -07:00
parent 4d00be84c3
commit bb2c61f7b5
2 changed files with 48 additions and 44 deletions

File 1 of 2

@@ -1,31 +1,31 @@
-#!/usr/bin/python
-import sys
-import numpy as np
-sys.path.append('../../wrapper')
-import xgboost as xgb
-###
-# advanced: customized loss function
-#
-print('start running example to use customized objective function')
-dtrain = xgb.DMatrix('../data/agaricus.txt.train')
-dtest = xgb.DMatrix('../data/agaricus.txt.test')
+require(xgboost)
+data(iris)
+iris[,5] <- as.numeric(iris[,5]=='setosa')
+iris <- as.matrix(iris)
+set.seed(20)
+test_ind <- sample(1:nrow(iris),50)
+train_ind <- setdiff(1:nrow(iris),test_ind)
+dtrain <- xgb.DMatrix(iris[train_ind,1:4], label=iris[train_ind,5])
+dtest <- xgb.DMatrix(iris[test_ind,1:4], label=iris[test_ind,5])
 # note: for a customized objective function, we leave objective as default
 # note: what we are getting is the margin value in prediction
 # you must know what you are doing
-param = {'max_depth':2, 'eta':1, 'silent':1}
-watchlist = [(dtest,'eval'), (dtrain,'train')]
-num_round = 2
+param <- list(max_depth=2, eta=1, silent=1)
+watchlist <- list(eval = dtest, train = dtrain)
+num_round <- 2
 # user-defined objective function: given predictions, return the gradient and second-order gradient
 # this is log-likelihood loss
-def logregobj(preds, dtrain):
-    labels = dtrain.get_label()
-    preds = 1.0 / (1.0 + np.exp(-preds))
-    grad = preds - labels
-    hess = preds * (1.0 - preds)
-    return grad, hess
+logregobj <- function(preds, dtrain) {
+  labels <- getinfo(dtrain, "label")
+  preds <- 1/(1 + exp(-preds))
+  grad <- preds - labels
+  hess <- preds * (1 - preds)
+  return(list(grad = grad, hess = hess))
+}
 # user-defined evaluation function: return a pair (metric_name, result)
 # NOTE: when you use a customized loss function, the default prediction value is the margin
@@ -33,11 +33,12 @@ def logregobj(preds, dtrain):
 # for example, we are doing logistic loss: the prediction is the score before the logistic transformation
 # the built-in evaluation error assumes the input is after the logistic transformation
 # keep this in mind when you use the customization; you may need to write a customized evaluation function
-def evalerror(preds, dtrain):
-    labels = dtrain.get_label()
-    # return a pair metric_name, result
-    # since preds are margin (before logistic transformation, cutoff at 0)
-    return 'error', float(sum(labels != (preds > 0.0))) / len(labels)
+evalerror <- function(preds, dtrain) {
+  labels <- getinfo(dtrain, "label")
+  err <- as.numeric(sum(labels != (preds > 0)))/length(labels)
+  return(list(metric = "error", value = err))
+}
 # training with customized objective; we can also do step-by-step training
 # simply look at xgboost.py's implementation of train
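
Neither hunk reaches the training call itself, which sits below the second hunk. As a reading aid, here is a minimal sketch of how the two functions above plug into training in the R package, assuming the obj and feval arguments of xgb.train (argument names taken from the xgboost R API, not shown in this diff):

# pass the customized objective and evaluation function to the booster
bst <- xgb.train(param, dtrain, num_round, watchlist, obj = logregobj, feval = evalerror)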

File 2 of 2

@@ -1,22 +1,25 @@
-#!/usr/bin/python
-import sys
-import numpy as np
-sys.path.append('../../wrapper')
-import xgboost as xgb
-### load data and do training
-dtrain = xgb.DMatrix('../data/agaricus.txt.train')
-dtest = xgb.DMatrix('../data/agaricus.txt.test')
-param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
-watchlist = [(dtest,'eval'), (dtrain,'train')]
-num_round = 3
+require(xgboost)
+data(iris)
+iris[,5] <- as.numeric(iris[,5]=='setosa')
+iris <- as.matrix(iris)
+set.seed(20)
+test_ind <- sample(1:nrow(iris),50)
+train_ind <- setdiff(1:nrow(iris),test_ind)
+dtrain <- xgb.DMatrix(iris[train_ind,1:4], label=iris[train_ind,5])
+dtest <- xgb.DMatrix(iris[test_ind,1:4], label=iris[test_ind,5])
+param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
+watchlist <- list(eval = dtest, train = dtrain)
+num_round = 2
 bst = xgb.train(param, dtrain, num_round, watchlist)
-print('start testing prediction from first n trees')
-### predict using first 1 tree
-label = dtest.get_label()
-ypred1 = bst.predict(dtest, ntree_limit=1)
-# by default, we predict using all the trees
-ypred2 = bst.predict(dtest)
-print('error of ypred1=%f' % (np.sum((ypred1 > 0.5) != label) / float(len(label))))
-print('error of ypred2=%f' % (np.sum((ypred2 > 0.5) != label) / float(len(label))))
+cat('start testing prediction from first n trees\n')
+labels <- getinfo(dtest, 'label')
+ypred1 = predict(bst, dtest, ntreelimit=1)
+ypred2 = predict(bst, dtest)
+cat('error of ypred1=', mean(as.numeric(ypred1>0.5)!=labels), '\n')
+cat('error of ypred2=', mean(as.numeric(ypred2>0.5)!=labels), '\n')
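
A quick way to see what ntreelimit does (a sketch, not part of this commit, and assuming deterministic tree construction): boosting grows trees sequentially, so the first tree of bst is the same tree a one-round booster would build on the same data, and ypred1 should match that booster's predictions.

# train for a single round and compare against the ntreelimit=1 prediction
bst1 <- xgb.train(param, dtrain, 1, watchlist)
stopifnot(isTRUE(all.equal(ypred1, predict(bst1, dtest))))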