custom eval

hetong 2014-09-06 00:16:55 -07:00
parent 4d00be84c3
commit bb2c61f7b5
2 changed files with 48 additions and 44 deletions

File 1 of 2

@@ -1,31 +1,31 @@
-#!/usr/bin/python
-import sys
-import numpy as np
-sys.path.append('../../wrapper')
-import xgboost as xgb
-###
-# advanced: customized loss function
-#
-print('start running example to use customized objective function')
-dtrain = xgb.DMatrix('../data/agaricus.txt.train')
-dtest = xgb.DMatrix('../data/agaricus.txt.test')
+require(xgboost)
+data(iris)
+iris[,5] <- as.numeric(iris[,5]=='setosa')
+iris <- as.matrix(iris)
+set.seed(20)
+test_ind <- sample(1:nrow(iris),50)
+train_ind <- setdiff(1:nrow(iris),test_ind)
+dtrain <- xgb.DMatrix(iris[train_ind,1:4], label=iris[train_ind,5])
+dtest <- xgb.DMatrix(iris[test_ind,1:4], label=iris[test_ind,5])
 # note: for a customized objective function, we leave objective as default
 # note: what we are getting is the margin value in prediction
 # you must know what you are doing
-param = {'max_depth':2, 'eta':1, 'silent':1}
-watchlist = [(dtest,'eval'), (dtrain,'train')]
-num_round = 2
+param <- list(max_depth=2, eta=1, silent=1)
+watchlist <- list(eval = dtest, train = dtrain)
+num_round <- 2
 # user-defined objective function: given predictions, return the gradient and second-order gradient
 # this is log-likelihood loss
-def logregobj(preds, dtrain):
-    labels = dtrain.get_label()
-    preds = 1.0 / (1.0 + np.exp(-preds))
-    grad = preds - labels
-    hess = preds * (1.0 - preds)
-    return grad, hess
+logregobj <- function(preds, dtrain) {
+  labels <- getinfo(dtrain, "label")
+  preds <- 1/(1 + exp(-preds))
+  grad <- preds - labels
+  hess <- preds * (1 - preds)
+  return(list(grad = grad, hess = hess))
+}
 # user-defined evaluation function: return a pair (metric_name, result)
 # NOTE: when you use a customized loss function, the default prediction value is the margin
@@ -33,11 +33,12 @@ def logregobj(preds, dtrain):
 # for example, we are doing logistic loss: the prediction is the score before the logistic transformation
 # the built-in evaluation error assumes the input is after the logistic transformation
 # keep this in mind when you use the customization; you may need to write a customized evaluation function
-def evalerror(preds, dtrain):
-    labels = dtrain.get_label()
-    # return a pair metric_name, result
-    # since preds are margin (before logistic transformation, cutoff at 0)
-    return 'error', float(sum(labels != (preds > 0.0))) / len(labels)
+evalerror <- function(preds, dtrain) {
+  labels <- getinfo(dtrain, "label")
+  err <- as.numeric(sum(labels != (preds > 0)))/length(labels)
+  return(list(metric = "error", value = err))
+}
 # training with customized objective; we can also do step-by-step training
 # simply look at xgboost.py's implementation of train
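
Neither hunk reaches the training call itself, which sits below the second hunk. As a reading aid, here is a minimal sketch of how the two functions above plug into training in the R package, assuming the obj and feval arguments of xgb.train (argument names taken from the xgboost R API, not shown in this diff):

# pass the customized objective and evaluation function to the booster
bst <- xgb.train(param, dtrain, num_round, watchlist, obj = logregobj, feval = evalerror)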

File 2 of 2

@@ -1,22 +1,25 @@
-#!/usr/bin/python
-import sys
-import numpy as np
-sys.path.append('../../wrapper')
-import xgboost as xgb
-### load data and do training
-dtrain = xgb.DMatrix('../data/agaricus.txt.train')
-dtest = xgb.DMatrix('../data/agaricus.txt.test')
-param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
-watchlist = [(dtest,'eval'), (dtrain,'train')]
-num_round = 3
+require(xgboost)
+data(iris)
+iris[,5] <- as.numeric(iris[,5]=='setosa')
+iris <- as.matrix(iris)
+set.seed(20)
+test_ind <- sample(1:nrow(iris),50)
+train_ind <- setdiff(1:nrow(iris),test_ind)
+dtrain <- xgb.DMatrix(iris[train_ind,1:4], label=iris[train_ind,5])
+dtest <- xgb.DMatrix(iris[test_ind,1:4], label=iris[test_ind,5])
+param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
+watchlist <- list(eval = dtest, train = dtrain)
+num_round = 2
 bst = xgb.train(param, dtrain, num_round, watchlist)
-print('start testing prediction from first n trees')
-### predict using first 1 tree
-label = dtest.get_label()
-ypred1 = bst.predict(dtest, ntree_limit=1)
-# by default, we predict using all the trees
-ypred2 = bst.predict(dtest)
-print('error of ypred1=%f' % (np.sum((ypred1 > 0.5) != label) / float(len(label))))
-print('error of ypred2=%f' % (np.sum((ypred2 > 0.5) != label) / float(len(label))))
+cat('start testing prediction from first n trees\n')
+labels <- getinfo(dtest, 'label')
+ypred1 = predict(bst, dtest, ntreelimit=1)
+ypred2 = predict(bst, dtest)
+cat('error of ypred1=', mean(as.numeric(ypred1>0.5)!=labels), '\n')
+cat('error of ypred2=', mean(as.numeric(ypred2>0.5)!=labels), '\n')
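
A quick way to see what ntreelimit does (a sketch, not part of this commit, and assuming deterministic tree construction): boosting grows trees sequentially, so the first tree of bst is the same tree a one-round booster would build on the same data, and ypred1 should match that booster's predictions.

# train for a single round and compare against the ntreelimit=1 prediction
bst1 <- xgb.train(param, dtrain, 1, watchlist)
stopifnot(isTRUE(all.equal(ypred1, predict(bst1, dtest))))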