* Fix various typos
* Add override to functions that are overridden: gcc gives warnings about functions that are overridden but not marked as overridden. This fixes it.
* Use bst_float consistently: use bst_float for all the variables that involve weight, leaf value, gradient, hessian, gain, loss_chg, predictions, base_margin, and feature values. In some cases, when due to additions and so on the value can take a larger value, double is used. This ensures that type conversions are minimal and reduces loss of precision.
#!/usr/bin/python
import numpy as np
import xgboost as xgb

###
# advanced: customized loss function
#
print('start running example to use customized objective function')

dtrain = xgb.DMatrix('../data/agaricus.txt.train')
dtest = xgb.DMatrix('../data/agaricus.txt.test')
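# (the agaricus demo data loaded above ships with xgboost in LibSVM text format)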

# note: for a customized objective function, we leave 'objective' as default
# note: what we get in prediction is the margin value
# you must know what you are doing
param = {'max_depth': 2, 'eta': 1, 'silent': 1}
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
num_round = 2

# user-defined objective function; given the predictions, return the
# gradient and the second-order gradient (hessian)
# this is log-likelihood loss
def logregobj(preds, dtrain):
    labels = dtrain.get_label()
    preds = 1.0 / (1.0 + np.exp(-preds))
    grad = preds - labels
    hess = preds * (1.0 - preds)
    return grad, hess
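
# a quick check of the math above: with p = sigmoid(margin) and the logistic
# negative log-likelihood loss L = -[y*log(p) + (1-y)*log(1-p)],
# dL/dmargin = p - y and d^2L/dmargin^2 = p*(1-p),
# which is exactly the (grad, hess) pair logregobj returns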

# user-defined evaluation function; returns a pair (metric_name, result)
# NOTE: with a customized objective, the default prediction value is the margin,
# which may make builtin evaluation metrics not function properly
# for example, when doing logistic loss the prediction is the score before the
# logistic transformation, while the builtin evaluation error assumes the input
# is after the logistic transformation
# keep this in mind when you use the customization; you may need to write a
# customized evaluation function as well
def evalerror(preds, dtrain):
    labels = dtrain.get_label()
    # return a pair metric_name, result
    # since preds are margins (before the logistic transformation), cut off at 0
    return 'error', float(sum(labels != (preds > 0.0))) / len(labels)
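
# note: thresholding the margin at 0 is equivalent to thresholding the
# predicted probability at 0.5, since sigmoid(0) = 0.5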

# training with a customized objective; we can also do training step by step
# (see the sketch below); simply look at xgboost.py's implementation of train
bst = xgb.train(param, dtrain, num_round, watchlist, logregobj, evalerror)
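
# a minimal sketch of the step-by-step training mentioned above, assuming the
# Booster(params, cache) constructor and Booster.update(dtrain, iteration, fobj)
# behave as they do inside xgb.train; bst2 is an illustrative name, not part of
# the original example
bst2 = xgb.Booster(param, [dtrain, dtest])
for i in range(num_round):
    bst2.update(dtrain, i, logregobj)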