"""
|
|
Demo for using cross validation
|
|
===============================
|
|
"""
|
|
import os

import numpy as np

import xgboost as xgb
# load data and do training
CURRENT_DIR = os.path.dirname(__file__)
dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train'))
param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
num_round = 2

print('running cross validation')
# do cross validation; this will print the result as
# [iteration]  metric_name:mean_value+std_value
# std_value is the standard deviation of the metric across folds
xgb.cv(param, dtrain, num_round, nfold=5,
       metrics={'error'}, seed=0,
       callbacks=[xgb.callback.EvaluationMonitor(show_stdv=True)])
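
# xgb.cv also returns the evaluation history; with pandas installed this is
# a DataFrame with '{train,test}-{metric}-{mean,std}' columns. A minimal
# sketch of inspecting it (the column name assumes the 'error' metric above):
hist = xgb.cv(param, dtrain, num_round, nfold=5, metrics={'error'}, seed=0)
print(hist['test-error-mean'].iloc[-1])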

print('running cross validation, disable standard deviation display')
# do cross validation; this will print the result as
# [iteration]  metric_name:mean_value
res = xgb.cv(param, dtrain, num_boost_round=10, nfold=5,
             metrics={'error'}, seed=0,
             callbacks=[xgb.callback.EvaluationMonitor(show_stdv=False),
                        xgb.callback.EarlyStopping(rounds=3)])
print(res)
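# with EarlyStopping attached, the returned history is truncated at the best
# iteration, so the row count can be read as the effective number of rounds
# (an observation about this demo's output, not a formal API guarantee):
print('rounds kept after early stopping:', len(res))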

print('running cross validation, with preprocessing function')
# define the preprocessing function: it takes the training data, test data,
# and parameters for a fold, and returns preprocessed versions of all three.
# We can use this to do weight rescaling, etc.
# As an example, we set scale_pos_weight to the negative/positive ratio.
def fpreproc(dtrain, dtest, param):
    label = dtrain.get_label()
    ratio = float(np.sum(label == 0)) / np.sum(label == 1)
    param['scale_pos_weight'] = ratio
    return (dtrain, dtest, param)

# do cross validation: for each fold, dtrain, dtest, and param are passed
# into fpreproc, and its return value is used to generate the results of
# that fold
xgb.cv(param, dtrain, num_round, nfold=5,
       metrics={'auc'}, seed=0, fpreproc=fpreproc)
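
# a quick sanity check of the rescaling fpreproc applies per fold, computed
# here on the full training set (illustration only, not part of the demo API):
full_label = dtrain.get_label()
print('negative/positive ratio:',
      float(np.sum(full_label == 0)) / np.sum(full_label == 1))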

###
# you can also do cross validation with a customized loss function
# See custom_objective.py
##
print('running cross validation, with customized loss function')

def logregobj(preds, dtrain):
    # custom objective: preds are raw margin scores, so apply the sigmoid
    # before computing the gradient and hessian of the logistic loss
    labels = dtrain.get_label()
    preds = 1.0 / (1.0 + np.exp(-preds))
    grad = preds - labels
    hess = preds * (1.0 - preds)
    return grad, hess

def evalerror(preds, dtrain):
    # custom evaluation metric: error rate at a margin threshold of 0.0
    labels = dtrain.get_label()
    return 'error', float(sum(labels != (preds > 0.0))) / len(labels)

param = {'max_depth': 2, 'eta': 1}
# train with the customized objective
xgb.cv(param, dtrain, num_round, nfold=5, seed=0,
       obj=logregobj, feval=evalerror)
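
# as above, the evaluation history can be captured; with pandas installed,
# the custom metric appears under the name returned by evalerror, e.g. the
# 'test-error-mean' column (a sketch, reusing the call just made):
res_custom = xgb.cv(param, dtrain, num_round, nfold=5, seed=0,
                    obj=logregobj, feval=evalerror)
print(res_custom)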