""" Demo for using cross validation =============================== """ import os import numpy as np import xgboost as xgb # load data in do training CURRENT_DIR = os.path.dirname(__file__) dtrain = xgb.DMatrix( os.path.join(CURRENT_DIR, "../data/agaricus.txt.train?format=libsvm") ) param = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"} num_round = 2 print("running cross validation") # do cross validation, this will print result out as # [iteration] metric_name:mean_value+std_value # std_value is standard deviation of the metric xgb.cv( param, dtrain, num_round, nfold=5, metrics={"error"}, seed=0, callbacks=[xgb.callback.EvaluationMonitor(show_stdv=True)], ) print("running cross validation, disable standard deviation display") # do cross validation, this will print result out as # [iteration] metric_name:mean_value res = xgb.cv( param, dtrain, num_boost_round=10, nfold=5, metrics={"error"}, seed=0, callbacks=[ xgb.callback.EvaluationMonitor(show_stdv=False), xgb.callback.EarlyStopping(3), ], ) print(res) print("running cross validation, with preprocessing function") # define the preprocessing function # used to return the preprocessed training, test data, and parameter # we can use this to do weight rescale, etc. # as a example, we try to set scale_pos_weight def fpreproc(dtrain, dtest, param): label = dtrain.get_label() ratio = float(np.sum(label == 0)) / np.sum(label == 1) param["scale_pos_weight"] = ratio return (dtrain, dtest, param) # do cross validation, for each fold # the dtrain, dtest, param will be passed into fpreproc # then the return value of fpreproc will be used to generate # results of that fold xgb.cv(param, dtrain, num_round, nfold=5, metrics={"auc"}, seed=0, fpreproc=fpreproc) ### # you can also do cross validation with customized loss function # See custom_objective.py ## print("running cross validation, with customized loss function") def logregobj(preds, dtrain): labels = dtrain.get_label() preds = 1.0 / (1.0 + np.exp(-preds)) grad = preds - labels hess = preds * (1.0 - preds) return grad, hess def evalerror(preds, dtrain): labels = dtrain.get_label() return "error", float(sum(labels != (preds > 0.0))) / len(labels) param = {"max_depth": 2, "eta": 1} # train with customized objective xgb.cv(param, dtrain, num_round, nfold=5, seed=0, obj=logregobj, feval=evalerror)