"""
|
|
Demo for GLM
|
|
============
|
|
"""
|
|
import os

import xgboost as xgb

##
# This script demonstrates how to fit a generalized linear model in XGBoost:
# basically, we are using a linear model instead of trees as the booster.
##
CURRENT_DIR = os.path.dirname(__file__)
dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train'))
dtest = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.test'))
# Change the booster to gblinear, so that we are fitting a linear model.
# alpha is the L1 regularizer.
# lambda is the L2 regularizer.
# You can also set lambda_bias, which is the L2 regularizer on the bias term.
param = {'objective': 'binary:logistic', 'booster': 'gblinear',
         'alpha': 0.0001, 'lambda': 1}
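# e.g. to also regularize the bias term mentioned above (uncomment to try):
# param['lambda_bias'] = 1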

# Normally you do not need to set eta (the step size).
# XGBoost uses a parallel coordinate descent algorithm (shotgun);
# parallelization can affect convergence in certain cases, and setting
# eta to a smaller value, e.g. 0.5, can make the optimization more stable.
# param['eta'] = 1
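# For instance, following the note above, a smaller step size could be tried:
# param['eta'] = 0.5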

##
# The rest of the settings are the same.
##
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
num_round = 4
bst = xgb.train(param, dtrain, num_round, watchlist)
preds = bst.predict(dtest)
labels = dtest.get_label()
print('error=%f' %
      (sum(1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]) /
       float(len(preds))))
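
# Optionally, inspect the fitted linear model; for a gblinear booster,
# get_dump() returns the bias and the per-feature weights as text:
# print(bst.get_dump()[0])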