From 512a0f69fdb7e0678c5d666bc51b636af5c2a508 Mon Sep 17 00:00:00 2001
From: tqchen
Date: Thu, 4 Sep 2014 21:09:52 -0700
Subject: [PATCH] add glm

---
 demo/README.md                                |  9 +++---
 demo/guide-python/README.md                   |  1 +
 demo/guide-python/generalized_linear_model.py | 32 +++++++++++++++++++
 demo/guide-python/runall.sh                   |  1 +
 4 files changed, 39 insertions(+), 4 deletions(-)
 create mode 100755 demo/guide-python/generalized_linear_model.py

diff --git a/demo/README.md b/demo/README.md
index e83bde6b4..5414e642e 100644
--- a/demo/README.md
+++ b/demo/README.md
@@ -8,10 +8,11 @@ This folder contains the all example codes using xgboost.
 Features Walkthrough
 ====
 This is a list of short codes introducing different functionalities of xgboost and its wrapper.
-* Basic walkthrough of wrappers. [python](guide-python/basic_walkthrough.py)
-* Cutomize loss function, and evaluation metric. [python](guide-python/custom_objective.py)
-* Boosting from existing prediction. [python](guide-python/boost_from_prediction.py)
-* Predicting using first n trees. [python](guide-python/predict_first_ntree.py)
+* Basic walkthrough of wrappers [python](guide-python/basic_walkthrough.py)
+* Customize loss function and evaluation metric [python](guide-python/custom_objective.py)
+* Boosting from existing prediction [python](guide-python/boost_from_prediction.py)
+* Predicting using first n trees [python](guide-python/predict_first_ntree.py)
+* Generalized Linear Model [python](guide-python/generalized_linear_model.py)
 * Cross validation [python](guide-python/cross_validation.py)
 
 Basic Examples by Tasks
diff --git a/demo/guide-python/README.md b/demo/guide-python/README.md
index 3014ee23e..3625c40f5 100644
--- a/demo/guide-python/README.md
+++ b/demo/guide-python/README.md
@@ -4,4 +4,5 @@ XGBoost Python Feature Walkthrough
 * [Cutomize loss function, and evaluation metric](custom_objective.py)
 * [Boosting from existing prediction](boost_from_prediction.py)
 * [Predicting using first n trees](predict_first_ntree.py)
+* [Generalized Linear Model](generalized_linear_model.py)
 * [Cross validation](cross_validation.py)
diff --git a/demo/guide-python/generalized_linear_model.py b/demo/guide-python/generalized_linear_model.py
new file mode 100755
index 000000000..b6b60be35
--- /dev/null
+++ b/demo/guide-python/generalized_linear_model.py
@@ -0,0 +1,32 @@
+#!/usr/bin/python
+import sys
+sys.path.append('../../wrapper')
+import xgboost as xgb
+##
+# this script demonstrates how to fit a generalized linear model in xgboost
+# basically, we are using a linear model instead of trees as our booster
+##
+dtrain = xgb.DMatrix('../data/agaricus.txt.train')
+dtest = xgb.DMatrix('../data/agaricus.txt.test')
+# change booster to gblinear, so that we are fitting a linear model
+# alpha is the L1 regularizer
+# lambda is the L2 regularizer
+# you can also set lambda_bias, which is the L2 regularizer on the bias term
+param = {'silent': 1, 'objective': 'binary:logistic', 'booster': 'gblinear',
+         'alpha': 0.0001, 'lambda': 1}
+
+# normally, you do not need to set eta (step size)
+# XGBoost uses a parallel coordinate descent algorithm (shotgun);
+# parallelization can affect convergence in certain cases, so setting
+# eta to a smaller value, e.g. 0.5, can make the optimization more stable
+# param['eta'] = 1
+
+##
+# the rest of the settings are the same
+##
+watchlist = [(dtest, 'eval'), (dtrain, 'train')]
+num_round = 4
+bst = xgb.train(param, dtrain, num_round, watchlist)
+preds = bst.predict(dtest)
+labels = dtest.get_label()
+print('error=%f' % (sum(1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]) / float(len(preds))))
diff --git a/demo/guide-python/runall.sh b/demo/guide-python/runall.sh
index ce71f96f2..2dd2c20b0 100755
--- a/demo/guide-python/runall.sh
+++ b/demo/guide-python/runall.sh
@@ -2,5 +2,6 @@
 python basic_walkthrough.py
 python custom_objective.py
 python boost_from_prediction.py
+python generalized_linear_model.py
 python cross_validation.py
 rm -rf *~ *.model *.buffer
\ No newline at end of file
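
Note for readers of this patch: the final print in generalized_linear_model.py packs the error computation into a dense one-liner. A more readable equivalent (a sketch only, assuming the same preds and labels arrays produced by the script above) would be:

    # count test examples where the 0.5-thresholded prediction
    # disagrees with the true label, then normalize to an error rate
    n_wrong = sum(1 for p, l in zip(preds, labels) if int(p > 0.5) != l)
    print('error=%f' % (n_wrong / float(len(preds))))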