diff --git a/demo/kaggle-higgs/README.md b/demo/kaggle-higgs/README.md
index 2d9e2fd01..8b6dead19 100644
--- a/demo/kaggle-higgs/README.md
+++ b/demo/kaggle-higgs/README.md
@@ -1,3 +1,9 @@
+Highlights
+=====
+The Higgs challenge ended recently; xgboost was used by many of the players. This list highlights some of their xgboost solutions:
+* Blog post by phunther: [Winning solution of Kaggle Higgs competition: what a single model can do](http://no2147483647.wordpress.com/2014/09/17/winning-solution-of-kaggle-higgs-competition-what-a-single-model-can-do/)
+* The solution by Tianqi Chen and Tong He: [Link](https://github.com/hetong007/higgsml)
+
 Guide for Kaggle Higgs Challenge
 =====
 
diff --git a/demo/kaggle-higgs/higgs-cv.py b/demo/kaggle-higgs/higgs-cv.py
index 1d660aa8f..3e36fa66b 100755
--- a/demo/kaggle-higgs/higgs-cv.py
+++ b/demo/kaggle-higgs/higgs-cv.py
@@ -10,7 +10,7 @@ label = train[:,32]
 data = train[:,1:31]
 weight = train[:,31]
 dtrain = xgb.DMatrix( data, label=label, missing = -999.0, weight=weight )
-param = {'max_depth':6, 'eta':0.1, 'silent':1, 'objective':'binary:logitraw', 'nthread':4, 'updater':'grow_histmaker,prune'}
+param = {'max_depth':6, 'eta':0.1, 'silent':1, 'objective':'binary:logitraw', 'nthread':4}
 num_round = 120
 
 print ('running cross validation, with preprocessing function')
diff --git a/demo/kaggle-higgs/speedtest.py b/demo/kaggle-higgs/speedtest.py
index 2da9c86ef..c5cc2fd29 100755
--- a/demo/kaggle-higgs/speedtest.py
+++ b/demo/kaggle-higgs/speedtest.py
@@ -5,6 +5,7 @@ import numpy as np
 # add path of xgboost python module
 sys.path.append('../../wrapper/')
 import xgboost as xgb
+from sklearn.ensemble import GradientBoostingClassifier
 import time
 
 test_size = 550000
@@ -37,30 +38,29 @@ param['objective'] = 'binary:logitraw'
 param['scale_pos_weight'] = sum_wneg/sum_wpos
 param['bst:eta'] = 0.1
 param['bst:max_depth'] = 6
-#param['eval_metric'] = 'auc'
+param['eval_metric'] = 'auc'
 param['silent'] = 1
-param['updater'] = sys.argv[1]
 param['nthread'] = 4
-#plst = param.items()+[('eval_metric', 'ams@0.15')]
+plst = param.items()+[('eval_metric', 'ams@0.15')]
 
 watchlist = [ (xgmat,'train') ]
 # boost 10 trees
 num_round = 10
 print ('loading data end, start to boost trees')
 print ("training GBM from sklearn")
-#tmp = time.time()
-#gbm = GradientBoostingClassifier(n_estimators=num_round, max_depth=6, verbose=2)
-#gbm.fit(data, label)
-#print ("sklearn.GBM costs: %s seconds" % str(time.time() - tmp))
+tmp = time.time()
+gbm = GradientBoostingClassifier(n_estimators=num_round, max_depth=6, verbose=2)
+gbm.fit(data, label)
+print ("sklearn.GBM costs: %s seconds" % str(time.time() - tmp))
 #raw_input()
 print ("training xgboost")
 threads = [1, 2, 4, 16]
 for i in threads:
     param['nthread'] = i
     tmp = time.time()
-    #plst = param.items()+[('eval_metric', 'ams@0.15')]
-    bst = xgb.train( param, xgmat, num_round, watchlist );
+    plst = param.items()+[('eval_metric', 'ams@0.15')]
+    bst = xgb.train( plst, xgmat, num_round, watchlist );
     print ("XGBoost with %d thread costs: %s seconds" % (i, str(time.time() - tmp)))
 
 print ('finish training')
 
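
A note on the `plst` pattern re-enabled in `speedtest.py`: `xgb.train` accepts the parameters either as a dict or as a list of `(key, value)` pairs, and only the list form can carry `eval_metric` more than once (here `auc` plus `ams@0.15`), since a dict holds a single value per key. The sketch below is a minimal, self-contained illustration of that pattern, using random placeholder data rather than the Higgs training set; note that on Python 3, `dict.items()` returns a view, so it must be wrapped in `list()` before concatenation:

```python
import numpy as np
import xgboost as xgb

# placeholder data (assumption): random features with 0/1 labels,
# just enough to make the sketch runnable
data = np.random.rand(1000, 30)
label = np.random.randint(2, size=1000)
dtrain = xgb.DMatrix(data, label=label)

param = {'max_depth': 6, 'eta': 0.1, 'objective': 'binary:logitraw'}
# list-of-pairs form: the same key may appear twice, so both
# metrics are evaluated each round; list(...) is required on Python 3
plst = list(param.items()) + [('eval_metric', 'auc'),
                              ('eval_metric', 'ams@0.15')]
bst = xgb.train(plst, dtrain, num_boost_round=10, evals=[(dtrain, 'train')])
```

With this form, each boosting round reports both metrics on the watchlist, which is why the demo builds `plst` from `param.items()` instead of passing the dict directly.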