Merge branch 'unity' of ssh://github.com/tqchen/xgboost into unity

Conflicts:
	src/tree/updater_histmaker-inl.hpp
tqchen 2014-11-19 09:55:05 -08:00
commit dffcbc838b
3 changed files with 16 additions and 10 deletions


@@ -1,3 +1,9 @@
+Highlights
+=====
+The Higgs challenge ended recently, and xgboost was used by many of the players. This list highlights their xgboost solutions:
+* Blogpost by phunther: [Winning solution of Kaggle Higgs competition: what a single model can do](http://no2147483647.wordpress.com/2014/09/17/winning-solution-of-kaggle-higgs-competition-what-a-single-model-can-do/)
+* The solution by Tianqi Chen and Tong He: [Link](https://github.com/hetong007/higgsml)
 Guide for Kaggle Higgs Challenge
 =====


@@ -10,7 +10,7 @@
 label = train[:,32]
 data = train[:,1:31]
 weight = train[:,31]
 dtrain = xgb.DMatrix( data, label=label, missing = -999.0, weight=weight )
-param = {'max_depth':6, 'eta':0.1, 'silent':1, 'objective':'binary:logitraw', 'nthread':4, 'updater':'grow_histmaker,prune'}
+param = {'max_depth':6, 'eta':0.1, 'silent':1, 'objective':'binary:logitraw', 'nthread':4}
 num_round = 120
 print ('running cross validation, with preprocessing function')
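For context, the script this hunk comes from runs cross validation with a preprocessing function, as the print statement above says. Below is a minimal sketch of what such a call can look like with `xgb.cv`, reusing `param`, `dtrain`, and `num_round` from the hunk; the `fpreproc` body and the metric set are assumptions, not taken from this commit:

```python
# Hypothetical per-fold preprocessing callback: xgb.cv invokes it on each
# fold's train/test DMatrix pair so fold-dependent parameters can be reset.
def fpreproc(dtrain, dtest, param):
    label = dtrain.get_label()
    ratio = float(sum(label == 0)) / sum(label == 1)
    param['scale_pos_weight'] = ratio  # rebalance classes per fold
    return (dtrain, dtest, param)

# 5-fold cross validation; 'ams@0.15' and 'auc' are assumed metrics here.
xgb.cv(param, dtrain, num_round, nfold=5,
       metrics={'ams@0.15', 'auc'}, seed=0, fpreproc=fpreproc)
```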


@@ -5,6 +5,7 @@
 import numpy as np
 # add path of xgboost python module
 sys.path.append('../../wrapper/')
 import xgboost as xgb
+from sklearn.ensemble import GradientBoostingClassifier
 import time
 test_size = 550000
@@ -37,30 +38,29 @@
 param['objective'] = 'binary:logitraw'
 param['scale_pos_weight'] = sum_wneg/sum_wpos
 param['bst:eta'] = 0.1
 param['bst:max_depth'] = 6
-#param['eval_metric'] = 'auc'
+param['eval_metric'] = 'auc'
 param['silent'] = 1
-param['updater'] = sys.argv[1]
 param['nthread'] = 4
-#plst = param.items()+[('eval_metric', 'ams@0.15')]
+plst = param.items()+[('eval_metric', 'ams@0.15')]
 watchlist = [ (xgmat,'train') ]
 # boost 10 trees
 num_round = 10
 print ('loading data end, start to boost trees')
 print ("training GBM from sklearn")
-#tmp = time.time()
-#gbm = GradientBoostingClassifier(n_estimators=num_round, max_depth=6, verbose=2)
-#gbm.fit(data, label)
-#print ("sklearn.GBM costs: %s seconds" % str(time.time() - tmp))
+tmp = time.time()
+gbm = GradientBoostingClassifier(n_estimators=num_round, max_depth=6, verbose=2)
+gbm.fit(data, label)
+print ("sklearn.GBM costs: %s seconds" % str(time.time() - tmp))
 #raw_input()
 print ("training xgboost")
 threads = [1, 2, 4, 16]
 for i in threads:
     param['nthread'] = i
     tmp = time.time()
-    #plst = param.items()+[('eval_metric', 'ams@0.15')]
-    bst = xgb.train( param, xgmat, num_round, watchlist );
+    plst = param.items()+[('eval_metric', 'ams@0.15')]
+    bst = xgb.train( plst, xgmat, num_round, watchlist );
     print ("XGBoost with %d thread costs: %s seconds" % (i, str(time.time() - tmp)))
 print ('finish training')
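A side note on the `plst` lines above: `param.items()` is concatenated with an extra `('eval_metric', 'ams@0.15')` pair because a Python dict can hold only one `eval_metric` key, while xgboost also accepts its parameters as a list of key/value pairs and tracks every metric listed. A minimal self-contained sketch of the idea follows; the synthetic data is only for illustration, and `list(...)` is added so the concatenation also works on Python 3:

```python
import numpy as np
import xgboost as xgb

# Tiny synthetic binary task so the sketch runs standalone.
data = np.random.rand(100, 10)
label = np.random.randint(2, size=100)
xgmat = xgb.DMatrix(data, label=label)

param = {'objective': 'binary:logitraw', 'eval_metric': 'auc'}
# Appending a second ('eval_metric', ...) pair keeps both metrics;
# writing it into the dict would overwrite 'auc'.
plst = list(param.items()) + [('eval_metric', 'ams@0.15')]
bst = xgb.train(plst, xgmat, num_boost_round=10, evals=[(xgmat, 'train')])
```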