Merge branch 'unity' of ssh://github.com/tqchen/xgboost into unity
Conflicts: src/tree/updater_histmaker-inl.hpp
This commit is contained in:
commit
dffcbc838b
@ -1,3 +1,9 @@
|
||||
Highlights
|
||||
=====
|
||||
The Higgs challenge ended recently, and xgboost was used by many participants. This list highlights the xgboost solutions of the players.
|
||||
* Blogpost by phunther: [Winning solution of Kaggle Higgs competition: what a single model can do](http://no2147483647.wordpress.com/2014/09/17/winning-solution-of-kaggle-higgs-competition-what-a-single-model-can-do/)
|
||||
* The solution by Tianqi Chen and Tong He [Link](https://github.com/hetong007/higgsml)
|
||||
|
||||
Guide for Kaggle Higgs Challenge
|
||||
=====
|
||||
|
||||
|
||||
@ -10,7 +10,7 @@ label = train[:,32]
|
||||
data = train[:,1:31]
|
||||
weight = train[:,31]
|
||||
dtrain = xgb.DMatrix( data, label=label, missing = -999.0, weight=weight )
|
||||
param = {'max_depth':6, 'eta':0.1, 'silent':1, 'objective':'binary:logitraw', 'nthread':4, 'updater':'grow_histmaker,prune'}
|
||||
param = {'max_depth':6, 'eta':0.1, 'silent':1, 'objective':'binary:logitraw', 'nthread':4}
|
||||
num_round = 120
|
||||
|
||||
print ('running cross validation, with preprocessing function')
|
||||
|
||||
@ -5,6 +5,7 @@ import numpy as np
|
||||
# add path of xgboost python module
|
||||
sys.path.append('../../wrapper/')
|
||||
import xgboost as xgb
|
||||
from sklearn.ensemble import GradientBoostingClassifier
|
||||
import time
|
||||
test_size = 550000
|
||||
|
||||
@ -37,30 +38,29 @@ param['objective'] = 'binary:logitraw'
|
||||
param['scale_pos_weight'] = sum_wneg/sum_wpos
|
||||
param['bst:eta'] = 0.1
|
||||
param['bst:max_depth'] = 6
|
||||
#param['eval_metric'] = 'auc'
|
||||
param['eval_metric'] = 'auc'
|
||||
param['silent'] = 1
|
||||
param['updater'] = sys.argv[1]
|
||||
param['nthread'] = 4
|
||||
|
||||
#plst = param.items()+[('eval_metric', 'ams@0.15')]
|
||||
plst = param.items()+[('eval_metric', 'ams@0.15')]
|
||||
|
||||
watchlist = [ (xgmat,'train') ]
|
||||
# boost 10 trees
|
||||
num_round = 10
|
||||
print ('loading data end, start to boost trees')
|
||||
print ("training GBM from sklearn")
|
||||
#tmp = time.time()
|
||||
#gbm = GradientBoostingClassifier(n_estimators=num_round, max_depth=6, verbose=2)
|
||||
#gbm.fit(data, label)
|
||||
#print ("sklearn.GBM costs: %s seconds" % str(time.time() - tmp))
|
||||
tmp = time.time()
|
||||
gbm = GradientBoostingClassifier(n_estimators=num_round, max_depth=6, verbose=2)
|
||||
gbm.fit(data, label)
|
||||
print ("sklearn.GBM costs: %s seconds" % str(time.time() - tmp))
|
||||
#raw_input()
|
||||
print ("training xgboost")
|
||||
threads = [1, 2, 4, 16]
|
||||
for i in threads:
|
||||
param['nthread'] = i
|
||||
tmp = time.time()
|
||||
#plst = param.items()+[('eval_metric', 'ams@0.15')]
|
||||
bst = xgb.train( param, xgmat, num_round, watchlist );
|
||||
plst = param.items()+[('eval_metric', 'ams@0.15')]
|
||||
bst = xgb.train( plst, xgmat, num_round, watchlist );
|
||||
print ("XGBoost with %d thread costs: %s seconds" % (i, str(time.time() - tmp)))
|
||||
|
||||
print ('finish training')
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user