Merge branch 'unity' of ssh://github.com/tqchen/xgboost into unity
Conflicts: src/tree/updater_histmaker-inl.hpp
This commit is contained in:
commit
dffcbc838b
@ -1,3 +1,9 @@
|
|||||||
|
Highlights
|
||||||
|
=====
|
||||||
|
The Higgs challenge ended recently, and xgboost was used by many participants. This list highlights the xgboost solutions of players.
|
||||||
|
* Blogpost by phunther: [Winning solution of Kaggle Higgs competition: what a single model can do](http://no2147483647.wordpress.com/2014/09/17/winning-solution-of-kaggle-higgs-competition-what-a-single-model-can-do/)
|
||||||
|
* The solution by Tianqi Chen and Tong He [Link](https://github.com/hetong007/higgsml)
|
||||||
|
|
||||||
Guide for Kaggle Higgs Challenge
|
Guide for Kaggle Higgs Challenge
|
||||||
=====
|
=====
|
||||||
|
|
||||||
|
|||||||
@ -10,7 +10,7 @@ label = train[:,32]
|
|||||||
data = train[:,1:31]
|
data = train[:,1:31]
|
||||||
weight = train[:,31]
|
weight = train[:,31]
|
||||||
dtrain = xgb.DMatrix( data, label=label, missing = -999.0, weight=weight )
|
dtrain = xgb.DMatrix( data, label=label, missing = -999.0, weight=weight )
|
||||||
param = {'max_depth':6, 'eta':0.1, 'silent':1, 'objective':'binary:logitraw', 'nthread':4, 'updater':'grow_histmaker,prune'}
|
param = {'max_depth':6, 'eta':0.1, 'silent':1, 'objective':'binary:logitraw', 'nthread':4}
|
||||||
num_round = 120
|
num_round = 120
|
||||||
|
|
||||||
print ('running cross validation, with preprocessing function')
|
print ('running cross validation, with preprocessing function')
|
||||||
|
|||||||
@ -5,6 +5,7 @@ import numpy as np
|
|||||||
# add path of xgboost python module
|
# add path of xgboost python module
|
||||||
sys.path.append('../../wrapper/')
|
sys.path.append('../../wrapper/')
|
||||||
import xgboost as xgb
|
import xgboost as xgb
|
||||||
|
from sklearn.ensemble import GradientBoostingClassifier
|
||||||
import time
|
import time
|
||||||
test_size = 550000
|
test_size = 550000
|
||||||
|
|
||||||
@ -37,30 +38,29 @@ param['objective'] = 'binary:logitraw'
|
|||||||
param['scale_pos_weight'] = sum_wneg/sum_wpos
|
param['scale_pos_weight'] = sum_wneg/sum_wpos
|
||||||
param['bst:eta'] = 0.1
|
param['bst:eta'] = 0.1
|
||||||
param['bst:max_depth'] = 6
|
param['bst:max_depth'] = 6
|
||||||
#param['eval_metric'] = 'auc'
|
param['eval_metric'] = 'auc'
|
||||||
param['silent'] = 1
|
param['silent'] = 1
|
||||||
param['updater'] = sys.argv[1]
|
|
||||||
param['nthread'] = 4
|
param['nthread'] = 4
|
||||||
|
|
||||||
#plst = param.items()+[('eval_metric', 'ams@0.15')]
|
plst = param.items()+[('eval_metric', 'ams@0.15')]
|
||||||
|
|
||||||
watchlist = [ (xgmat,'train') ]
|
watchlist = [ (xgmat,'train') ]
|
||||||
# boost 10 trees
|
# boost 10 trees
|
||||||
num_round = 10
|
num_round = 10
|
||||||
print ('loading data end, start to boost trees')
|
print ('loading data end, start to boost trees')
|
||||||
print ("training GBM from sklearn")
|
print ("training GBM from sklearn")
|
||||||
#tmp = time.time()
|
tmp = time.time()
|
||||||
#gbm = GradientBoostingClassifier(n_estimators=num_round, max_depth=6, verbose=2)
|
gbm = GradientBoostingClassifier(n_estimators=num_round, max_depth=6, verbose=2)
|
||||||
#gbm.fit(data, label)
|
gbm.fit(data, label)
|
||||||
#print ("sklearn.GBM costs: %s seconds" % str(time.time() - tmp))
|
print ("sklearn.GBM costs: %s seconds" % str(time.time() - tmp))
|
||||||
#raw_input()
|
#raw_input()
|
||||||
print ("training xgboost")
|
print ("training xgboost")
|
||||||
threads = [1, 2, 4, 16]
|
threads = [1, 2, 4, 16]
|
||||||
for i in threads:
|
for i in threads:
|
||||||
param['nthread'] = i
|
param['nthread'] = i
|
||||||
tmp = time.time()
|
tmp = time.time()
|
||||||
#plst = param.items()+[('eval_metric', 'ams@0.15')]
|
plst = param.items()+[('eval_metric', 'ams@0.15')]
|
||||||
bst = xgb.train( param, xgmat, num_round, watchlist );
|
bst = xgb.train( plst, xgmat, num_round, watchlist );
|
||||||
print ("XGBoost with %d thread costs: %s seconds" % (i, str(time.time() - tmp)))
|
print ("XGBoost with %d thread costs: %s seconds" % (i, str(time.time() - tmp)))
|
||||||
|
|
||||||
print ('finish training')
|
print ('finish training')
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user