Merge branch 'unity' of ssh://github.com/tqchen/xgboost into unity

Conflicts: src/tree/updater_histmaker-inl.hpp
2014-11-19 09:55:05 -08:00 · 2014-11-19 09:55:05 -08:00 · dffcbc838b
commit dffcbc838b
parent fa1581b94c 32beb56ba3
3 changed files with 16 additions and 10 deletions
--- a/demo/kaggle-higgs/README.md
+++ b/demo/kaggle-higgs/README.md
@@ -1,3 +1,9 @@
 Highlights
 =====
 The Higgs challenge ended recently, and xgboost was used by many participants. This list highlights the xgboost solutions of players.
 * Blogpost by phunther: [Winning solution of Kaggle Higgs competition: what a single model can do](http://no2147483647.wordpress.com/2014/09/17/winning-solution-of-kaggle-higgs-competition-what-a-single-model-can-do/) 
 * The solution by Tianqi Chen and Tong He [Link](https://github.com/hetong007/higgsml)
 Guide for Kaggle Higgs Challenge
 =====
--- a/demo/kaggle-higgs/higgs-cv.py
+++ b/demo/kaggle-higgs/higgs-cv.py
@@ -10,7 +10,7 @@ label  = train[:,32]
 data   = train[:,1:31]
 weight = train[:,31]
 dtrain = xgb.DMatrix( data, label=label, missing = -999.0, weight=weight )
-param = {'max_depth':6, 'eta':0.1, 'silent':1, 'objective':'binary:logitraw', 'nthread':4, 'updater':'grow_histmaker,prune'}
+param = {'max_depth':6, 'eta':0.1, 'silent':1, 'objective':'binary:logitraw', 'nthread':4}
 num_round = 120
 print ('running cross validation, with preprocessing function')
--- a/demo/kaggle-higgs/speedtest.py
+++ b/demo/kaggle-higgs/speedtest.py
@@ -5,6 +5,7 @@ import numpy as np
 # add path of xgboost python module
 sys.path.append('../../wrapper/')
 import xgboost as xgb
 from sklearn.ensemble import GradientBoostingClassifier
 import time
 test_size = 550000
@@ -37,30 +38,29 @@ param['objective'] = 'binary:logitraw'
 param['scale_pos_weight'] = sum_wneg/sum_wpos
 param['bst:eta'] = 0.1
 param['bst:max_depth'] = 6
-#param['eval_metric'] = 'auc'
+param['eval_metric'] = 'auc'
 param['silent'] = 1
 param['updater'] = sys.argv[1]
 param['nthread'] = 4
-#plst = param.items()+[('eval_metric', 'ams@0.15')]
+plst = param.items()+[('eval_metric', 'ams@0.15')]
 watchlist = [ (xgmat,'train') ]
 # boost 10 trees
 num_round = 10
 print ('loading data end, start to boost trees')
 print ("training GBM from sklearn")
-#tmp = time.time()
+tmp = time.time()
-#gbm = GradientBoostingClassifier(n_estimators=num_round, max_depth=6, verbose=2)
+gbm = GradientBoostingClassifier(n_estimators=num_round, max_depth=6, verbose=2)
-#gbm.fit(data, label)
+gbm.fit(data, label)
-#print ("sklearn.GBM costs: %s seconds" % str(time.time() - tmp))
+print ("sklearn.GBM costs: %s seconds" % str(time.time() - tmp))
 #raw_input()
 print ("training xgboost")
 threads = [1, 2, 4, 16]
 for i in threads:
    param['nthread'] = i
    tmp = time.time()
-    #plst = param.items()+[('eval_metric', 'ams@0.15')]
+    plst = param.items()+[('eval_metric', 'ams@0.15')]
-    bst = xgb.train( param, xgmat, num_round, watchlist );
+    bst = xgb.train( plst, xgmat, num_round, watchlist );
    print ("XGBoost with %d thread costs: %s seconds" % (i, str(time.time() - tmp)))
 print ('finish training')