Merge branch 'unity' of ssh://github.com/tqchen/xgboost into unity

Conflicts:
	src/tree/updater_histmaker-inl.hpp
tqchen 2014-11-19 09:55:05 -08:00
commit dffcbc838b
3 changed files with 16 additions and 10 deletions


@@ -1,3 +1,9 @@
+Highlights
+=====
+The Higgs challenge ended recently, and xgboost was used by many of the players. This list highlights their xgboost solutions:
+* Blogpost by phunther: [Winning solution of Kaggle Higgs competition: what a single model can do](http://no2147483647.wordpress.com/2014/09/17/winning-solution-of-kaggle-higgs-competition-what-a-single-model-can-do/)
+* The solution by Tianqi Chen and Tong He: [Link](https://github.com/hetong007/higgsml)
 Guide for Kaggle Higgs Challenge
 =====


@@ -10,7 +10,7 @@
 label = train[:,32]
 data = train[:,1:31]
 weight = train[:,31]
 dtrain = xgb.DMatrix( data, label=label, missing = -999.0, weight=weight )
-param = {'max_depth':6, 'eta':0.1, 'silent':1, 'objective':'binary:logitraw', 'nthread':4, 'updater':'grow_histmaker,prune'}
+param = {'max_depth':6, 'eta':0.1, 'silent':1, 'objective':'binary:logitraw', 'nthread':4}
 num_round = 120
 print ('running cross validation, with preprocessing function')
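For context, the script this hunk comes from runs cross validation with a preprocessing function, as the print statement above says. Below is a minimal sketch of what such a call can look like with `xgb.cv`, reusing `param`, `dtrain`, and `num_round` from the hunk; the `fpreproc` body and the metric set are assumptions, not taken from this commit:

```python
# Hypothetical per-fold preprocessing callback: xgb.cv invokes it on each
# fold's train/test DMatrix pair so fold-dependent parameters can be reset.
def fpreproc(dtrain, dtest, param):
    label = dtrain.get_label()
    ratio = float(sum(label == 0)) / sum(label == 1)
    param['scale_pos_weight'] = ratio  # rebalance classes per fold
    return (dtrain, dtest, param)

# 5-fold cross validation; 'ams@0.15' and 'auc' are assumed metrics here.
xgb.cv(param, dtrain, num_round, nfold=5,
       metrics={'ams@0.15', 'auc'}, seed=0, fpreproc=fpreproc)
```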


@@ -5,6 +5,7 @@
 import numpy as np
 # add path of xgboost python module
 sys.path.append('../../wrapper/')
 import xgboost as xgb
+from sklearn.ensemble import GradientBoostingClassifier
 import time
 test_size = 550000
@@ -37,30 +38,29 @@
 param['objective'] = 'binary:logitraw'
 param['scale_pos_weight'] = sum_wneg/sum_wpos
 param['bst:eta'] = 0.1
 param['bst:max_depth'] = 6
-#param['eval_metric'] = 'auc'
+param['eval_metric'] = 'auc'
 param['silent'] = 1
-param['updater'] = sys.argv[1]
 param['nthread'] = 4
-#plst = param.items()+[('eval_metric', 'ams@0.15')]
+plst = param.items()+[('eval_metric', 'ams@0.15')]
 watchlist = [ (xgmat,'train') ]
 # boost 10 trees
 num_round = 10
 print ('loading data end, start to boost trees')
 print ("training GBM from sklearn")
-#tmp = time.time()
-#gbm = GradientBoostingClassifier(n_estimators=num_round, max_depth=6, verbose=2)
-#gbm.fit(data, label)
-#print ("sklearn.GBM costs: %s seconds" % str(time.time() - tmp))
+tmp = time.time()
+gbm = GradientBoostingClassifier(n_estimators=num_round, max_depth=6, verbose=2)
+gbm.fit(data, label)
+print ("sklearn.GBM costs: %s seconds" % str(time.time() - tmp))
 #raw_input()
 print ("training xgboost")
 threads = [1, 2, 4, 16]
 for i in threads:
     param['nthread'] = i
     tmp = time.time()
-    #plst = param.items()+[('eval_metric', 'ams@0.15')]
-    bst = xgb.train( param, xgmat, num_round, watchlist );
+    plst = param.items()+[('eval_metric', 'ams@0.15')]
+    bst = xgb.train( plst, xgmat, num_round, watchlist );
     print ("XGBoost with %d thread costs: %s seconds" % (i, str(time.time() - tmp)))
 print ('finish training')
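A side note on the `plst` lines above: `param.items()` is concatenated with an extra `('eval_metric', 'ams@0.15')` pair because a Python dict can hold only one `eval_metric` key, while xgboost also accepts its parameters as a list of key/value pairs and tracks every metric listed. A minimal self-contained sketch of the idea follows; the synthetic data is only for illustration, and `list(...)` is added so the concatenation also works on Python 3:

```python
import numpy as np
import xgboost as xgb

# Tiny synthetic binary task so the sketch runs standalone.
data = np.random.rand(100, 10)
label = np.random.randint(2, size=100)
xgmat = xgb.DMatrix(data, label=label)

param = {'objective': 'binary:logitraw', 'eval_metric': 'auc'}
# Appending a second ('eval_metric', ...) pair keeps both metrics;
# writing it into the dict would overwrite 'auc'.
plst = list(param.items()) + [('eval_metric', 'ams@0.15')]
bst = xgb.train(plst, xgmat, num_boost_round=10, evals=[(xgmat, 'train')])
```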