This commit is contained in:
tqchen 2014-11-16 11:47:21 -08:00
parent 129fee64f3
commit 5061d55725
2 changed files with 10 additions and 10 deletions

View File

@ -5,7 +5,6 @@ import numpy as np
# add path of xgboost python module # add path of xgboost python module
sys.path.append('../../wrapper/') sys.path.append('../../wrapper/')
import xgboost as xgb import xgboost as xgb
from sklearn.ensemble import GradientBoostingClassifier
import time import time
test_size = 550000 test_size = 550000
@ -38,29 +37,30 @@ param['objective'] = 'binary:logitraw'
param['scale_pos_weight'] = sum_wneg/sum_wpos param['scale_pos_weight'] = sum_wneg/sum_wpos
param['bst:eta'] = 0.1 param['bst:eta'] = 0.1
param['bst:max_depth'] = 6 param['bst:max_depth'] = 6
param['eval_metric'] = 'auc' #param['eval_metric'] = 'auc'
param['silent'] = 1 param['silent'] = 1
param['updater'] = sys.argv[1]
param['nthread'] = 4 param['nthread'] = 4
plst = param.items()+[('eval_metric', 'ams@0.15')] #plst = param.items()+[('eval_metric', 'ams@0.15')]
watchlist = [ (xgmat,'train') ] watchlist = [ (xgmat,'train') ]
# boost 10 trees # boost 10 trees
num_round = 10 num_round = 10
print ('loading data end, start to boost trees') print ('loading data end, start to boost trees')
print ("training GBM from sklearn") print ("training GBM from sklearn")
tmp = time.time() #tmp = time.time()
gbm = GradientBoostingClassifier(n_estimators=num_round, max_depth=6, verbose=2) #gbm = GradientBoostingClassifier(n_estimators=num_round, max_depth=6, verbose=2)
gbm.fit(data, label) #gbm.fit(data, label)
print ("sklearn.GBM costs: %s seconds" % str(time.time() - tmp)) #print ("sklearn.GBM costs: %s seconds" % str(time.time() - tmp))
#raw_input() #raw_input()
print ("training xgboost") print ("training xgboost")
threads = [1, 2, 4, 16] threads = [1, 2, 4, 16]
for i in threads: for i in threads:
param['nthread'] = i param['nthread'] = i
tmp = time.time() tmp = time.time()
plst = param.items()+[('eval_metric', 'ams@0.15')] #plst = param.items()+[('eval_metric', 'ams@0.15')]
bst = xgb.train( plst, xgmat, num_round, watchlist ); bst = xgb.train( param, xgmat, num_round, watchlist );
print ("XGBoost with %d thread costs: %s seconds" % (i, str(time.time() - tmp))) print ("XGBoost with %d thread costs: %s seconds" % (i, str(time.time() - tmp)))
print ('finish training') print ('finish training')

View File

@ -466,7 +466,7 @@ class QuantileHistMaker: public HistMaker<TStats> {
if (a.size != 0) { if (a.size != 0) {
bst_float cpt = a.data[a.size - 1].value; bst_float cpt = a.data[a.size - 1].value;
// this must be bigger than last value in a scale // this must be bigger than last value in a scale
bst_float last = cpt + fabs(cpt); bst_float last = cpt + fabs(cpt) + rt_eps;
this->wspace.cut.push_back(last); this->wspace.cut.push_back(last);
} }
this->wspace.rptr.push_back(this->wspace.cut.size()); this->wspace.rptr.push_back(this->wspace.cut.size());