Update demo scripts to use installed python library

This commit is contained in:
Skipper Seabold
2015-04-08 14:22:54 -05:00
parent ceb62e9231
commit a0e07f16c4
15 changed files with 27 additions and 65 deletions

View File

@@ -1,7 +1,5 @@
#!/usr/bin/python
import sys
import numpy as np
sys.path.append('../../wrapper')
import xgboost as xgb
### load data and do training

View File

@@ -1,14 +1,6 @@
#!/usr/bin/python
# this is the example script to use xgboost to train
import inspect
import os
import sys
# this is the example script to use xgboost to train
import numpy as np
# add path of xgboost python module
code_path = os.path.join(
os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../wrapper")
sys.path.append(code_path)
import xgboost as xgb
@@ -29,7 +21,7 @@ weight = dtrain[:,31] * float(test_size) / len(label)
sum_wpos = sum( weight[i] for i in range(len(label)) if label[i] == 1.0 )
sum_wneg = sum( weight[i] for i in range(len(label)) if label[i] == 0.0 )
# print weight statistics
# print weight statistics
print ('weight statistics: wpos=%g, wneg=%g, ratio=%g' % ( sum_wpos, sum_wneg, sum_wneg/sum_wpos ))
# construct xgboost.DMatrix from numpy array, treat -999.0 as missing value
@@ -42,13 +34,13 @@ param = {}
param['objective'] = 'binary:logitraw'
# scale weight of positive examples
param['scale_pos_weight'] = sum_wneg/sum_wpos
param['eta'] = 0.1
param['eta'] = 0.1
param['max_depth'] = 6
param['eval_metric'] = 'auc'
param['silent'] = 1
param['nthread'] = 16
# you can directly throw param in, though we want to watch multiple metrics here
# you can directly throw param in, though we want to watch multiple metrics here
plst = list(param.items())+[('eval_metric', 'ams@0.15')]
watchlist = [ (xgmat,'train') ]

View File

@@ -1,9 +1,6 @@
#!/usr/bin/python
# make prediction
import sys
# make prediction
import numpy as np
# add path of xgboost python module
sys.path.append('../../wrapper/')
import xgboost as xgb
# path to where the data lies
@@ -11,7 +8,7 @@ dpath = 'data'
modelfile = 'higgs.model'
outfile = 'higgs.pred.csv'
# make top 15% as positive
# make top 15% as positive
threshold_ratio = 0.15
# load in training data, directly use numpy
@@ -24,7 +21,7 @@ xgmat = xgb.DMatrix( data, missing = -999.0 )
bst = xgb.Booster({'nthread':16}, model_file = modelfile)
ypred = bst.predict( xgmat )
res = [ ( int(idx[i]), ypred[i] ) for i in range(len(ypred)) ]
res = [ ( int(idx[i]), ypred[i] ) for i in range(len(ypred)) ]
rorder = {}
for k, v in sorted( res, key = lambda x:-x[1] ):
@@ -36,12 +33,12 @@ fo = open(outfile, 'w')
nhit = 0
ntot = 0
fo.write('EventId,RankOrder,Class\n')
for k, v in res:
for k, v in res:
if rorder[k] <= ntop:
lb = 's'
nhit += 1
else:
lb = 'b'
lb = 'b'
# change output rank order to follow Kaggle convention
fo.write('%s,%d,%s\n' % ( k, len(rorder)+1-rorder[k], lb ) )
ntot += 1

View File

@@ -1,9 +1,6 @@
#!/usr/bin/python
# this is the example script to use xgboost to train
import sys
import numpy as np
# add path of xgboost python module
sys.path.append('../../wrapper/')
import xgboost as xgb
from sklearn.ensemble import GradientBoostingClassifier
import time