54 lines
1.2 KiB
Python
Executable File
54 lines
1.2 KiB
Python
Executable File
#!/usr/bin/python
|
|
# make prediction
|
|
import sys
|
|
import numpy as np
|
|
# add path of xgboost python module
|
|
sys.path.append('../../wrapper/')
|
|
import xgboost as xgb
|
|
|
|
# directory that holds the input CSV files
dpath = 'data'

# saved booster to load, and the CSV file the predictions are written to
modelfile = 'higgs.model'
outfile = 'higgs.pred.csv'

# fraction of events, counted from the top of the score ranking,
# that gets labelled as positive (0.15 == top 15%)
threshold_ratio = 0.15
|
|
|
|
# Load the test set straight into a numpy array. skiprows=1 skips the first
# line of the file (presumably the CSV header row).
raw = np.loadtxt(dpath + '/test.csv', delimiter=',', skiprows=1)

# Column 0 is kept as the per-row identifier; columns 1..30 are handed to
# xgboost as the feature matrix.
idx = raw[:, 0]
features = raw[:, 1:31]

print('finish loading from csv ')

# Entries equal to -999.0 are declared to xgboost as missing values.
dmat = xgb.DMatrix(features, missing=-999.0)

# Restore the saved model from `modelfile` and score every test row.
booster = xgb.Booster({'nthread': 16}, model_file=modelfile)
ypred = booster.predict(dmat)
|
|
|
|
# Pair every row identifier with its predicted score, walking both sequences
# in lockstep. The identifier is cast to int before it is stored.
res = [(int(event_id), score) for event_id, score in zip(idx, ypred)]

# Build the rank table: identifier -> 1-based position when the pairs are
# ordered by descending score (rank 1 == highest score). The key negates the
# score so that the ascending sort yields descending order while ties keep
# their original relative order.
rorder = {}
for k, _score in sorted(res, key=lambda pair: -pair[1]):
    # len(rorder) + 1 is the next unused rank.
    rorder[k] = len(rorder) + 1
|
|
|
|
# write out predictions
# Events whose rank falls within the top `threshold_ratio` fraction are
# labelled 's'; all remaining events are labelled 'b'.
nranked = len(rorder)
ntop = int(threshold_ratio * nranked)

# Running tallies of 's'-labelled rows and of all rows written. Nothing later
# in this script reads them; they are kept as module-level names.
nhit = 0
ntot = 0

# The context manager closes the file even if a write fails part-way through.
with open(outfile, 'w') as fo:
    fo.write('EventId,RankOrder,Class\n')
    # Iterate `res` (not `rorder`) so rows come out in the order of `res`.
    for k, _score in res:
        if rorder[k] <= ntop:
            lb = 's'
            nhit += 1
        else:
            lb = 'b'
        # change output rank order to follow Kaggle convention:
        # the written RankOrder is inverted, so the highest-scoring event
        # (rank 1) gets the largest value and the lowest-scoring one gets 1.
        fo.write('%s,%d,%s\n' % (k, nranked + 1 - rorder[k], lb))
        ntot += 1

print('finished writing into prediction file')
|
|
|
|
|
|
|