Update demo scripts to use the installed Python library
This commit is contained in:
@@ -1,7 +1,5 @@
|
||||
#!/usr/bin/python
|
||||
import sys
|
||||
import numpy as np
|
||||
sys.path.append('../../wrapper')
|
||||
import xgboost as xgb
|
||||
|
||||
### load data and do training
|
||||
|
||||
@@ -1,14 +1,6 @@
|
||||
#!/usr/bin/python
|
||||
# this is the example script to use xgboost to train
|
||||
import inspect
|
||||
import os
|
||||
import sys
|
||||
# this is the example script to use xgboost to train
|
||||
import numpy as np
|
||||
# add path of xgboost python module
|
||||
code_path = os.path.join(
|
||||
os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../wrapper")
|
||||
|
||||
sys.path.append(code_path)
|
||||
|
||||
import xgboost as xgb
|
||||
|
||||
@@ -29,7 +21,7 @@ weight = dtrain[:,31] * float(test_size) / len(label)
|
||||
sum_wpos = sum( weight[i] for i in range(len(label)) if label[i] == 1.0 )
|
||||
sum_wneg = sum( weight[i] for i in range(len(label)) if label[i] == 0.0 )
|
||||
|
||||
# print weight statistics
|
||||
# print weight statistics
|
||||
print ('weight statistics: wpos=%g, wneg=%g, ratio=%g' % ( sum_wpos, sum_wneg, sum_wneg/sum_wpos ))
|
||||
|
||||
# construct xgboost.DMatrix from numpy array, treat -999.0 as missing value
|
||||
@@ -42,13 +34,13 @@ param = {}
|
||||
param['objective'] = 'binary:logitraw'
|
||||
# scale weight of positive examples
|
||||
param['scale_pos_weight'] = sum_wneg/sum_wpos
|
||||
param['eta'] = 0.1
|
||||
param['eta'] = 0.1
|
||||
param['max_depth'] = 6
|
||||
param['eval_metric'] = 'auc'
|
||||
param['silent'] = 1
|
||||
param['nthread'] = 16
|
||||
|
||||
# you can directly throw param in, though we want to watch multiple metrics here
|
||||
# you can directly throw param in, though we want to watch multiple metrics here
|
||||
plst = list(param.items())+[('eval_metric', 'ams@0.15')]
|
||||
|
||||
watchlist = [ (xgmat,'train') ]
|
||||
|
||||
@@ -1,9 +1,6 @@
|
||||
#!/usr/bin/python
|
||||
# make prediction
|
||||
import sys
|
||||
# make prediction
|
||||
import numpy as np
|
||||
# add path of xgboost python module
|
||||
sys.path.append('../../wrapper/')
|
||||
import xgboost as xgb
|
||||
|
||||
# path to where the data lies
|
||||
@@ -11,7 +8,7 @@ dpath = 'data'
|
||||
|
||||
modelfile = 'higgs.model'
|
||||
outfile = 'higgs.pred.csv'
|
||||
# make top 15% as positive
|
||||
# make top 15% as positive
|
||||
threshold_ratio = 0.15
|
||||
|
||||
# load in training data, directly use numpy
|
||||
@@ -24,7 +21,7 @@ xgmat = xgb.DMatrix( data, missing = -999.0 )
|
||||
bst = xgb.Booster({'nthread':16}, model_file = modelfile)
|
||||
ypred = bst.predict( xgmat )
|
||||
|
||||
res = [ ( int(idx[i]), ypred[i] ) for i in range(len(ypred)) ]
|
||||
res = [ ( int(idx[i]), ypred[i] ) for i in range(len(ypred)) ]
|
||||
|
||||
rorder = {}
|
||||
for k, v in sorted( res, key = lambda x:-x[1] ):
|
||||
@@ -36,12 +33,12 @@ fo = open(outfile, 'w')
|
||||
nhit = 0
|
||||
ntot = 0
|
||||
fo.write('EventId,RankOrder,Class\n')
|
||||
for k, v in res:
|
||||
for k, v in res:
|
||||
if rorder[k] <= ntop:
|
||||
lb = 's'
|
||||
nhit += 1
|
||||
else:
|
||||
lb = 'b'
|
||||
lb = 'b'
|
||||
# change output rank order to follow Kaggle convention
|
||||
fo.write('%s,%d,%s\n' % ( k, len(rorder)+1-rorder[k], lb ) )
|
||||
ntot += 1
|
||||
|
||||
@@ -1,9 +1,6 @@
|
||||
#!/usr/bin/python
|
||||
# this is the example script to use xgboost to train
|
||||
import sys
|
||||
import numpy as np
|
||||
# add path of xgboost python module
|
||||
sys.path.append('../../wrapper/')
|
||||
import xgboost as xgb
|
||||
from sklearn.ensemble import GradientBoostingClassifier
|
||||
import time
|
||||
|
||||
Reference in New Issue
Block a user