Update demo scripts to use installed python library
parent ceb62e9231
commit a0e07f16c4
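
Every hunk below makes the same change: the demo scripts used to splice the in-tree wrapper directory onto sys.path before importing xgboost, and now import the installed package directly. A minimal sketch of the new pattern, assuming the library has been installed first (for instance via `python setup.py install` from the wrapper directory; the install step itself is not part of this diff):

    # before: the scripts made the in-tree wrapper importable by hand
    #   import sys
    #   sys.path.append('../../wrapper')
    # after: the installed package resolves from site-packages
    import xgboost as xgb

    dtrain = xgb.DMatrix('../data/agaricus.txt.train')  # demo data path, assumed present
    param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
    bst = xgb.train(param, dtrain, 2)  # train 2 rounds as a smoke test
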
@@ -1,17 +1,16 @@
 #!/usr/bin/python
-import sys

 def loadfmap( fname ):
     fmap = {}
     nmap = {}

     for l in open( fname ):
         arr = l.split()
         if arr[0].find('.') != -1:
             idx = int( arr[0].strip('.') )
             assert idx not in fmap
             fmap[ idx ] = {}
             ftype = arr[1].strip(':')
             content = arr[2]
         else:
             content = arr[0]
@@ -23,7 +22,7 @@ def loadfmap( fname ):
             nmap[ len(nmap) ] = ftype+'='+k
     return fmap, nmap

 def write_nmap( fo, nmap ):
     for i in range( len(nmap) ):
         fo.write('%d\t%s\ti\n' % (i, nmap[i]) )

@@ -33,7 +32,7 @@ fo = open( 'featmap.txt', 'w' )
 write_nmap( fo, nmap )
 fo.close()

 fo = open( 'agaricus.txt', 'w' )
 for l in open( 'agaricus-lepiota.data' ):
     arr = l.split(',')
     if arr[0] == 'p':
@@ -47,4 +46,4 @@ for l in open( 'agaricus-lepiota.data' ):

 fo.close()

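For context on the hunks above: loadfmap expands each categorical column into one indicator feature per value, and write_nmap records them as tab-separated index/name/type lines ('i' for indicator). A small hedged illustration with invented entries:

    nmap = {0: 'cap-shape=bell', 1: 'cap-shape=conical'}  # invented feature names
    with open('featmap.txt', 'w') as fo:
        for i in range(len(nmap)):
            fo.write('%d\t%s\ti\n' % (i, nmap[i]))  # 'i' marks an indicator feature
    # featmap.txt is what bst.dump_model later uses to print readable splits
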
@@ -1,10 +1,6 @@
 #!/usr/bin/python
-import sys
 import numpy as np
 import scipy.sparse
-# append the path to xgboost, you may need to change the following line
-# alternatively, you can add the path to PYTHONPATH environment variable
-sys.path.append('../../wrapper')
 import xgboost as xgb

 ### simple example
@@ -33,7 +29,7 @@ bst.dump_model('dump.nice.txt','../data/featmap.txt')
 # save dmatrix into binary buffer
 dtest.save_binary('dtest.buffer')
 bst.save_model('xgb.model')
 # load model and data in
 bst2 = xgb.Booster(model_file='xgb.model')
 dtest2 = xgb.DMatrix('dtest.buffer')
 preds2 = bst2.predict(dtest2)

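The second hunk above exercises serialization of both the model and the DMatrix. A short self-contained check, using the demo's file names, that the reloaded booster reproduces the original predictions (data paths are assumptions):

    import numpy as np
    import xgboost as xgb

    dtrain = xgb.DMatrix('../data/agaricus.txt.train')
    dtest = xgb.DMatrix('../data/agaricus.txt.test')
    param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
    bst = xgb.train(param, dtrain, 2)
    preds = bst.predict(dtest)

    # serialize model and data, reload both, and re-predict
    bst.save_model('xgb.model')
    dtest.save_binary('dtest.buffer')
    bst2 = xgb.Booster(model_file='xgb.model')
    dtest2 = xgb.DMatrix('dtest.buffer')
    preds2 = bst2.predict(dtest2)
    assert np.allclose(preds, preds2), 'round-trip changed predictions'
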
@@ -1,7 +1,5 @@
 #!/usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper')
 import xgboost as xgb

 dtrain = xgb.DMatrix('../data/agaricus.txt.train')

@@ -1,7 +1,5 @@
 #!/usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper')
 import xgboost as xgb

 ### load data in do training
@@ -56,7 +54,7 @@ def evalerror(preds, dtrain):
     labels = dtrain.get_label()
     return 'error', float(sum(labels != (preds > 0.0))) / len(labels)

 param = {'max_depth':2, 'eta':1, 'silent':1}
 # train with customized objective
 xgb.cv(param, dtrain, num_round, nfold = 5, seed = 0,
        obj = logregobj, feval=evalerror)

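The xgb.cv call above passes obj = logregobj, whose definition falls outside the hunk. A hedged reconstruction of a logistic-loss objective in the shape xgboost expects (a gradient and a hessian per prediction; the raw-margin convention matches evalerror's `preds > 0.0` test):

    import numpy as np

    def logregobj(preds, dtrain):
        # custom objective: gradient and hessian of logistic loss
        labels = dtrain.get_label()
        preds = 1.0 / (1.0 + np.exp(-preds))  # sigmoid of the raw margin
        grad = preds - labels
        hess = preds * (1.0 - preds)
        return grad, hess
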
@@ -1,11 +1,9 @@
 #!/usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper')
 import xgboost as xgb
 ###
 # advanced: cutomsized loss function
 #
 print ('start running example to used cutomized objective function')

 dtrain = xgb.DMatrix('../data/agaricus.txt.train')

@@ -1,6 +1,4 @@
 #!/usr/bin/python
-import sys
-sys.path.append('../../wrapper')
 import xgboost as xgb
 ##
 # this script demonstrate how to fit generalized linear model in xgboost
@@ -9,17 +7,17 @@ import xgboost as xgb
 dtrain = xgb.DMatrix('../data/agaricus.txt.train')
 dtest = xgb.DMatrix('../data/agaricus.txt.test')
 # change booster to gblinear, so that we are fitting a linear model
 # alpha is the L1 regularizer
 # lambda is the L2 regularizer
 # you can also set lambda_bias which is L2 regularizer on the bias term
 param = {'silent':1, 'objective':'binary:logistic', 'booster':'gblinear',
          'alpha': 0.0001, 'lambda': 1 }

 # normally, you do not need to set eta (step_size)
 # XGBoost uses a parallel coordinate descent algorithm (shotgun),
 # there could be affection on convergence with parallelization on certain cases
 # setting eta to be smaller value, e.g 0.5 can make the optimization more stable
 # param['eta'] = 1

 ##
 # the rest of settings are the same

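A sketch of how the parameters above are typically consumed; the watchlist and round count are assumptions, not shown in this hunk:

    import xgboost as xgb

    dtrain = xgb.DMatrix('../data/agaricus.txt.train')
    dtest = xgb.DMatrix('../data/agaricus.txt.test')
    # gblinear fits a regularized linear model instead of trees:
    # alpha is the L1 penalty, lambda the L2 penalty on the weights
    param = {'silent': 1, 'objective': 'binary:logistic', 'booster': 'gblinear',
             'alpha': 0.0001, 'lambda': 1}
    watchlist = [(dtest, 'eval'), (dtrain, 'train')]  # assumed
    num_round = 4                                     # assumed
    bst = xgb.train(param, dtrain, num_round, watchlist)
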
@@ -1,7 +1,5 @@
 #!/usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper')
 import xgboost as xgb

 ### load data in do training

@@ -1,7 +1,5 @@
 #!/usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper')
 import xgboost as xgb

 ### load data in do training

@@ -4,8 +4,6 @@ Created on 1 Apr 2015
 @author: Jamie Hall
 '''

-import sys
-sys.path.append('../../wrapper')
 import xgboost as xgb

 import numpy as np

@@ -1,7 +1,5 @@
 #!/usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper')
 import xgboost as xgb

 ### load data in do training

@@ -1,14 +1,6 @@
 #!/usr/bin/python
 # this is the example script to use xgboost to train
-import inspect
-import os
-import sys
 import numpy as np
-# add path of xgboost python module
-code_path = os.path.join(
-    os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../wrapper")
-
-sys.path.append(code_path)

 import xgboost as xgb

@@ -29,7 +21,7 @@ weight = dtrain[:,31] * float(test_size) / len(label)
 sum_wpos = sum( weight[i] for i in range(len(label)) if label[i] == 1.0 )
 sum_wneg = sum( weight[i] for i in range(len(label)) if label[i] == 0.0 )

 # print weight statistics
 print ('weight statistics: wpos=%g, wneg=%g, ratio=%g' % ( sum_wpos, sum_wneg, sum_wneg/sum_wpos ))

 # construct xgboost.DMatrix from numpy array, treat -999.0 as missing value
@@ -42,13 +34,13 @@ param = {}
 param['objective'] = 'binary:logitraw'
 # scale weight of positive examples
 param['scale_pos_weight'] = sum_wneg/sum_wpos
 param['eta'] = 0.1
 param['max_depth'] = 6
 param['eval_metric'] = 'auc'
 param['silent'] = 1
 param['nthread'] = 16

 # you can directly throw param in, though we want to watch multiple metrics here
 plst = list(param.items())+[('eval_metric', 'ams@0.15')]

 watchlist = [ (xgmat,'train') ]

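The per-element generator sums above can also be written with vectorized numpy, which is equivalent and faster on the full Higgs arrays; a hedged sketch with stand-in data:

    import numpy as np

    label = np.array([1.0, 0.0, 0.0, 1.0, 0.0])  # stand-in for the Higgs labels
    weight = np.ones(len(label))                 # stand-in for the event weights

    sum_wpos = weight[label == 1.0].sum()
    sum_wneg = weight[label == 0.0].sum()

    param = {'objective': 'binary:logitraw', 'eta': 0.1, 'max_depth': 6,
             'eval_metric': 'auc', 'silent': 1,
             'scale_pos_weight': sum_wneg / sum_wpos}
    # watch AMS at the 15% cutoff alongside AUC, as the demo does
    plst = list(param.items()) + [('eval_metric', 'ams@0.15')]
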
@@ -1,9 +1,6 @@
 #!/usr/bin/python
 # make prediction
-import sys
 import numpy as np
-# add path of xgboost python module
-sys.path.append('../../wrapper/')
 import xgboost as xgb

 # path to where the data lies
@@ -11,7 +8,7 @@ dpath = 'data'

 modelfile = 'higgs.model'
 outfile = 'higgs.pred.csv'
 # make top 15% as positive
 threshold_ratio = 0.15

 # load in training data, directly use numpy
@@ -24,7 +21,7 @@ xgmat = xgb.DMatrix( data, missing = -999.0 )
 bst = xgb.Booster({'nthread':16}, model_file = modelfile)
 ypred = bst.predict( xgmat )

 res = [ ( int(idx[i]), ypred[i] ) for i in range(len(ypred)) ]

 rorder = {}
 for k, v in sorted( res, key = lambda x:-x[1] ):
@@ -36,12 +33,12 @@ fo = open(outfile, 'w')
 nhit = 0
 ntot = 0
 fo.write('EventId,RankOrder,Class\n')
 for k, v in res:
     if rorder[k] <= ntop:
         lb = 's'
         nhit += 1
     else:
         lb = 'b'
     # change output rank order to follow Kaggle convention
     fo.write('%s,%d,%s\n' % ( k, len(rorder)+1-rorder[k], lb ) )
     ntot += 1

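The loop that fills rorder is cut off by the hunk boundary above; a hedged reconstruction of the ranking step with invented scores (rank 1 is the highest-scoring event, and the top threshold_ratio fraction is labeled 's'):

    ypred = [0.9, 0.1, 0.7]         # invented predictions
    idx = [100000, 100001, 100002]  # invented event ids
    threshold_ratio = 0.15

    res = [(int(idx[i]), ypred[i]) for i in range(len(ypred))]
    rorder = {}
    for k, v in sorted(res, key=lambda x: -x[1]):
        rorder[k] = len(rorder) + 1  # assumed loop body: rank by descending score
    ntop = int(threshold_ratio * len(rorder))
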
@@ -1,9 +1,6 @@
 #!/usr/bin/python
 # this is the example script to use xgboost to train
-import sys
 import numpy as np
-# add path of xgboost python module
-sys.path.append('../../wrapper/')
 import xgboost as xgb
 from sklearn.ensemble import GradientBoostingClassifier
 import time

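Since this file benchmarks xgboost against sklearn's GradientBoostingClassifier, a minimal hedged timing harness on toy data (the demo itself uses the Higgs CSV):

    import time
    import numpy as np
    import xgboost as xgb
    from sklearn.ensemble import GradientBoostingClassifier

    X = np.random.rand(1000, 10)                  # toy features
    y = (np.random.rand(1000) > 0.5).astype(int)  # toy labels

    tic = time.time()
    xgb.train({'objective': 'binary:logistic', 'silent': 1},
              xgb.DMatrix(X, label=y), 10)
    print('xgboost: %.3fs' % (time.time() - tic))

    tic = time.time()
    GradientBoostingClassifier(n_estimators=10).fit(X, y)
    print('sklearn: %.3fs' % (time.time() - tic))
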
@@ -1,7 +1,5 @@
 #! /usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper/')
 import xgboost as xgb

 # label need to be 0 to num_class -1

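The comment above states the one multiclass constraint: labels must be integers in [0, num_class - 1]. A hedged minimal configuration on toy data (class count and parameters invented):

    import numpy as np
    import xgboost as xgb

    data = np.random.rand(30, 4)              # toy 3-class problem
    label = np.random.randint(0, 3, size=30)  # labels already in [0, 2]
    dtrain = xgb.DMatrix(data, label=label)

    param = {'objective': 'multi:softmax', 'num_class': 3,
             'max_depth': 3, 'eta': 0.3, 'silent': 1}
    bst = xgb.train(param, dtrain, 5)
    pred = bst.predict(dtrain)  # multi:softmax returns a class index per row
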
@@ -1,7 +1,6 @@
 #!/usr/bin/python
-import sys

 fo = open( 'machine.txt', 'w' )
 cnt = 6
 fmap = {}
 for l in open( 'machine.data' ):
@@ -9,12 +8,12 @@ for l in open( 'machine.data' ):
     fo.write(arr[8])
     for i in range( 0,6 ):
         fo.write( ' %d:%s' %(i,arr[i+2]) )

     if arr[0] not in fmap:
         fmap[arr[0]] = cnt
         cnt += 1

     fo.write( ' %d:1' % fmap[arr[0]] )
     fo.write('\n')

 fo.close()
@@ -22,7 +21,7 @@ fo.close()
 # create feature map for machine data
 fo = open('featmap.txt', 'w')
 # list from machine.names
 names = ['vendor','MYCT', 'MMIN', 'MMAX', 'CACH', 'CHMIN', 'CHMAX', 'PRP', 'ERP' ];

 for i in range(0,6):
     fo.write( '%d\t%s\tint\n' % (i, names[i+1]))

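The script above rewrites each machine.data row into LibSVM format: target first (PRP, column 8), then the six numeric columns, then a one-hot indicator for the vendor. A hedged trace with an illustrative row:

    row = 'adviser,32/60,125,256,6000,256,16,128,198,199'  # illustrative row
    arr = row.split(',')
    fmap = {'adviser': 6}  # first unseen vendor takes the next free index, cnt = 6

    line = arr[8]                           # PRP, the regression target
    for i in range(0, 6):
        line += ' %d:%s' % (i, arr[i + 2])  # MYCT..CHMAX as numeric features
    line += ' %d:1' % fmap[arr[0]]          # one-hot vendor indicator
    print(line)  # -> 198 0:125 1:256 2:6000 3:256 4:16 5:128 6:1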