Compatibility with both Python 2(.7) and 3
This commit is contained in:
parent
991634a58e
commit
93d83ca077
@ -24,7 +24,7 @@ def loadfmap( fname ):
|
|||||||
return fmap, nmap
|
return fmap, nmap
|
||||||
|
|
||||||
def write_nmap( fo, nmap ):
|
def write_nmap( fo, nmap ):
|
||||||
for i in xrange( len(nmap) ):
|
for i in range( len(nmap) ):
|
||||||
fo.write('%d\t%s\ti\n' % (i, nmap[i]) )
|
fo.write('%d\t%s\ti\n' % (i, nmap[i]) )
|
||||||
|
|
||||||
# start here
|
# start here
|
||||||
@ -41,7 +41,7 @@ for l in open( 'agaricus-lepiota.data' ):
|
|||||||
else:
|
else:
|
||||||
assert arr[0] == 'e'
|
assert arr[0] == 'e'
|
||||||
fo.write('0')
|
fo.write('0')
|
||||||
for i in xrange( 1,len(arr) ):
|
for i in range( 1,len(arr) ):
|
||||||
fo.write( ' %d:1' % fmap[i][arr[i].strip()] )
|
fo.write( ' %d:1' % fmap[i][arr[i].strip()] )
|
||||||
fo.write('\n')
|
fo.write('\n')
|
||||||
|
|
||||||
|
|||||||
@ -3,7 +3,7 @@ import sys
|
|||||||
import random
|
import random
|
||||||
|
|
||||||
if len(sys.argv) < 2:
|
if len(sys.argv) < 2:
|
||||||
print 'Usage:<filename> <k> [nfold = 5]'
|
print ('Usage:<filename> <k> [nfold = 5]')
|
||||||
exit(0)
|
exit(0)
|
||||||
|
|
||||||
random.seed( 10 )
|
random.seed( 10 )
|
||||||
|
|||||||
@ -1,9 +1,15 @@
|
|||||||
#!/usr/bin/python
|
#!/usr/bin/python
|
||||||
# this is the example script to use xgboost to train
|
# this is the example script to use xgboost to train
|
||||||
|
import inspect
|
||||||
|
import os
|
||||||
import sys
|
import sys
|
||||||
import numpy as np
|
import numpy as np
|
||||||
# add path of xgboost python module
|
# add path of xgboost python module
|
||||||
sys.path.append('../../python/')
|
code_path = os.path.join(
|
||||||
|
os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../python")
|
||||||
|
|
||||||
|
sys.path.append(code_path)
|
||||||
|
|
||||||
import xgboost as xgb
|
import xgboost as xgb
|
||||||
|
|
||||||
test_size = 550000
|
test_size = 550000
|
||||||
@ -12,19 +18,19 @@ test_size = 550000
|
|||||||
dpath = 'data'
|
dpath = 'data'
|
||||||
|
|
||||||
# load in training data, directly use numpy
|
# load in training data, directly use numpy
|
||||||
dtrain = np.loadtxt( dpath+'/training.csv', delimiter=',', skiprows=1, converters={32: lambda x:int(x=='s') } )
|
# Map column 32 to 1 when the field is 's' and to 0 otherwise.
# Depending on the Python/NumPy version the converter is handed either bytes
# or str, so test against both spellings instead of only the bytes one.
dtrain = np.loadtxt( dpath+'/training.csv', delimiter=',', skiprows=1, converters={32: lambda x:int(x in ('s', b's')) } )
|
||||||
print 'finish loading from csv '
|
print ('finish loading from csv ')
|
||||||
|
|
||||||
label = dtrain[:,32]
|
label = dtrain[:,32]
|
||||||
data = dtrain[:,1:31]
|
data = dtrain[:,1:31]
|
||||||
# rescale weight to make it same as test set
|
# rescale weight to make it same as test set
|
||||||
weight = dtrain[:,31] * float(test_size) / len(label)
|
weight = dtrain[:,31] * float(test_size) / len(label)
|
||||||
|
|
||||||
sum_wpos = sum( weight[i] for i in xrange(len(label)) if label[i] == 1.0 )
|
sum_wpos = sum( weight[i] for i in range(len(label)) if label[i] == 1.0 )
|
||||||
sum_wneg = sum( weight[i] for i in xrange(len(label)) if label[i] == 0.0 )
|
sum_wneg = sum( weight[i] for i in range(len(label)) if label[i] == 0.0 )
|
||||||
|
|
||||||
# print weight statistics
|
# print weight statistics
|
||||||
print 'weight statistics: wpos=%g, wneg=%g, ratio=%g' % ( sum_wpos, sum_wneg, sum_wneg/sum_wpos )
|
print ('weight statistics: wpos=%g, wneg=%g, ratio=%g' % ( sum_wpos, sum_wneg, sum_wneg/sum_wpos ))
|
||||||
|
|
||||||
# construct xgboost.DMatrix from numpy array, treat -999.0 as missing value
|
# construct xgboost.DMatrix from numpy array, treat -999.0 as missing value
|
||||||
xgmat = xgb.DMatrix( data, label=label, missing = -999.0, weight=weight )
|
xgmat = xgb.DMatrix( data, label=label, missing = -999.0, weight=weight )
|
||||||
@ -43,14 +49,14 @@ param['silent'] = 1
|
|||||||
param['nthread'] = 16
|
param['nthread'] = 16
|
||||||
|
|
||||||
# you can directly throw param in, though we want to watch multiple metrics here
|
# you can directly throw param in, though we want to watch multiple metrics here
|
||||||
plst = param.items()+[('eval_metric', 'ams@0.15')]
|
plst = list(param.items())+[('eval_metric', 'ams@0.15')]
|
||||||
|
|
||||||
watchlist = [ (xgmat,'train') ]
|
watchlist = [ (xgmat,'train') ]
|
||||||
# boost 120 tres
|
# boost 120 tres
|
||||||
num_round = 120
|
num_round = 120
|
||||||
print 'loading data end, start to boost trees'
|
print ('loading data end, start to boost trees')
|
||||||
bst = xgb.train( plst, xgmat, num_round, watchlist );
|
bst = xgb.train( plst, xgmat, num_round, watchlist );
|
||||||
# save out model
|
# save out model
|
||||||
bst.save_model('higgs.model')
|
bst.save_model('higgs.model')
|
||||||
|
|
||||||
print 'finish training'
|
print ('finish training')
|
||||||
|
|||||||
@ -19,13 +19,13 @@ dtest = np.loadtxt( dpath+'/test.csv', delimiter=',', skiprows=1 )
|
|||||||
data = dtest[:,1:31]
|
data = dtest[:,1:31]
|
||||||
idx = dtest[:,0]
|
idx = dtest[:,0]
|
||||||
|
|
||||||
print 'finish loading from csv '
|
print ('finish loading from csv ')
|
||||||
xgmat = xgb.DMatrix( data, missing = -999.0 )
|
xgmat = xgb.DMatrix( data, missing = -999.0 )
|
||||||
bst = xgb.Booster({'nthread':16})
|
bst = xgb.Booster({'nthread':16})
|
||||||
bst.load_model( modelfile )
|
bst.load_model( modelfile )
|
||||||
ypred = bst.predict( xgmat )
|
ypred = bst.predict( xgmat )
|
||||||
|
|
||||||
res = [ ( int(idx[i]), ypred[i] ) for i in xrange(len(ypred)) ]
|
res = [ ( int(idx[i]), ypred[i] ) for i in range(len(ypred)) ]
|
||||||
|
|
||||||
rorder = {}
|
rorder = {}
|
||||||
for k, v in sorted( res, key = lambda x:-x[1] ):
|
for k, v in sorted( res, key = lambda x:-x[1] ):
|
||||||
@ -47,7 +47,7 @@ for k, v in res:
|
|||||||
ntot += 1
|
ntot += 1
|
||||||
fo.close()
|
fo.close()
|
||||||
|
|
||||||
print 'finished writing into prediction file'
|
print ('finished writing into prediction file')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -1,4 +1,14 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
python higgs-numpy.py
|
python -u higgs-numpy.py
|
||||||
python higgs-pred.py
|
ret=$?
|
||||||
|
if [[ $ret != 0 ]]; then
|
||||||
|
echo "ERROR in higgs-numpy.py"
|
||||||
|
exit $ret
|
||||||
|
fi
|
||||||
|
python -u higgs-pred.py
|
||||||
|
ret=$?
|
||||||
|
if [[ $ret != 0 ]]; then
|
||||||
|
echo "ERROR in higgs-pred.py"
|
||||||
|
exit $ret
|
||||||
|
fi
|
||||||
|
|||||||
@ -14,18 +14,18 @@ dpath = 'data'
|
|||||||
|
|
||||||
# load in training data, directly use numpy
|
# load in training data, directly use numpy
|
||||||
dtrain = np.loadtxt( dpath+'/training.csv', delimiter=',', skiprows=1, converters={32: lambda x:int(x=='s') } )
|
# Map column 32 to 1 when the field is 's' and to 0 otherwise.
# Depending on the Python/NumPy version the converter is handed either bytes
# or str; a plain x=='s' is always False for bytes on Python 3, so test
# against both spellings.
dtrain = np.loadtxt( dpath+'/training.csv', delimiter=',', skiprows=1, converters={32: lambda x:int(x in ('s', b's')) } )
|
||||||
print 'finish loading from csv '
|
print ('finish loading from csv ')
|
||||||
|
|
||||||
label = dtrain[:,32]
|
label = dtrain[:,32]
|
||||||
data = dtrain[:,1:31]
|
data = dtrain[:,1:31]
|
||||||
# rescale weight to make it same as test set
|
# rescale weight to make it same as test set
|
||||||
weight = dtrain[:,31] * float(test_size) / len(label)
|
weight = dtrain[:,31] * float(test_size) / len(label)
|
||||||
|
|
||||||
sum_wpos = sum( weight[i] for i in xrange(len(label)) if label[i] == 1.0 )
|
sum_wpos = sum( weight[i] for i in range(len(label)) if label[i] == 1.0 )
|
||||||
sum_wneg = sum( weight[i] for i in xrange(len(label)) if label[i] == 0.0 )
|
sum_wneg = sum( weight[i] for i in range(len(label)) if label[i] == 0.0 )
|
||||||
|
|
||||||
# print weight statistics
|
# print weight statistics
|
||||||
print 'weight statistics: wpos=%g, wneg=%g, ratio=%g' % ( sum_wpos, sum_wneg, sum_wneg/sum_wpos )
|
print ('weight statistics: wpos=%g, wneg=%g, ratio=%g' % ( sum_wpos, sum_wneg, sum_wneg/sum_wpos ))
|
||||||
|
|
||||||
# construct xgboost.DMatrix from numpy array, treat -999.0 as missing value
|
# construct xgboost.DMatrix from numpy array, treat -999.0 as missing value
|
||||||
xgmat = xgb.DMatrix( data, label=label, missing = -999.0, weight=weight )
|
xgmat = xgb.DMatrix( data, label=label, missing = -999.0, weight=weight )
|
||||||
@ -47,20 +47,20 @@ plst = param.items()+[('eval_metric', 'ams@0.15')]
|
|||||||
watchlist = [ (xgmat,'train') ]
|
watchlist = [ (xgmat,'train') ]
|
||||||
# boost 10 tres
|
# boost 10 tres
|
||||||
num_round = 10
|
num_round = 10
|
||||||
print 'loading data end, start to boost trees'
|
print ('loading data end, start to boost trees')
|
||||||
print "training GBM from sklearn"
|
print ("training GBM from sklearn")
|
||||||
tmp = time.time()
|
tmp = time.time()
|
||||||
gbm = GradientBoostingClassifier(n_estimators=num_round, max_depth=6, verbose=2)
|
gbm = GradientBoostingClassifier(n_estimators=num_round, max_depth=6, verbose=2)
|
||||||
gbm.fit(data, label)
|
gbm.fit(data, label)
|
||||||
print "sklearn.GBM costs: %s seconds" % str(time.time() - tmp)
|
print ("sklearn.GBM costs: %s seconds" % str(time.time() - tmp))
|
||||||
#raw_input()
|
#raw_input()
|
||||||
print "training xgboost"
|
print ("training xgboost")
|
||||||
threads = [1, 2, 4, 16]
|
threads = [1, 2, 4, 16]
|
||||||
for i in threads:
|
for i in threads:
|
||||||
param['nthread'] = i
|
param['nthread'] = i
|
||||||
tmp = time.time()
|
tmp = time.time()
|
||||||
plst = param.items()+[('eval_metric', 'ams@0.15')]
|
# dict.items() returns a view on Python 3, which cannot be concatenated with
# a list; materialize it first (a no-op copy on Python 2).
plst = list(param.items())+[('eval_metric', 'ams@0.15')]
|
||||||
bst = xgb.train( plst, xgmat, num_round, watchlist );
|
bst = xgb.train( plst, xgmat, num_round, watchlist );
|
||||||
print "XGBoost with %d thread costs: %s seconds" % (i, str(time.time() - tmp))
|
print ("XGBoost with %d thread costs: %s seconds" % (i, str(time.time() - tmp)))
|
||||||
|
|
||||||
print 'finish training'
|
print ('finish training')
|
||||||
|
|||||||
@ -37,6 +37,6 @@ bst = xgb.train(param, xg_train, num_round, watchlist );
|
|||||||
# get prediction
|
# get prediction
|
||||||
pred = bst.predict( xg_test );
|
pred = bst.predict( xg_test );
|
||||||
|
|
||||||
print 'predicting, classification error=%f' % (sum( int(pred[i]) != test_Y[i] for i in xrange(len(test_Y))) / float(len(test_Y)) )
|
print ('predicting, classification error=%f' % (sum( int(pred[i]) != test_Y[i] for i in range(len(test_Y))) / float(len(test_Y)) ))
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -12,7 +12,7 @@ def save_data(group_data,output_feature,output_group):
|
|||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
if len(sys.argv) != 4:
|
if len(sys.argv) != 4:
|
||||||
print "Usage: python trans_data.py [Ranksvm Format Input] [Output Feature File] [Output Group File]"
|
print ("Usage: python trans_data.py [Ranksvm Format Input] [Output Feature File] [Output Group File]")
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
fi = open(sys.argv[1])
|
fi = open(sys.argv[1])
|
||||||
|
|||||||
@ -7,7 +7,7 @@ fmap = {}
|
|||||||
for l in open( 'machine.data' ):
|
for l in open( 'machine.data' ):
|
||||||
arr = l.split(',')
|
arr = l.split(',')
|
||||||
fo.write(arr[8])
|
fo.write(arr[8])
|
||||||
for i in xrange( 0,6 ):
|
for i in range( 0,6 ):
|
||||||
fo.write( ' %d:%s' %(i,arr[i+2]) )
|
fo.write( ' %d:%s' %(i,arr[i+2]) )
|
||||||
|
|
||||||
if arr[0] not in fmap:
|
if arr[0] not in fmap:
|
||||||
@ -24,9 +24,9 @@ fo = open('featmap.txt', 'w')
|
|||||||
# list from machine.names
|
# list from machine.names
|
||||||
names = ['vendor','MYCT', 'MMIN', 'MMAX', 'CACH', 'CHMIN', 'CHMAX', 'PRP', 'ERP' ];
|
names = ['vendor','MYCT', 'MMIN', 'MMAX', 'CACH', 'CHMIN', 'CHMAX', 'PRP', 'ERP' ];
|
||||||
|
|
||||||
for i in xrange(0,6):
|
for i in range(0,6):
|
||||||
fo.write( '%d\t%s\tint\n' % (i, names[i+1]))
|
fo.write( '%d\t%s\tint\n' % (i, names[i+1]))
|
||||||
|
|
||||||
for v, k in sorted( fmap.iteritems(), key = lambda x:x[1] ):
|
for v, k in sorted( fmap.items(), key = lambda x:x[1] ):
|
||||||
fo.write( '%d\tvendor=%s\ti\n' % (k, v))
|
fo.write( '%d\tvendor=%s\ti\n' % (k, v))
|
||||||
fo.close()
|
fo.close()
|
||||||
|
|||||||
@ -3,7 +3,7 @@ import sys
|
|||||||
import random
|
import random
|
||||||
|
|
||||||
if len(sys.argv) < 2:
|
if len(sys.argv) < 2:
|
||||||
print 'Usage:<filename> <k> [nfold = 5]'
|
print ('Usage:<filename> <k> [nfold = 5]')
|
||||||
exit(0)
|
exit(0)
|
||||||
|
|
||||||
random.seed( 10 )
|
random.seed( 10 )
|
||||||
|
|||||||
@ -22,7 +22,7 @@ bst = xgb.train( param, dtrain, num_round, evallist )
|
|||||||
# this is prediction
|
# this is prediction
|
||||||
preds = bst.predict( dtest )
|
preds = bst.predict( dtest )
|
||||||
labels = dtest.get_label()
|
labels = dtest.get_label()
|
||||||
print 'error=%f' % ( sum(1 for i in xrange(len(preds)) if int(preds[i]>0.5)!=labels[i]) /float(len(preds)))
|
print ('error=%f' % ( sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) /float(len(preds))))
|
||||||
bst.save_model('0001.model')
|
bst.save_model('0001.model')
|
||||||
# dump model
|
# dump model
|
||||||
bst.dump_model('dump.raw.txt')
|
bst.dump_model('dump.raw.txt')
|
||||||
@ -32,7 +32,7 @@ bst.dump_model('dump.raw.txt','featmap.txt')
|
|||||||
###
|
###
|
||||||
# build dmatrix in python iteratively
|
# build dmatrix in python iteratively
|
||||||
#
|
#
|
||||||
print 'start running example of build DMatrix in python'
|
print ('start running example of build DMatrix in python')
|
||||||
dtrain = xgb.DMatrix()
|
dtrain = xgb.DMatrix()
|
||||||
labels = []
|
labels = []
|
||||||
for l in open('agaricus.txt.train'):
|
for l in open('agaricus.txt.train'):
|
||||||
@ -50,7 +50,7 @@ bst = xgb.train( param, dtrain, num_round, evallist )
|
|||||||
|
|
||||||
###
|
###
|
||||||
# build dmatrix from scipy.sparse
|
# build dmatrix from scipy.sparse
|
||||||
print 'start running example of build DMatrix from scipy.sparse'
|
print ('start running example of build DMatrix from scipy.sparse')
|
||||||
labels = []
|
labels = []
|
||||||
row = []; col = []; dat = []
|
row = []; col = []; dat = []
|
||||||
i = 0
|
i = 0
|
||||||
@ -68,7 +68,7 @@ dtrain.set_label(labels)
|
|||||||
evallist = [(dtest,'eval'), (dtrain,'train')]
|
evallist = [(dtest,'eval'), (dtrain,'train')]
|
||||||
bst = xgb.train( param, dtrain, num_round, evallist )
|
bst = xgb.train( param, dtrain, num_round, evallist )
|
||||||
|
|
||||||
print 'start running example of build DMatrix from numpy array'
|
print ('start running example of build DMatrix from numpy array')
|
||||||
# NOTE: npymat is numpy array, we will convert it into scipy.sparse.csr_matrix in internal implementation,then convert to DMatrix
|
# NOTE: npymat is numpy array, we will convert it into scipy.sparse.csr_matrix in internal implementation,then convert to DMatrix
|
||||||
npymat = csr.todense()
|
npymat = csr.todense()
|
||||||
dtrain = xgb.DMatrix( npymat )
|
dtrain = xgb.DMatrix( npymat )
|
||||||
@ -79,7 +79,7 @@ bst = xgb.train( param, dtrain, num_round, evallist )
|
|||||||
###
|
###
|
||||||
# advanced: cutomsized loss function, set loss_type to 0, so that predict get untransformed score
|
# advanced: cutomsized loss function, set loss_type to 0, so that predict get untransformed score
|
||||||
#
|
#
|
||||||
print 'start running example to used cutomized objective function'
|
print ('start running example to used cutomized objective function')
|
||||||
|
|
||||||
# note: set loss_type properly, loss_type=2 means the prediction will get logistic transformed
|
# note: set loss_type properly, loss_type=2 means the prediction will get logistic transformed
|
||||||
# in most case, we may want to set loss_type = 0, to get untransformed score to compute gradient
|
# in most case, we may want to set loss_type = 0, to get untransformed score to compute gradient
|
||||||
|
|||||||
@ -41,7 +41,7 @@ class DMatrix:
|
|||||||
if data == None:
|
if data == None:
|
||||||
return
|
return
|
||||||
if isinstance(data,str):
|
if isinstance(data,str):
|
||||||
xglib.XGDMatrixLoad(self.handle, ctypes.c_char_p(data), 1)
|
xglib.XGDMatrixLoad(self.handle, ctypes.c_char_p(data.encode('utf-8')), 1)
|
||||||
elif isinstance(data,scp.csr_matrix):
|
elif isinstance(data,scp.csr_matrix):
|
||||||
self.__init_from_csr(data)
|
self.__init_from_csr(data)
|
||||||
elif isinstance(data, numpy.ndarray) and len(data.shape) == 2:
|
elif isinstance(data, numpy.ndarray) and len(data.shape) == 2:
|
||||||
@ -51,7 +51,7 @@ class DMatrix:
|
|||||||
csr = scp.csr_matrix(data)
|
csr = scp.csr_matrix(data)
|
||||||
self.__init_from_csr(csr)
|
self.__init_from_csr(csr)
|
||||||
except:
|
except:
|
||||||
raise Exception, "can not intialize DMatrix from"+str(type(data))
|
raise Exception("can not intialize DMatrix from"+str(type(data)))
|
||||||
if label != None:
|
if label != None:
|
||||||
self.set_label(label)
|
self.set_label(label)
|
||||||
if weight !=None:
|
if weight !=None:
|
||||||
@ -76,10 +76,10 @@ class DMatrix:
|
|||||||
xglib.XGDMatrixFree(self.handle)
|
xglib.XGDMatrixFree(self.handle)
|
||||||
# load data from file
|
# load data from file
|
||||||
def load(self, fname, silent=True):
|
def load(self, fname, silent=True):
|
||||||
xglib.XGDMatrixLoad(self.handle, ctypes.c_char_p(fname), int(silent))
|
xglib.XGDMatrixLoad(self.handle, ctypes.c_char_p(fname.encode('utf-8')), int(silent))
|
||||||
# load data from file
|
# load data from file
|
||||||
def save_binary(self, fname, silent=True):
|
def save_binary(self, fname, silent=True):
|
||||||
xglib.XGDMatrixSaveBinary(self.handle, ctypes.c_char_p(fname), int(silent))
|
xglib.XGDMatrixSaveBinary(self.handle, ctypes.c_char_p(fname.encode('utf-8')), int(silent))
|
||||||
# set label of dmatrix
|
# set label of dmatrix
|
||||||
def set_label(self, label):
|
def set_label(self, label):
|
||||||
xglib.XGDMatrixSetLabel(self.handle, (ctypes.c_float*len(label))(*label), len(label) )
|
xglib.XGDMatrixSetLabel(self.handle, (ctypes.c_float*len(label))(*label), len(label) )
|
||||||
@ -111,7 +111,7 @@ class DMatrix:
|
|||||||
def __getitem__(self, ridx):
|
def __getitem__(self, ridx):
|
||||||
length = ctypes.c_ulong()
|
length = ctypes.c_ulong()
|
||||||
row = xglib.XGDMatrixGetRow(self.handle, ridx, ctypes.byref(length) );
|
row = xglib.XGDMatrixGetRow(self.handle, ridx, ctypes.byref(length) );
|
||||||
return [ (int(row[i].findex),row[i].fvalue) for i in xrange(length.value) ]
|
return [ (int(row[i].findex),row[i].fvalue) for i in range(length.value) ]
|
||||||
|
|
||||||
class Booster:
|
class Booster:
|
||||||
"""learner class """
|
"""learner class """
|
||||||
@ -126,13 +126,19 @@ class Booster:
|
|||||||
xglib.XGBoosterFree(self.handle)
|
xglib.XGBoosterFree(self.handle)
|
||||||
def set_param(self, params, pv=None):
|
def set_param(self, params, pv=None):
|
||||||
if isinstance(params,dict):
|
if isinstance(params,dict):
|
||||||
for k, v in params.iteritems():
|
for k, v in params.items():
|
||||||
xglib.XGBoosterSetParam( self.handle, ctypes.c_char_p(k), ctypes.c_char_p(str(v)) )
|
xglib.XGBoosterSetParam(
|
||||||
|
self.handle, ctypes.c_char_p(k.encode('utf-8')),
|
||||||
|
ctypes.c_char_p(str(v).encode('utf-8')))
|
||||||
elif isinstance(params,str) and pv != None:
|
elif isinstance(params,str) and pv != None:
|
||||||
xglib.XGBoosterSetParam( self.handle, ctypes.c_char_p(params), ctypes.c_char_p(str(pv)) )
|
xglib.XGBoosterSetParam(
|
||||||
|
self.handle, ctypes.c_char_p(params.encode('utf-8')),
|
||||||
|
ctypes.c_char_p(str(pv).encode('utf-8')) )
|
||||||
else:
|
else:
|
||||||
for k, v in params:
|
for k, v in params:
|
||||||
xglib.XGBoosterSetParam( self.handle, ctypes.c_char_p(k), ctypes.c_char_p(str(v)) )
|
xglib.XGBoosterSetParam(
|
||||||
|
self.handle, ctypes.c_char_p(k.encode('utf-8')),
|
||||||
|
ctypes.c_char_p(str(v).encode('utf-8')) )
|
||||||
def update(self, dtrain):
|
def update(self, dtrain):
|
||||||
""" update """
|
""" update """
|
||||||
assert isinstance(dtrain, DMatrix)
|
assert isinstance(dtrain, DMatrix)
|
||||||
@ -150,13 +156,15 @@ class Booster:
|
|||||||
assert isinstance(dtrain, DMatrix)
|
assert isinstance(dtrain, DMatrix)
|
||||||
if booster_index != None:
|
if booster_index != None:
|
||||||
self.set_param('interact:booster_index', str(booster_index))
|
self.set_param('interact:booster_index', str(booster_index))
|
||||||
xglib.XGBoosterUpdateInteract( self.handle, dtrain.handle, ctypes.c_char_p(str(action)) )
|
# ctypes.c_char_p only accepts bytes on Python 3, so encode the action string
# the same way the other c_char_p arguments in this module are encoded.
xglib.XGBoosterUpdateInteract(
    self.handle, dtrain.handle, ctypes.c_char_p(str(action).encode('utf-8')) )
|
||||||
def eval_set(self, evals, it = 0):
|
def eval_set(self, evals, it = 0):
|
||||||
for d in evals:
|
for d in evals:
|
||||||
assert isinstance(d[0], DMatrix)
|
assert isinstance(d[0], DMatrix)
|
||||||
assert isinstance(d[1], str)
|
assert isinstance(d[1], str)
|
||||||
dmats = ( ctypes.c_void_p * len(evals) )(*[ d[0].handle for d in evals])
|
dmats = ( ctypes.c_void_p * len(evals) )(*[ d[0].handle for d in evals])
|
||||||
evnames = ( ctypes.c_char_p * len(evals) )(*[ ctypes.c_char_p(d[1]) for d in evals])
|
evnames = ( ctypes.c_char_p * len(evals) )(
|
||||||
|
*[ctypes.c_char_p(d[1].encode('utf-8')) for d in evals])
|
||||||
xglib.XGBoosterEvalOneIter( self.handle, it, dmats, evnames, len(evals) )
|
xglib.XGBoosterEvalOneIter( self.handle, it, dmats, evnames, len(evals) )
|
||||||
def eval(self, mat, name = 'eval', it = 0 ):
|
def eval(self, mat, name = 'eval', it = 0 ):
|
||||||
self.eval_set( [(mat,name)], it)
|
self.eval_set( [(mat,name)], it)
|
||||||
@ -166,25 +174,27 @@ class Booster:
|
|||||||
return ctypes2numpy( preds, length.value )
|
return ctypes2numpy( preds, length.value )
|
||||||
def save_model(self, fname):
|
def save_model(self, fname):
|
||||||
""" save model to file """
|
""" save model to file """
|
||||||
xglib.XGBoosterSaveModel( self.handle, ctypes.c_char_p(fname) )
|
xglib.XGBoosterSaveModel(self.handle, ctypes.c_char_p(fname.encode('utf-8')))
|
||||||
def load_model(self, fname):
|
def load_model(self, fname):
|
||||||
"""load model from file"""
|
"""load model from file"""
|
||||||
xglib.XGBoosterLoadModel( self.handle, ctypes.c_char_p(fname) )
|
xglib.XGBoosterLoadModel( self.handle, ctypes.c_char_p(fname.encode('utf-8')) )
|
||||||
def dump_model(self, fname, fmap=''):
|
def dump_model(self, fname, fmap=''):
|
||||||
"""dump model into text file"""
|
"""dump model into text file"""
|
||||||
xglib.XGBoosterDumpModel( self.handle, ctypes.c_char_p(fname), ctypes.c_char_p(fmap) )
|
xglib.XGBoosterDumpModel(
|
||||||
|
self.handle, ctypes.c_char_p(fname.encode('utf-8')),
|
||||||
|
ctypes.c_char_p(fmap.encode('utf-8')))
|
||||||
|
|
||||||
def train(params, dtrain, num_boost_round = 10, evals = [], obj=None):
|
def train(params, dtrain, num_boost_round = 10, evals = [], obj=None):
|
||||||
""" train a booster with given paramaters """
|
""" train a booster with given paramaters """
|
||||||
bst = Booster(params, [dtrain] )
|
bst = Booster(params, [dtrain] )
|
||||||
if obj == None:
|
if obj == None:
|
||||||
for i in xrange(num_boost_round):
|
for i in range(num_boost_round):
|
||||||
bst.update( dtrain )
|
bst.update( dtrain )
|
||||||
if len(evals) != 0:
|
if len(evals) != 0:
|
||||||
bst.eval_set( evals, i )
|
bst.eval_set( evals, i )
|
||||||
else:
|
else:
|
||||||
# try customized objective function
|
# try customized objective function
|
||||||
for i in xrange(num_boost_round):
|
for i in range(num_boost_round):
|
||||||
pred = bst.predict( dtrain )
|
pred = bst.predict( dtrain )
|
||||||
grad, hess = obj( pred, dtrain )
|
grad, hess = obj( pred, dtrain )
|
||||||
bst.boost( dtrain, grad, hess )
|
bst.boost( dtrain, grad, hess )
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user