Merge pull request #230 from jseabold/python-install
Make the Python wrappers installable without path munging
commit 23c273173f
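The practical effect for users, sketched below (the paths and the `setup.py install` step are taken from the diff itself):

```
# before: every demo script located the wrapper by editing sys.path
import sys
sys.path.append('../../wrapper')
import xgboost as xgb

# after: install once with `python setup.py install` from wrapper/,
# then import like any other package
import xgboost as xgb
```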
@@ -1,17 +1,16 @@
 #!/usr/bin/python
-import sys
 
 def loadfmap( fname ):
     fmap = {}
     nmap = {}
 
     for l in open( fname ):
         arr = l.split()
         if arr[0].find('.') != -1:
             idx = int( arr[0].strip('.') )
             assert idx not in fmap
             fmap[ idx ] = {}
             ftype = arr[1].strip(':')
             content = arr[2]
         else:
             content = arr[0]
@@ -23,7 +22,7 @@ def loadfmap( fname ):
             nmap[ len(nmap) ] = ftype+'='+k
     return fmap, nmap
 
 def write_nmap( fo, nmap ):
     for i in range( len(nmap) ):
         fo.write('%d\t%s\ti\n' % (i, nmap[i]) )
 
@@ -33,7 +32,7 @@ fo = open( 'featmap.txt', 'w' )
 write_nmap( fo, nmap )
 fo.close()
 
 fo = open( 'agaricus.txt', 'w' )
 for l in open( 'agaricus-lepiota.data' ):
     arr = l.split(',')
     if arr[0] == 'p':
@@ -47,4 +46,4 @@ for l in open( 'agaricus-lepiota.data' ):
 
 fo.close()
 
 
@@ -1,10 +1,6 @@
 #!/usr/bin/python
-import sys
 import numpy as np
 import scipy.sparse
-# append the path to xgboost, you may need to change the following line
-# alternatively, you can add the path to PYTHONPATH environment variable
-sys.path.append('../../wrapper')
 import xgboost as xgb
 
 ### simple example
@@ -33,7 +29,7 @@ bst.dump_model('dump.nice.txt','../data/featmap.txt')
 # save dmatrix into binary buffer
 dtest.save_binary('dtest.buffer')
 bst.save_model('xgb.model')
 # load model and data in
 bst2 = xgb.Booster(model_file='xgb.model')
 dtest2 = xgb.DMatrix('dtest.buffer')
 preds2 = bst2.predict(dtest2)
@@ -1,7 +1,5 @@
 #!/usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper')
 import xgboost as xgb
 
 dtrain = xgb.DMatrix('../data/agaricus.txt.train')
@@ -1,7 +1,5 @@
 #!/usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper')
 import xgboost as xgb
 
 ### load data in do training
@@ -56,7 +54,7 @@ def evalerror(preds, dtrain):
     labels = dtrain.get_label()
     return 'error', float(sum(labels != (preds > 0.0))) / len(labels)
 
 param = {'max_depth':2, 'eta':1, 'silent':1}
 # train with customized objective
 xgb.cv(param, dtrain, num_round, nfold = 5, seed = 0,
        obj = logregobj, feval=evalerror)
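The `logregobj` passed to `xgb.cv` above is defined earlier in this demo, outside the hunk. A sketch of the standard logistic-loss objective, assuming the definition matches the one the demo uses:

```
import numpy as np

def logregobj(preds, dtrain):
    labels = dtrain.get_label()
    preds = 1.0 / (1.0 + np.exp(-preds))  # map raw margin to probability
    grad = preds - labels                 # first-order gradient of log loss
    hess = preds * (1.0 - preds)          # second-order gradient (hessian)
    return grad, hess
```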
@@ -1,11 +1,9 @@
 #!/usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper')
 import xgboost as xgb
 ###
 # advanced: customized loss function
 #
 print ('start running example to use customized objective function')
 
 dtrain = xgb.DMatrix('../data/agaricus.txt.train')
@@ -1,6 +1,4 @@
 #!/usr/bin/python
-import sys
-sys.path.append('../../wrapper')
 import xgboost as xgb
 ##
 # this script demonstrates how to fit a generalized linear model in xgboost
@@ -9,17 +7,17 @@ import xgboost as xgb
 dtrain = xgb.DMatrix('../data/agaricus.txt.train')
 dtest = xgb.DMatrix('../data/agaricus.txt.test')
 # change booster to gblinear, so that we are fitting a linear model
 # alpha is the L1 regularizer
 # lambda is the L2 regularizer
 # you can also set lambda_bias, which is the L2 regularizer on the bias term
 param = {'silent':1, 'objective':'binary:logistic', 'booster':'gblinear',
          'alpha': 0.0001, 'lambda': 1 }
 
 # normally, you do not need to set eta (step_size)
 # XGBoost uses a parallel coordinate descent algorithm (shotgun);
 # parallelization can affect convergence in certain cases
 # setting eta to a smaller value, e.g. 0.5, can make the optimization more stable
 # param['eta'] = 1
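The train/predict calls follow this hunk in the demo. A minimal sketch under the names defined above (the watchlist and `num_round` value are assumptions, not shown in this diff):

```
# train the gblinear model with the param dict above, watching both sets
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
num_round = 4
bst = xgb.train(param, dtrain, num_round, watchlist)
preds = bst.predict(dtest)
```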
 
 ##
 # the rest of the settings are the same
@@ -1,7 +1,5 @@
 #!/usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper')
 import xgboost as xgb
 
 ### load data in do training
@@ -1,7 +1,5 @@
 #!/usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper')
 import xgboost as xgb
 
 ### load data in do training
@@ -4,8 +4,6 @@ Created on 1 Apr 2015
 @author: Jamie Hall
 '''
 
-import sys
-sys.path.append('../../wrapper')
 import xgboost as xgb
 
 import numpy as np
@@ -1,7 +1,5 @@
 #!/usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper')
 import xgboost as xgb
 
 ### load data in do training
@@ -1,14 +1,6 @@
 #!/usr/bin/python
 # this is the example script to use xgboost to train
-import inspect
-import os
-import sys
 import numpy as np
-# add path of xgboost python module
-code_path = os.path.join(
-    os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../wrapper")
-
-sys.path.append(code_path)
 
 import xgboost as xgb
 
@@ -29,7 +21,7 @@ weight = dtrain[:,31] * float(test_size) / len(label)
 sum_wpos = sum( weight[i] for i in range(len(label)) if label[i] == 1.0 )
 sum_wneg = sum( weight[i] for i in range(len(label)) if label[i] == 0.0 )
 
 # print weight statistics
 print ('weight statistics: wpos=%g, wneg=%g, ratio=%g' % ( sum_wpos, sum_wneg, sum_wneg/sum_wpos ))
 
 # construct xgboost.DMatrix from numpy array, treat -999.0 as missing value
@@ -42,13 +34,13 @@ param = {}
 param['objective'] = 'binary:logitraw'
 # scale weight of positive examples
 param['scale_pos_weight'] = sum_wneg/sum_wpos
 param['eta'] = 0.1
 param['max_depth'] = 6
 param['eval_metric'] = 'auc'
 param['silent'] = 1
 param['nthread'] = 16
 
 # you can directly throw param in, though we want to watch multiple metrics here
 plst = list(param.items())+[('eval_metric', 'ams@0.15')]
 
 watchlist = [ (xgmat,'train') ]
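The actual training call is elided from this diff; a sketch consistent with the names above (the round count is an assumption, while 'higgs.model' matches the `modelfile` loaded by the prediction script below):

```
# train with the parameter list (including the extra ams@0.15 metric),
# monitoring progress on the training matrix
num_round = 120
bst = xgb.train(plst, xgmat, num_round, watchlist)
bst.save_model('higgs.model')
```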
@@ -1,9 +1,6 @@
 #!/usr/bin/python
 # make prediction
-import sys
 import numpy as np
-# add path of xgboost python module
-sys.path.append('../../wrapper/')
 import xgboost as xgb
 
 # path to where the data lies
@@ -11,7 +8,7 @@ dpath = 'data'
 
 modelfile = 'higgs.model'
 outfile = 'higgs.pred.csv'
 # make top 15% as positive
 threshold_ratio = 0.15
 
 # load in training data, directly use numpy
@@ -24,7 +21,7 @@ xgmat = xgb.DMatrix( data, missing = -999.0 )
 bst = xgb.Booster({'nthread':16}, model_file = modelfile)
 ypred = bst.predict( xgmat )
 
 res = [ ( int(idx[i]), ypred[i] ) for i in range(len(ypred)) ]
 
 rorder = {}
 for k, v in sorted( res, key = lambda x:-x[1] ):
@@ -36,12 +33,12 @@ fo = open(outfile, 'w')
 nhit = 0
 ntot = 0
 fo.write('EventId,RankOrder,Class\n')
 for k, v in res:
     if rorder[k] <= ntop:
         lb = 's'
         nhit += 1
     else:
         lb = 'b'
     # change output rank order to follow Kaggle convention
     fo.write('%s,%d,%s\n' % ( k, len(rorder)+1-rorder[k], lb ) )
     ntot += 1
@@ -1,9 +1,6 @@
 #!/usr/bin/python
 # this is the example script to use xgboost to train
-import sys
 import numpy as np
-# add path of xgboost python module
-sys.path.append('../../wrapper/')
 import xgboost as xgb
 from sklearn.ensemble import GradientBoostingClassifier
 import time
@@ -1,7 +1,5 @@
 #! /usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper/')
 import xgboost as xgb
 
 # label need to be 0 to num_class -1
@@ -1,7 +1,6 @@
 #!/usr/bin/python
-import sys
 
 fo = open( 'machine.txt', 'w' )
 cnt = 6
 fmap = {}
 for l in open( 'machine.data' ):
@@ -9,12 +8,12 @@ for l in open( 'machine.data' ):
     fo.write(arr[8])
     for i in range( 0,6 ):
         fo.write( ' %d:%s' %(i,arr[i+2]) )
 
     if arr[0] not in fmap:
         fmap[arr[0]] = cnt
         cnt += 1
 
     fo.write( ' %d:1' % fmap[arr[0]] )
     fo.write('\n')
 
 fo.close()
@@ -22,7 +21,7 @@ fo.close()
 # create feature map for machine data
 fo = open('featmap.txt', 'w')
 # list from machine.names
 names = ['vendor','MYCT', 'MMIN', 'MMAX', 'CACH', 'CHMIN', 'CHMAX', 'PRP', 'ERP' ];
 
 for i in range(0,6):
     fo.write( '%d\t%s\tint\n' % (i, names[i+1]))
@@ -10,13 +10,17 @@ This should give you xgboost.exe for CLI version and xgboost_wrapper.dll for pyt
 
 Use Python Module
 =====
-* After you build the dll, you can simply add the path to [../wrapper](../wrapper) to sys.path and import xgboost
+* After you build the dll, you can install the Python package from the [../wrapper](../wrapper) folder
+
+```
+python setup.py install
+```
+
+And import it as usual
+
 ```
-sys.path.append('path/to/xgboost/wrapper')
 import xgboost as xgb
 ```
-* Alternatively, you can add that path to system enviroment variable ```PYTHONPATH```
-  - Doing so allows you to import xgboost directly like other python packages
 
 R Package
 ====
@@ -5,6 +5,7 @@ This folder provides wrapper of xgboost to other languages
 Python
 =====
 * To make the python module, type ```make``` in the root directory of project
+* Install with `python setup.py install` from this directory.
 * Refer also to the walk through example in [demo folder](../demo/guide-python)
 
 R
new file: wrapper/__init__.py (0 lines)
new file: wrapper/setup.py (28 lines)
@@ -0,0 +1,28 @@
+import os
+
+from setuptools import setup
+
+
+class XGBoostLibraryNotFound(Exception):
+    pass
+
+
+cur_dir = os.path.dirname(os.path.abspath(__file__))
+
+if os.name == 'nt':
+    dll_path = os.path.join(cur_dir,
+                            '../windows/x64/Release/xgboost_wrapper.dll')
+else:
+    dll_path = os.path.join(cur_dir, 'libxgboostwrapper.so')
+
+if not os.path.exists(dll_path):
+    raise XGBoostLibraryNotFound("XGBoost library not found. Did you run "
+                                 "../make?")
+
+setup(name="xgboost",
+      version="0.32",
+      description="Python wrappers for XGBoost: eXtreme Gradient Boosting",
+      zip_safe=False,
+      py_modules=['xgboost'],
+      data_files=[dll_path],
+      url="https://github.com/dmlc/xgboost")
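Once the shared library is built with make and the package installed via `python setup.py install`, a quick sanity check (a sketch; the printed path depends on your environment):

```
# the import should now resolve from site-packages, with no sys.path edits
import xgboost as xgb
print(xgb.__file__)
```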