Merge pull request #230 from jseabold/python-install
Make the Python wrappers installable without path munging
commit 23c273173f
@@ -1,17 +1,16 @@
#!/usr/bin/python
import sys

def loadfmap( fname ):
    fmap = {}
    nmap = {}

    for l in open( fname ):
        arr = l.split()
        if arr[0].find('.') != -1:
            idx = int( arr[0].strip('.') )
            assert idx not in fmap
            fmap[ idx ] = {}
            ftype = arr[1].strip(':')
            content = arr[2]
        else:
            content = arr[0]
@@ -23,7 +22,7 @@ def loadfmap( fname ):
            nmap[ len(nmap) ] = ftype+'='+k
    return fmap, nmap

def write_nmap( fo, nmap ):
    for i in range( len(nmap) ):
        fo.write('%d\t%s\ti\n' % (i, nmap[i]) )

@@ -33,7 +32,7 @@ fo = open( 'featmap.txt', 'w' )
write_nmap( fo, nmap )
fo.close()

fo = open( 'agaricus.txt', 'w' )
for l in open( 'agaricus-lepiota.data' ):
    arr = l.split(',')
    if arr[0] == 'p':
@@ -47,4 +46,4 @@ for l in open( 'agaricus-lepiota.data' ):

fo.close()

@@ -1,10 +1,6 @@
#!/usr/bin/python
import sys
import numpy as np
import scipy.sparse
# append the path to xgboost, you may need to change the following line
# alternatively, you can add the path to PYTHONPATH environment variable
sys.path.append('../../wrapper')
import xgboost as xgb

### simple example
@@ -33,7 +29,7 @@ bst.dump_model('dump.nice.txt','../data/featmap.txt')
# save dmatrix into binary buffer
dtest.save_binary('dtest.buffer')
bst.save_model('xgb.model')
# load model and data in
bst2 = xgb.Booster(model_file='xgb.model')
dtest2 = xgb.DMatrix('dtest.buffer')
preds2 = bst2.predict(dtest2)

@@ -1,7 +1,5 @@
#!/usr/bin/python
import sys
import numpy as np
sys.path.append('../../wrapper')
import xgboost as xgb

dtrain = xgb.DMatrix('../data/agaricus.txt.train')

@@ -1,7 +1,5 @@
#!/usr/bin/python
import sys
import numpy as np
sys.path.append('../../wrapper')
import xgboost as xgb

### load data in and do training
@@ -56,7 +54,7 @@ def evalerror(preds, dtrain):
    labels = dtrain.get_label()
    return 'error', float(sum(labels != (preds > 0.0))) / len(labels)

param = {'max_depth':2, 'eta':1, 'silent':1}
# train with customized objective
xgb.cv(param, dtrain, num_round, nfold = 5, seed = 0,
       obj = logregobj, feval=evalerror)
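
For context on the call above (a sketch, not part of this diff): the `logregobj` passed as `obj` is the customized log-loss objective defined earlier in this demo. XGBoost expects it to return the gradient and hessian of the loss with respect to the raw predictions, along these lines:

```
import numpy as np

def logregobj(preds, dtrain):
    # customized objective for binary logistic loss:
    # XGBoost hands us raw margin scores, so apply the sigmoid first
    labels = dtrain.get_label()
    preds = 1.0 / (1.0 + np.exp(-preds))
    grad = preds - labels          # first-order gradient of log loss
    hess = preds * (1.0 - preds)   # second-order gradient (hessian)
    return grad, hess
```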
@@ -1,11 +1,9 @@
#!/usr/bin/python
import sys
import numpy as np
sys.path.append('../../wrapper')
import xgboost as xgb
###
# advanced: customized loss function
#
print ('start running example to use customized objective function')

dtrain = xgb.DMatrix('../data/agaricus.txt.train')

@@ -1,6 +1,4 @@
#!/usr/bin/python
import sys
sys.path.append('../../wrapper')
import xgboost as xgb
##
# this script demonstrates how to fit a generalized linear model in xgboost
@@ -9,17 +7,17 @@ import xgboost as xgb
dtrain = xgb.DMatrix('../data/agaricus.txt.train')
dtest = xgb.DMatrix('../data/agaricus.txt.test')
# change booster to gblinear, so that we are fitting a linear model
# alpha is the L1 regularizer
# lambda is the L2 regularizer
# you can also set lambda_bias, which is the L2 regularizer on the bias term
param = {'silent':1, 'objective':'binary:logistic', 'booster':'gblinear',
         'alpha': 0.0001, 'lambda': 1 }

# normally, you do not need to set eta (step_size)
# XGBoost uses a parallel coordinate descent algorithm (shotgun),
# so parallelization may affect convergence in certain cases
# setting eta to a smaller value, e.g. 0.5, can make the optimization more stable
# param['eta'] = 1

##
# the rest of the settings are the same
@@ -1,7 +1,5 @@
#!/usr/bin/python
import sys
import numpy as np
sys.path.append('../../wrapper')
import xgboost as xgb

### load data in and do training

@@ -1,7 +1,5 @@
#!/usr/bin/python
import sys
import numpy as np
sys.path.append('../../wrapper')
import xgboost as xgb

### load data in and do training

@@ -4,8 +4,6 @@ Created on 1 Apr 2015
@author: Jamie Hall
'''

import sys
sys.path.append('../../wrapper')
import xgboost as xgb

import numpy as np

@@ -1,7 +1,5 @@
#!/usr/bin/python
import sys
import numpy as np
sys.path.append('../../wrapper')
import xgboost as xgb

### load data in and do training

@@ -1,14 +1,6 @@
#!/usr/bin/python
# this is the example script to use xgboost to train
import inspect
import os
import sys
import numpy as np
# add path of xgboost python module
code_path = os.path.join(
    os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../wrapper")

sys.path.append(code_path)

import xgboost as xgb

@@ -29,7 +21,7 @@ weight = dtrain[:,31] * float(test_size) / len(label)
sum_wpos = sum( weight[i] for i in range(len(label)) if label[i] == 1.0 )
sum_wneg = sum( weight[i] for i in range(len(label)) if label[i] == 0.0 )

# print weight statistics
print ('weight statistics: wpos=%g, wneg=%g, ratio=%g' % ( sum_wpos, sum_wneg, sum_wneg/sum_wpos ))

# construct xgboost.DMatrix from numpy array, treat -999.0 as missing value
@@ -42,13 +34,13 @@ param = {}
param['objective'] = 'binary:logitraw'
# scale weight of positive examples
param['scale_pos_weight'] = sum_wneg/sum_wpos
param['eta'] = 0.1
param['max_depth'] = 6
param['eval_metric'] = 'auc'
param['silent'] = 1
param['nthread'] = 16

# you can directly throw param in, though we want to watch multiple metrics here
plst = list(param.items())+[('eval_metric', 'ams@0.15')]

watchlist = [ (xgmat,'train') ]
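
For reference (a sketch of how the demo continues, outside this hunk): `plst` and `watchlist` feed straight into `xgb.train`, which then reports both `auc` and `ams@0.15` on the training set each round:

```
# hypothetical continuation; the real script sets its own num_round
num_round = 120
bst = xgb.train(plst, xgmat, num_round, watchlist)
bst.save_model('higgs.model')
```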
@@ -1,9 +1,6 @@
#!/usr/bin/python
# make prediction
import sys
import numpy as np
# add path of xgboost python module
sys.path.append('../../wrapper/')
import xgboost as xgb

# path to where the data lies
@@ -11,7 +8,7 @@ dpath = 'data'

modelfile = 'higgs.model'
outfile = 'higgs.pred.csv'
# make top 15% as positive
threshold_ratio = 0.15

# load in training data, directly use numpy
@@ -24,7 +21,7 @@ xgmat = xgb.DMatrix( data, missing = -999.0 )
bst = xgb.Booster({'nthread':16}, model_file = modelfile)
ypred = bst.predict( xgmat )

res = [ ( int(idx[i]), ypred[i] ) for i in range(len(ypred)) ]

rorder = {}
for k, v in sorted( res, key = lambda x:-x[1] ):
@@ -36,12 +33,12 @@ fo = open(outfile, 'w')
nhit = 0
ntot = 0
fo.write('EventId,RankOrder,Class\n')
for k, v in res:
    if rorder[k] <= ntop:
        lb = 's'
        nhit += 1
    else:
        lb = 'b'
    # change output rank order to follow Kaggle convention
    fo.write('%s,%d,%s\n' % ( k, len(rorder)+1-rorder[k], lb ) )
    ntot += 1

@@ -1,9 +1,6 @@
#!/usr/bin/python
# this is the example script to use xgboost to train
import sys
import numpy as np
# add path of xgboost python module
sys.path.append('../../wrapper/')
import xgboost as xgb
from sklearn.ensemble import GradientBoostingClassifier
import time

@@ -1,7 +1,5 @@
#! /usr/bin/python
import sys
import numpy as np
sys.path.append('../../wrapper/')
import xgboost as xgb

# labels need to be in 0 to num_class - 1
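
As a quick illustration of that constraint (a hypothetical sketch, not part of this diff): multiclass training in XGBoost takes integer labels in that range plus an explicit class count:

```
# hypothetical parameters; num_class must match the label encoding above
param = {'objective': 'multi:softmax', 'num_class': 6,
         'eta': 0.1, 'max_depth': 6, 'silent': 1}
```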
@@ -1,7 +1,6 @@
#!/usr/bin/python
import sys

fo = open( 'machine.txt', 'w' )
cnt = 6
fmap = {}
for l in open( 'machine.data' ):
@@ -9,12 +8,12 @@ for l in open( 'machine.data' ):
    fo.write(arr[8])
    for i in range( 0,6 ):
        fo.write( ' %d:%s' %(i,arr[i+2]) )

    if arr[0] not in fmap:
        fmap[arr[0]] = cnt
        cnt += 1

    fo.write( ' %d:1' % fmap[arr[0]] )
    fo.write('\n')

fo.close()
@@ -22,7 +21,7 @@ fo.close()
# create feature map for machine data
fo = open('featmap.txt', 'w')
# list from machine.names
names = ['vendor','MYCT', 'MMIN', 'MMAX', 'CACH', 'CHMIN', 'CHMAX', 'PRP', 'ERP' ]

for i in range(0,6):
    fo.write( '%d\t%s\tint\n' % (i, names[i+1]))

@@ -10,13 +10,17 @@ This should give you xgboost.exe for CLI version and xgboost_wrapper.dll for pyt

Use Python Module
=====
* After you build the dll, you can simply add the path to [../wrapper](../wrapper) to sys.path and import xgboost
* After you build the dll, you can install the Python package from the [../wrapper](../wrapper) folder

```
python setup.py install
```

And import it as usual

```
sys.path.append('path/to/xgboost/wrapper')
import xgboost as xgb
```

* Alternatively, you can add that path to the ```PYTHONPATH``` environment variable
  - Doing so allows you to import xgboost directly like other python packages
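
For example (a hypothetical path; adjust it to wherever you cloned xgboost):

```
export PYTHONPATH=/path/to/xgboost/wrapper:$PYTHONPATH
```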

R Package
====

@@ -5,6 +5,7 @@ This folder provides wrapper of xgboost to other languages

Python
=====
* To make the python module, type ```make``` in the root directory of the project
* Install with `python setup.py install` from this directory.
* Refer also to the walk-through example in [demo folder](../demo/guide-python)
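
A quick smoke test after installing (hypothetical, not part of this diff) is to import the module from outside the source tree:

```
import xgboost as xgb
print(xgb.__file__)  # should point into site-packages, not this directory
```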

R

0	wrapper/__init__.py	Normal file
28	wrapper/setup.py	Normal file
@@ -0,0 +1,28 @@
import os

from setuptools import setup


class XGBoostLibraryNotFound(Exception):
    pass


cur_dir = os.path.dirname(os.path.abspath(__file__))

if os.name == 'nt':
    dll_path = os.path.join(cur_dir,
                            '../windows/x64/Release/xgboost_wrapper.dll')
else:
    dll_path = os.path.join(cur_dir, 'libxgboostwrapper.so')

if not os.path.exists(dll_path):
    raise XGBoostLibraryNotFound("XGBoost library not found. Did you run "
                                 "../make?")

setup(name="xgboost",
      version="0.32",
      description="Python wrappers for XGBoost: eXtreme Gradient Boosting",
      zip_safe=False,
      py_modules=['xgboost'],
      data_files=[dll_path],
      url="https://github.com/dmlc/xgboost")
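
With this setup.py in place, the demos above no longer need sys.path munging; a minimal sketch of the new workflow (paths hypothetical):

```
cd wrapper
python setup.py install
python -c "import xgboost as xgb; print(xgb.__file__)"
```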