Merge pull request #230 from jseabold/python-install

Make the Python wrappers installable without path munging
This commit is contained in:
Tianqi Chen 2015-04-08 15:02:37 -07:00
commit 23c273173f
19 changed files with 64 additions and 69 deletions

View File

@ -1,17 +1,16 @@
#!/usr/bin/python
import sys
def loadfmap( fname ):
fmap = {}
nmap = {}
for l in open( fname ):
arr = l.split()
if arr[0].find('.') != -1:
if arr[0].find('.') != -1:
idx = int( arr[0].strip('.') )
assert idx not in fmap
assert idx not in fmap
fmap[ idx ] = {}
ftype = arr[1].strip(':')
ftype = arr[1].strip(':')
content = arr[2]
else:
content = arr[0]
@ -23,7 +22,7 @@ def loadfmap( fname ):
nmap[ len(nmap) ] = ftype+'='+k
return fmap, nmap
def write_nmap( fo, nmap ):
def write_nmap( fo, nmap ):
for i in range( len(nmap) ):
fo.write('%d\t%s\ti\n' % (i, nmap[i]) )
@ -33,7 +32,7 @@ fo = open( 'featmap.txt', 'w' )
write_nmap( fo, nmap )
fo.close()
fo = open( 'agaricus.txt', 'w' )
fo = open( 'agaricus.txt', 'w' )
for l in open( 'agaricus-lepiota.data' ):
arr = l.split(',')
if arr[0] == 'p':
@ -47,4 +46,4 @@ for l in open( 'agaricus-lepiota.data' ):
fo.close()

View File

@ -1,10 +1,6 @@
#!/usr/bin/python
import sys
import numpy as np
import scipy.sparse
# append the path to xgboost, you may need to change the following line
# alternatively, you can add the path to PYTHONPATH environment variable
sys.path.append('../../wrapper')
import xgboost as xgb
### simple example
@ -33,7 +29,7 @@ bst.dump_model('dump.nice.txt','../data/featmap.txt')
# save dmatrix into binary buffer
dtest.save_binary('dtest.buffer')
bst.save_model('xgb.model')
# load model and data in
# load model and data in
bst2 = xgb.Booster(model_file='xgb.model')
dtest2 = xgb.DMatrix('dtest.buffer')
preds2 = bst2.predict(dtest2)

View File

@ -1,7 +1,5 @@
#!/usr/bin/python
import sys
import numpy as np
sys.path.append('../../wrapper')
import xgboost as xgb
dtrain = xgb.DMatrix('../data/agaricus.txt.train')

View File

@ -1,7 +1,5 @@
#!/usr/bin/python
import sys
import numpy as np
sys.path.append('../../wrapper')
import xgboost as xgb
### load data in do training
@ -56,7 +54,7 @@ def evalerror(preds, dtrain):
labels = dtrain.get_label()
return 'error', float(sum(labels != (preds > 0.0))) / len(labels)
param = {'max_depth':2, 'eta':1, 'silent':1}
param = {'max_depth':2, 'eta':1, 'silent':1}
# train with customized objective
xgb.cv(param, dtrain, num_round, nfold = 5, seed = 0,
obj = logregobj, feval=evalerror)

View File

@ -1,11 +1,9 @@
#!/usr/bin/python
import sys
import numpy as np
sys.path.append('../../wrapper')
import xgboost as xgb
###
# advanced: customized loss function
#
#
print ('start running example to used cutomized objective function')
dtrain = xgb.DMatrix('../data/agaricus.txt.train')

View File

@ -1,6 +1,4 @@
#!/usr/bin/python
import sys
sys.path.append('../../wrapper')
import xgboost as xgb
##
# this script demonstrates how to fit a generalized linear model in xgboost
@ -9,17 +7,17 @@ import xgboost as xgb
dtrain = xgb.DMatrix('../data/agaricus.txt.train')
dtest = xgb.DMatrix('../data/agaricus.txt.test')
# change booster to gblinear, so that we are fitting a linear model
# alpha is the L1 regularizer
# alpha is the L1 regularizer
# lambda is the L2 regularizer
# you can also set lambda_bias which is L2 regularizer on the bias term
param = {'silent':1, 'objective':'binary:logistic', 'booster':'gblinear',
'alpha': 0.0001, 'lambda': 1 }
# normally, you do not need to set eta (step_size)
# XGBoost uses a parallel coordinate descent algorithm (shotgun),
# XGBoost uses a parallel coordinate descent algorithm (shotgun),
# parallelization can affect convergence in certain cases;
# setting eta to a smaller value, e.g. 0.5, can make the optimization more stable
# param['eta'] = 1
# param['eta'] = 1
##
# the rest of settings are the same

View File

@ -1,7 +1,5 @@
#!/usr/bin/python
import sys
import numpy as np
sys.path.append('../../wrapper')
import xgboost as xgb
### load data in do training

View File

@ -1,7 +1,5 @@
#!/usr/bin/python
import sys
import numpy as np
sys.path.append('../../wrapper')
import xgboost as xgb
### load data in do training

View File

@ -4,8 +4,6 @@ Created on 1 Apr 2015
@author: Jamie Hall
'''
import sys
sys.path.append('../../wrapper')
import xgboost as xgb
import numpy as np

View File

@ -1,7 +1,5 @@
#!/usr/bin/python
import sys
import numpy as np
sys.path.append('../../wrapper')
import xgboost as xgb
### load data in do training

View File

@ -1,14 +1,6 @@
#!/usr/bin/python
# this is the example script to use xgboost to train
import inspect
import os
import sys
# this is the example script to use xgboost to train
import numpy as np
# add path of xgboost python module
code_path = os.path.join(
os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../wrapper")
sys.path.append(code_path)
import xgboost as xgb
@ -29,7 +21,7 @@ weight = dtrain[:,31] * float(test_size) / len(label)
sum_wpos = sum( weight[i] for i in range(len(label)) if label[i] == 1.0 )
sum_wneg = sum( weight[i] for i in range(len(label)) if label[i] == 0.0 )
# print weight statistics
# print weight statistics
print ('weight statistics: wpos=%g, wneg=%g, ratio=%g' % ( sum_wpos, sum_wneg, sum_wneg/sum_wpos ))
# construct xgboost.DMatrix from numpy array, treat -999.0 as missing value
@ -42,13 +34,13 @@ param = {}
param['objective'] = 'binary:logitraw'
# scale weight of positive examples
param['scale_pos_weight'] = sum_wneg/sum_wpos
param['eta'] = 0.1
param['eta'] = 0.1
param['max_depth'] = 6
param['eval_metric'] = 'auc'
param['silent'] = 1
param['nthread'] = 16
# you can directly throw param in, though we want to watch multiple metrics here
# you can directly throw param in, though we want to watch multiple metrics here
plst = list(param.items())+[('eval_metric', 'ams@0.15')]
watchlist = [ (xgmat,'train') ]

View File

@ -1,9 +1,6 @@
#!/usr/bin/python
# make prediction
import sys
# make prediction
import numpy as np
# add path of xgboost python module
sys.path.append('../../wrapper/')
import xgboost as xgb
# path to where the data lies
@ -11,7 +8,7 @@ dpath = 'data'
modelfile = 'higgs.model'
outfile = 'higgs.pred.csv'
# make top 15% as positive
# make top 15% as positive
threshold_ratio = 0.15
# load in training data, directly use numpy
@ -24,7 +21,7 @@ xgmat = xgb.DMatrix( data, missing = -999.0 )
bst = xgb.Booster({'nthread':16}, model_file = modelfile)
ypred = bst.predict( xgmat )
res = [ ( int(idx[i]), ypred[i] ) for i in range(len(ypred)) ]
res = [ ( int(idx[i]), ypred[i] ) for i in range(len(ypred)) ]
rorder = {}
for k, v in sorted( res, key = lambda x:-x[1] ):
@ -36,12 +33,12 @@ fo = open(outfile, 'w')
nhit = 0
ntot = 0
fo.write('EventId,RankOrder,Class\n')
for k, v in res:
for k, v in res:
if rorder[k] <= ntop:
lb = 's'
nhit += 1
else:
lb = 'b'
lb = 'b'
# change output rank order to follow Kaggle convention
fo.write('%s,%d,%s\n' % ( k, len(rorder)+1-rorder[k], lb ) )
ntot += 1

View File

@ -1,9 +1,6 @@
#!/usr/bin/python
# this is the example script to use xgboost to train
import sys
import numpy as np
# add path of xgboost python module
sys.path.append('../../wrapper/')
import xgboost as xgb
from sklearn.ensemble import GradientBoostingClassifier
import time

View File

@ -1,7 +1,5 @@
#! /usr/bin/python
import sys
import numpy as np
sys.path.append('../../wrapper/')
import xgboost as xgb
# labels need to be in the range 0 to num_class - 1

View File

@ -1,7 +1,6 @@
#!/usr/bin/python
import sys
fo = open( 'machine.txt', 'w' )
fo = open( 'machine.txt', 'w' )
cnt = 6
fmap = {}
for l in open( 'machine.data' ):
@ -9,12 +8,12 @@ for l in open( 'machine.data' ):
fo.write(arr[8])
for i in range( 0,6 ):
fo.write( ' %d:%s' %(i,arr[i+2]) )
if arr[0] not in fmap:
fmap[arr[0]] = cnt
cnt += 1
fo.write( ' %d:1' % fmap[arr[0]] )
fo.write( ' %d:1' % fmap[arr[0]] )
fo.write('\n')
fo.close()
@ -22,7 +21,7 @@ fo.close()
# create feature map for machine data
fo = open('featmap.txt', 'w')
# list from machine.names
names = ['vendor','MYCT', 'MMIN', 'MMAX', 'CACH', 'CHMIN', 'CHMAX', 'PRP', 'ERP' ];
names = ['vendor','MYCT', 'MMIN', 'MMAX', 'CACH', 'CHMIN', 'CHMAX', 'PRP', 'ERP' ];
for i in range(0,6):
fo.write( '%d\t%s\tint\n' % (i, names[i+1]))

View File

@ -10,13 +10,17 @@ This should give you xgboost.exe for CLI version and xgboost_wrapper.dll for pyt
Use Python Module
=====
* After you build the dll, you can simply add the path to [../wrapper](../wrapper) to sys.path and import xgboost
* After you build the dll, you can install the Python package from the [../wrapper](../wrapper) folder
```
python setup.py install
```
And import it as usual
```
sys.path.append('path/to/xgboost/wrapper')
import xgboost as xgb
```
* Alternatively, you can add that path to the system environment variable ```PYTHONPATH```
- Doing so allows you to import xgboost directly like other python packages
R Package
====

View File

@ -5,6 +5,7 @@ This folder provides wrapper of xgboost to other languages
Python
=====
* To make the python module, type ```make``` in the root directory of project
* Install with `python setup.py install` from this directory.
* Refer also to the walk through example in [demo folder](../demo/guide-python)
R

0
wrapper/__init__.py Normal file
View File

28
wrapper/setup.py Normal file
View File

@ -0,0 +1,28 @@
"""Install script for the XGBoost Python wrapper.

Locates the compiled XGBoost wrapper library, refuses to continue when it
is missing, and registers the ``xgboost`` module together with that
library via ``setuptools.setup``.
"""
import os

from setuptools import setup


class XGBoostLibraryNotFound(Exception):
    """Raised when the compiled XGBoost wrapper library cannot be found."""


# Directory that contains this setup script.
cur_dir = os.path.dirname(os.path.abspath(__file__))

# Expected location of the compiled library: under ../windows/x64/Release
# on Windows ('nt'), next to this script on every other platform.
if os.name != 'nt':
    dll_path = os.path.join(cur_dir, 'libxgboostwrapper.so')
else:
    dll_path = os.path.join(
        cur_dir, '../windows/x64/Release/xgboost_wrapper.dll')

# Stop before setup() runs if the library has not been built yet.
library_is_present = os.path.exists(dll_path)
if not library_is_present:
    raise XGBoostLibraryNotFound(
        "XGBoost library not found. Did you run ../make?")

setup(
    name="xgboost",
    version="0.32",
    description="Python wrappers for XGBoost: eXtreme Gradient Boosting",
    zip_safe=False,
    py_modules=['xgboost'],
    # Ship the compiled library alongside the Python module.
    data_files=[dll_path],
    url="https://github.com/dmlc/xgboost",
)