From c972feb4b503b0537ca41e7d7f170a8ecebca70d Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Wed, 8 Apr 2015 14:07:37 -0500 Subject: [PATCH 1/3] Make Python package installable. --- wrapper/__init__.py | 0 wrapper/setup.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 wrapper/__init__.py create mode 100644 wrapper/setup.py diff --git a/wrapper/__init__.py b/wrapper/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/wrapper/setup.py b/wrapper/setup.py new file mode 100644 index 000000000..49b1a7872 --- /dev/null +++ b/wrapper/setup.py @@ -0,0 +1,28 @@ +import os + +from setuptools import setup + + +class XGBoostLibraryNotFound(Exception): + pass + + +cur_dir = os.path.dirname(os.path.abspath(__file__)) + +if os.name == 'nt': + dll_path = os.path.join(cur_dir, + '../windows/x64/Release/xgboost_wrapper.dll') +else: + dll_path = os.path.join(cur_dir, 'libxgboostwrapper.so') + +if not os.path.exists(dll_path): + raise XGBoostLibraryNotFound("XGBoost library not found. Did you run " + "../make?") + +setup(name="xgboost", + version="0.32", + description="Python wrappers for XGBoost: eXtreme Gradient Boosting", + zip_safe=False, + py_modules=['xgboost'], + data_files=[dll_path], + url="https://github.com/dmlc/xgboost") From ceb62e923140513512b0161119266d22d1066ae1 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Wed, 8 Apr 2015 14:20:52 -0500 Subject: [PATCH 2/3] Update docs about python module install --- windows/README.md | 12 ++++++++---- wrapper/README.md | 1 + 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/windows/README.md b/windows/README.md index 82efbc54a..6fca36d1c 100644 --- a/windows/README.md +++ b/windows/README.md @@ -10,13 +10,17 @@ This should give you xgboost.exe for CLI version and xgboost_wrapper.dll for pyt Use Python Module ===== -* After you build the dll, you can simply add the path to [../wrapper](../wrapper) to sys.path and import xgboost +* After you build the dll, you can install the Python package from the [../wrapper](../wrapper) folder + +``` +python setup.py install +``` + +And import it as usual + ``` -sys.path.append('path/to/xgboost/wrapper') import xgboost as xgb ``` -* Alternatively, you can add that path to system enviroment variable ```PYTHONPATH``` - - Doing so allows you to import xgboost directly like other python packages R Package ==== diff --git a/wrapper/README.md b/wrapper/README.md index 09851b97f..0a170257f 100644 --- a/wrapper/README.md +++ b/wrapper/README.md @@ -5,6 +5,7 @@ This folder provides wrapper of xgboost to other languages Python ===== * To make the python module, type ```make``` in the root directory of project +* Install with `python setup.py install` from this directory. * Refer also to the walk through example in [demo folder](../demo/guide-python) R From a0e07f16c491faea1fa917b302e9089ed3a05ad7 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Wed, 8 Apr 2015 14:22:54 -0500 Subject: [PATCH 3/3] Update demo scripts to use installed python library --- demo/binary_classification/mapfeat.py | 15 +++++++-------- demo/guide-python/basic_walkthrough.py | 6 +----- demo/guide-python/boost_from_prediction.py | 2 -- demo/guide-python/cross_validation.py | 4 +--- demo/guide-python/custom_objective.py | 4 +--- demo/guide-python/generalized_linear_model.py | 8 +++----- demo/guide-python/predict_first_ntree.py | 2 -- demo/guide-python/predict_leaf_indices.py | 2 -- demo/guide-python/sklearn_examples.py | 2 -- demo/kaggle-higgs/higgs-cv.py | 2 -- demo/kaggle-higgs/higgs-numpy.py | 16 ++++------------ demo/kaggle-higgs/higgs-pred.py | 13 +++++-------- demo/kaggle-higgs/speedtest.py | 3 --- demo/multiclass_classification/train.py | 2 -- demo/regression/mapfeat.py | 11 +++++------ 15 files changed, 27 insertions(+), 65 deletions(-) diff --git a/demo/binary_classification/mapfeat.py b/demo/binary_classification/mapfeat.py index 186af29e6..5eb8878f9 100755 --- a/demo/binary_classification/mapfeat.py +++ b/demo/binary_classification/mapfeat.py @@ -1,17 +1,16 @@ #!/usr/bin/python -import sys def loadfmap( fname ): fmap = {} nmap = {} - + for l in open( fname ): arr = l.split() - if arr[0].find('.') != -1: + if arr[0].find('.') != -1: idx = int( arr[0].strip('.') ) - assert idx not in fmap + assert idx not in fmap fmap[ idx ] = {} - ftype = arr[1].strip(':') + ftype = arr[1].strip(':') content = arr[2] else: content = arr[0] @@ -23,7 +22,7 @@ def loadfmap( fname ): nmap[ len(nmap) ] = ftype+'='+k return fmap, nmap -def write_nmap( fo, nmap ): +def write_nmap( fo, nmap ): for i in range( len(nmap) ): fo.write('%d\t%s\ti\n' % (i, nmap[i]) ) @@ -33,7 +32,7 @@ fo = open( 'featmap.txt', 'w' ) write_nmap( fo, nmap ) fo.close() -fo = open( 'agaricus.txt', 'w' ) +fo = open( 'agaricus.txt', 'w' ) for l in open( 'agaricus-lepiota.data' ): arr = l.split(',') if arr[0] == 'p': @@ -47,4 +46,4 @@ for l in open( 'agaricus-lepiota.data' ): fo.close() - + diff --git a/demo/guide-python/basic_walkthrough.py b/demo/guide-python/basic_walkthrough.py index 81b35ab45..ba8a4319f 100755 --- a/demo/guide-python/basic_walkthrough.py +++ b/demo/guide-python/basic_walkthrough.py @@ -1,10 +1,6 @@ #!/usr/bin/python -import sys import numpy as np import scipy.sparse -# append the path to xgboost, you may need to change the following line -# alternatively, you can add the path to PYTHONPATH environment variable -sys.path.append('../../wrapper') import xgboost as xgb ### simple example @@ -33,7 +29,7 @@ bst.dump_model('dump.nice.txt','../data/featmap.txt') # save dmatrix into binary buffer dtest.save_binary('dtest.buffer') bst.save_model('xgb.model') -# load model and data in +# load model and data in bst2 = xgb.Booster(model_file='xgb.model') dtest2 = xgb.DMatrix('dtest.buffer') preds2 = bst2.predict(dtest2) diff --git a/demo/guide-python/boost_from_prediction.py b/demo/guide-python/boost_from_prediction.py index 0aa2e56ab..4870fc49c 100755 --- a/demo/guide-python/boost_from_prediction.py +++ b/demo/guide-python/boost_from_prediction.py @@ -1,7 +1,5 @@ #!/usr/bin/python -import sys import numpy as np -sys.path.append('../../wrapper') import xgboost as xgb dtrain = xgb.DMatrix('../data/agaricus.txt.train') diff --git a/demo/guide-python/cross_validation.py b/demo/guide-python/cross_validation.py index a50586c58..6ca13d460 100755 --- a/demo/guide-python/cross_validation.py +++ b/demo/guide-python/cross_validation.py @@ -1,7 +1,5 @@ #!/usr/bin/python -import sys import numpy as np -sys.path.append('../../wrapper') import xgboost as xgb ### load data in do training @@ -56,7 +54,7 @@ def evalerror(preds, dtrain): labels = dtrain.get_label() return 'error', float(sum(labels != (preds > 0.0))) / len(labels) -param = {'max_depth':2, 'eta':1, 'silent':1} +param = {'max_depth':2, 'eta':1, 'silent':1} # train with customized objective xgb.cv(param, dtrain, num_round, nfold = 5, seed = 0, obj = logregobj, feval=evalerror) diff --git a/demo/guide-python/custom_objective.py b/demo/guide-python/custom_objective.py index 5a7f110f4..d2bd4d9b2 100755 --- a/demo/guide-python/custom_objective.py +++ b/demo/guide-python/custom_objective.py @@ -1,11 +1,9 @@ #!/usr/bin/python -import sys import numpy as np -sys.path.append('../../wrapper') import xgboost as xgb ### # advanced: cutomsized loss function -# +# print ('start running example to used cutomized objective function') dtrain = xgb.DMatrix('../data/agaricus.txt.train') diff --git a/demo/guide-python/generalized_linear_model.py b/demo/guide-python/generalized_linear_model.py index b6b60be35..243bd603c 100755 --- a/demo/guide-python/generalized_linear_model.py +++ b/demo/guide-python/generalized_linear_model.py @@ -1,6 +1,4 @@ #!/usr/bin/python -import sys -sys.path.append('../../wrapper') import xgboost as xgb ## # this script demonstrate how to fit generalized linear model in xgboost @@ -9,17 +7,17 @@ import xgboost as xgb dtrain = xgb.DMatrix('../data/agaricus.txt.train') dtest = xgb.DMatrix('../data/agaricus.txt.test') # change booster to gblinear, so that we are fitting a linear model -# alpha is the L1 regularizer +# alpha is the L1 regularizer # lambda is the L2 regularizer # you can also set lambda_bias which is L2 regularizer on the bias term param = {'silent':1, 'objective':'binary:logistic', 'booster':'gblinear', 'alpha': 0.0001, 'lambda': 1 } # normally, you do not need to set eta (step_size) -# XGBoost uses a parallel coordinate descent algorithm (shotgun), +# XGBoost uses a parallel coordinate descent algorithm (shotgun), # there could be affection on convergence with parallelization on certain cases # setting eta to be smaller value, e.g 0.5 can make the optimization more stable -# param['eta'] = 1 +# param['eta'] = 1 ## # the rest of settings are the same diff --git a/demo/guide-python/predict_first_ntree.py b/demo/guide-python/predict_first_ntree.py index 03f327e7f..2ea91232e 100755 --- a/demo/guide-python/predict_first_ntree.py +++ b/demo/guide-python/predict_first_ntree.py @@ -1,7 +1,5 @@ #!/usr/bin/python -import sys import numpy as np -sys.path.append('../../wrapper') import xgboost as xgb ### load data in do training diff --git a/demo/guide-python/predict_leaf_indices.py b/demo/guide-python/predict_leaf_indices.py index 291ad1ee7..6f7d68da6 100755 --- a/demo/guide-python/predict_leaf_indices.py +++ b/demo/guide-python/predict_leaf_indices.py @@ -1,7 +1,5 @@ #!/usr/bin/python -import sys import numpy as np -sys.path.append('../../wrapper') import xgboost as xgb ### load data in do training diff --git a/demo/guide-python/sklearn_examples.py b/demo/guide-python/sklearn_examples.py index b30d785fa..96cd876e9 100644 --- a/demo/guide-python/sklearn_examples.py +++ b/demo/guide-python/sklearn_examples.py @@ -4,8 +4,6 @@ Created on 1 Apr 2015 @author: Jamie Hall ''' -import sys -sys.path.append('../../wrapper') import xgboost as xgb import numpy as np diff --git a/demo/kaggle-higgs/higgs-cv.py b/demo/kaggle-higgs/higgs-cv.py index 3e36fa66b..d5bbc39ef 100755 --- a/demo/kaggle-higgs/higgs-cv.py +++ b/demo/kaggle-higgs/higgs-cv.py @@ -1,7 +1,5 @@ #!/usr/bin/python -import sys import numpy as np -sys.path.append('../../wrapper') import xgboost as xgb ### load data in do training diff --git a/demo/kaggle-higgs/higgs-numpy.py b/demo/kaggle-higgs/higgs-numpy.py index 1e7448a4c..02b76b362 100755 --- a/demo/kaggle-higgs/higgs-numpy.py +++ b/demo/kaggle-higgs/higgs-numpy.py @@ -1,14 +1,6 @@ #!/usr/bin/python -# this is the example script to use xgboost to train -import inspect -import os -import sys +# this is the example script to use xgboost to train import numpy as np -# add path of xgboost python module -code_path = os.path.join( - os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../wrapper") - -sys.path.append(code_path) import xgboost as xgb @@ -29,7 +21,7 @@ weight = dtrain[:,31] * float(test_size) / len(label) sum_wpos = sum( weight[i] for i in range(len(label)) if label[i] == 1.0 ) sum_wneg = sum( weight[i] for i in range(len(label)) if label[i] == 0.0 ) -# print weight statistics +# print weight statistics print ('weight statistics: wpos=%g, wneg=%g, ratio=%g' % ( sum_wpos, sum_wneg, sum_wneg/sum_wpos )) # construct xgboost.DMatrix from numpy array, treat -999.0 as missing value @@ -42,13 +34,13 @@ param = {} param['objective'] = 'binary:logitraw' # scale weight of positive examples param['scale_pos_weight'] = sum_wneg/sum_wpos -param['eta'] = 0.1 +param['eta'] = 0.1 param['max_depth'] = 6 param['eval_metric'] = 'auc' param['silent'] = 1 param['nthread'] = 16 -# you can directly throw param in, though we want to watch multiple metrics here +# you can directly throw param in, though we want to watch multiple metrics here plst = list(param.items())+[('eval_metric', 'ams@0.15')] watchlist = [ (xgmat,'train') ] diff --git a/demo/kaggle-higgs/higgs-pred.py b/demo/kaggle-higgs/higgs-pred.py index e5383f89d..bc669f557 100755 --- a/demo/kaggle-higgs/higgs-pred.py +++ b/demo/kaggle-higgs/higgs-pred.py @@ -1,9 +1,6 @@ #!/usr/bin/python -# make prediction -import sys +# make prediction import numpy as np -# add path of xgboost python module -sys.path.append('../../wrapper/') import xgboost as xgb # path to where the data lies @@ -11,7 +8,7 @@ dpath = 'data' modelfile = 'higgs.model' outfile = 'higgs.pred.csv' -# make top 15% as positive +# make top 15% as positive threshold_ratio = 0.15 # load in training data, directly use numpy @@ -24,7 +21,7 @@ xgmat = xgb.DMatrix( data, missing = -999.0 ) bst = xgb.Booster({'nthread':16}, model_file = modelfile) ypred = bst.predict( xgmat ) -res = [ ( int(idx[i]), ypred[i] ) for i in range(len(ypred)) ] +res = [ ( int(idx[i]), ypred[i] ) for i in range(len(ypred)) ] rorder = {} for k, v in sorted( res, key = lambda x:-x[1] ): @@ -36,12 +33,12 @@ fo = open(outfile, 'w') nhit = 0 ntot = 0 fo.write('EventId,RankOrder,Class\n') -for k, v in res: +for k, v in res: if rorder[k] <= ntop: lb = 's' nhit += 1 else: - lb = 'b' + lb = 'b' # change output rank order to follow Kaggle convention fo.write('%s,%d,%s\n' % ( k, len(rorder)+1-rorder[k], lb ) ) ntot += 1 diff --git a/demo/kaggle-higgs/speedtest.py b/demo/kaggle-higgs/speedtest.py index c5cc2fd29..472f9f44b 100755 --- a/demo/kaggle-higgs/speedtest.py +++ b/demo/kaggle-higgs/speedtest.py @@ -1,9 +1,6 @@ #!/usr/bin/python # this is the example script to use xgboost to train -import sys import numpy as np -# add path of xgboost python module -sys.path.append('../../wrapper/') import xgboost as xgb from sklearn.ensemble import GradientBoostingClassifier import time diff --git a/demo/multiclass_classification/train.py b/demo/multiclass_classification/train.py index f387de7c0..9e2a82ed2 100755 --- a/demo/multiclass_classification/train.py +++ b/demo/multiclass_classification/train.py @@ -1,7 +1,5 @@ #! /usr/bin/python -import sys import numpy as np -sys.path.append('../../wrapper/') import xgboost as xgb # label need to be 0 to num_class -1 diff --git a/demo/regression/mapfeat.py b/demo/regression/mapfeat.py index d86dca38a..c747c7b49 100755 --- a/demo/regression/mapfeat.py +++ b/demo/regression/mapfeat.py @@ -1,7 +1,6 @@ #!/usr/bin/python -import sys -fo = open( 'machine.txt', 'w' ) +fo = open( 'machine.txt', 'w' ) cnt = 6 fmap = {} for l in open( 'machine.data' ): @@ -9,12 +8,12 @@ for l in open( 'machine.data' ): fo.write(arr[8]) for i in range( 0,6 ): fo.write( ' %d:%s' %(i,arr[i+2]) ) - + if arr[0] not in fmap: fmap[arr[0]] = cnt cnt += 1 - - fo.write( ' %d:1' % fmap[arr[0]] ) + + fo.write( ' %d:1' % fmap[arr[0]] ) fo.write('\n') fo.close() @@ -22,7 +21,7 @@ fo.close() # create feature map for machine data fo = open('featmap.txt', 'w') # list from machine.names -names = ['vendor','MYCT', 'MMIN', 'MMAX', 'CACH', 'CHMIN', 'CHMAX', 'PRP', 'ERP' ]; +names = ['vendor','MYCT', 'MMIN', 'MMAX', 'CACH', 'CHMIN', 'CHMAX', 'PRP', 'ERP' ]; for i in range(0,6): fo.write( '%d\t%s\tint\n' % (i, names[i+1]))