From c972feb4b503b0537ca41e7d7f170a8ecebca70d Mon Sep 17 00:00:00 2001
From: Skipper Seabold <jsseabold@gmail.com>
Date: Wed, 8 Apr 2015 14:07:37 -0500
Subject: [PATCH 1/3] Make Python package installable.

---
 wrapper/__init__.py |  0
 wrapper/setup.py    | 28 ++++++++++++++++++++++++++++
 2 files changed, 28 insertions(+)
 create mode 100644 wrapper/__init__.py
 create mode 100644 wrapper/setup.py

diff --git a/wrapper/__init__.py b/wrapper/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/wrapper/setup.py b/wrapper/setup.py
new file mode 100644
index 000000000..49b1a7872
--- /dev/null
+++ b/wrapper/setup.py
@@ -0,0 +1,28 @@
+import os
+
+from setuptools import setup
+
+# Raised by the existence check below when the compiled library is missing.
+class XGBoostLibraryNotFound(Exception):
+    pass
+
+# Directory containing this setup.py; library paths are resolved against it.
+cur_dir = os.path.dirname(os.path.abspath(__file__))
+
+if os.name == 'nt':  # Windows: library is in the windows/ build output
+    dll_path = os.path.join(cur_dir,
+                            '../windows/x64/Release/xgboost_wrapper.dll')
+else:  # other platforms: library sits next to this file
+    dll_path = os.path.join(cur_dir, 'libxgboostwrapper.so')
+
+if not os.path.exists(dll_path):  # fail before setup() if the file is absent
+    raise XGBoostLibraryNotFound("XGBoost library not found. Did you run "
+                                 "../make?")
+# Install the xgboost module; the library path is passed via data_files.
+setup(name="xgboost",
+      version="0.32",
+      description="Python wrappers for XGBoost: eXtreme Gradient Boosting",
+      zip_safe=False,
+      py_modules=['xgboost'],
+      data_files=[dll_path],
+      url="https://github.com/dmlc/xgboost")

From ceb62e923140513512b0161119266d22d1066ae1 Mon Sep 17 00:00:00 2001
From: Skipper Seabold <jsseabold@gmail.com>
Date: Wed, 8 Apr 2015 14:20:52 -0500
Subject: [PATCH 2/3] Update docs about python module install

---
 windows/README.md | 12 ++++++++----
 wrapper/README.md |  1 +
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/windows/README.md b/windows/README.md
index 82efbc54a..6fca36d1c 100644
--- a/windows/README.md
+++ b/windows/README.md
@@ -10,13 +10,17 @@ This should give you xgboost.exe for CLI version and xgboost_wrapper.dll for pyt
 
 Use Python Module
 =====
-* After you build the dll, you can simply add the path to [../wrapper](../wrapper) to sys.path and import xgboost
+* After you build the dll, you can install the Python package from the [../wrapper](../wrapper) folder:
+
+```
+python setup.py install
+```
+
+Then import it as usual:
+
 ```
-sys.path.append('path/to/xgboost/wrapper')
 import xgboost as xgb
 ```
-* Alternatively, you can add that path to system enviroment variable ```PYTHONPATH```
-  - Doing so allows you to import xgboost directly like other python packages
 
 R Package
 ====
diff --git a/wrapper/README.md b/wrapper/README.md
index 09851b97f..0a170257f 100644
--- a/wrapper/README.md
+++ b/wrapper/README.md
@@ -5,6 +5,7 @@ This folder provides wrapper of xgboost to other languages
 Python
 =====
 * To make the python module, type ```make``` in the root directory of project
+* Install with `python setup.py install` from this directory.
 * Refer also to the walk through example in [demo folder](../demo/guide-python)
 
 R

From a0e07f16c491faea1fa917b302e9089ed3a05ad7 Mon Sep 17 00:00:00 2001
From: Skipper Seabold <jsseabold@gmail.com>
Date: Wed, 8 Apr 2015 14:22:54 -0500
Subject: [PATCH 3/3] Update demo scripts to use installed python library

---
 demo/binary_classification/mapfeat.py         | 15 +++++++--------
 demo/guide-python/basic_walkthrough.py        |  6 +-----
 demo/guide-python/boost_from_prediction.py    |  2 --
 demo/guide-python/cross_validation.py         |  4 +---
 demo/guide-python/custom_objective.py         |  4 +---
 demo/guide-python/generalized_linear_model.py |  8 +++-----
 demo/guide-python/predict_first_ntree.py      |  2 --
 demo/guide-python/predict_leaf_indices.py     |  2 --
 demo/guide-python/sklearn_examples.py         |  2 --
 demo/kaggle-higgs/higgs-cv.py                 |  2 --
 demo/kaggle-higgs/higgs-numpy.py              | 16 ++++------------
 demo/kaggle-higgs/higgs-pred.py               | 13 +++++--------
 demo/kaggle-higgs/speedtest.py                |  3 ---
 demo/multiclass_classification/train.py       |  2 --
 demo/regression/mapfeat.py                    | 11 +++++------
 15 files changed, 27 insertions(+), 65 deletions(-)

diff --git a/demo/binary_classification/mapfeat.py b/demo/binary_classification/mapfeat.py
index 186af29e6..5eb8878f9 100755
--- a/demo/binary_classification/mapfeat.py
+++ b/demo/binary_classification/mapfeat.py
@@ -1,17 +1,16 @@
 #!/usr/bin/python
-import sys
 
 def loadfmap( fname ):
     fmap = {}
     nmap = {}
-    
+
     for l in open( fname ):
         arr = l.split()
-        if arr[0].find('.') != -1:            
+        if arr[0].find('.') != -1:
             idx = int( arr[0].strip('.') )
-            assert idx not in fmap        
+            assert idx not in fmap
             fmap[ idx ] = {}
-            ftype = arr[1].strip(':')        
+            ftype = arr[1].strip(':')
             content = arr[2]
         else:
             content = arr[0]
@@ -23,7 +22,7 @@ def loadfmap( fname ):
             nmap[ len(nmap) ] = ftype+'='+k
     return fmap, nmap
 
-def write_nmap( fo, nmap ):    
+def write_nmap( fo, nmap ):
     for i in range( len(nmap) ):
         fo.write('%d\t%s\ti\n' % (i, nmap[i]) )
 
@@ -33,7 +32,7 @@ fo = open( 'featmap.txt', 'w' )
 write_nmap( fo, nmap )
 fo.close()
 
-fo = open( 'agaricus.txt', 'w' ) 
+fo = open( 'agaricus.txt', 'w' )
 for l in open( 'agaricus-lepiota.data' ):
     arr = l.split(',')
     if arr[0] == 'p':
@@ -47,4 +46,4 @@ for l in open( 'agaricus-lepiota.data' ):
 
 fo.close()
 
- 
+
diff --git a/demo/guide-python/basic_walkthrough.py b/demo/guide-python/basic_walkthrough.py
index 81b35ab45..ba8a4319f 100755
--- a/demo/guide-python/basic_walkthrough.py
+++ b/demo/guide-python/basic_walkthrough.py
@@ -1,10 +1,6 @@
 #!/usr/bin/python
-import sys
 import numpy as np
 import scipy.sparse
-# append the path to xgboost, you may need to change the following line
-# alternatively, you can add the path to PYTHONPATH environment variable
-sys.path.append('../../wrapper')
 import xgboost as xgb
 
 ### simple example
@@ -33,7 +29,7 @@ bst.dump_model('dump.nice.txt','../data/featmap.txt')
 # save dmatrix into binary buffer
 dtest.save_binary('dtest.buffer')
 bst.save_model('xgb.model')
-# load model and data in 
+# load model and data in
 bst2 = xgb.Booster(model_file='xgb.model')
 dtest2 = xgb.DMatrix('dtest.buffer')
 preds2 = bst2.predict(dtest2)
diff --git a/demo/guide-python/boost_from_prediction.py b/demo/guide-python/boost_from_prediction.py
index 0aa2e56ab..4870fc49c 100755
--- a/demo/guide-python/boost_from_prediction.py
+++ b/demo/guide-python/boost_from_prediction.py
@@ -1,7 +1,5 @@
 #!/usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper')
 import xgboost as xgb
 
 dtrain = xgb.DMatrix('../data/agaricus.txt.train')
diff --git a/demo/guide-python/cross_validation.py b/demo/guide-python/cross_validation.py
index a50586c58..6ca13d460 100755
--- a/demo/guide-python/cross_validation.py
+++ b/demo/guide-python/cross_validation.py
@@ -1,7 +1,5 @@
 #!/usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper')
 import xgboost as xgb
 
 ### load data in do training
@@ -56,7 +54,7 @@ def evalerror(preds, dtrain):
     labels = dtrain.get_label()
     return 'error', float(sum(labels != (preds > 0.0))) / len(labels)
 
-param = {'max_depth':2, 'eta':1, 'silent':1} 
+param = {'max_depth':2, 'eta':1, 'silent':1}
 # train with customized objective
 xgb.cv(param, dtrain, num_round, nfold = 5, seed = 0,
        obj = logregobj, feval=evalerror)
diff --git a/demo/guide-python/custom_objective.py b/demo/guide-python/custom_objective.py
index 5a7f110f4..d2bd4d9b2 100755
--- a/demo/guide-python/custom_objective.py
+++ b/demo/guide-python/custom_objective.py
@@ -1,11 +1,9 @@
 #!/usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper')
 import xgboost as xgb
 ###
 # advanced: cutomsized loss function
-# 
+#
 print ('start running example to used cutomized objective function')
 
 dtrain = xgb.DMatrix('../data/agaricus.txt.train')
diff --git a/demo/guide-python/generalized_linear_model.py b/demo/guide-python/generalized_linear_model.py
index b6b60be35..243bd603c 100755
--- a/demo/guide-python/generalized_linear_model.py
+++ b/demo/guide-python/generalized_linear_model.py
@@ -1,6 +1,4 @@
 #!/usr/bin/python
-import sys
-sys.path.append('../../wrapper')
 import xgboost as xgb
 ##
 #  this script demonstrate how to fit generalized linear model in xgboost
@@ -9,17 +7,17 @@ import xgboost as xgb
 dtrain = xgb.DMatrix('../data/agaricus.txt.train')
 dtest = xgb.DMatrix('../data/agaricus.txt.test')
 # change booster to gblinear, so that we are fitting a linear model
-# alpha is the L1 regularizer 
+# alpha is the L1 regularizer
 # lambda is the L2 regularizer
 # you can also set lambda_bias which is L2 regularizer on the bias term
 param = {'silent':1, 'objective':'binary:logistic', 'booster':'gblinear',
          'alpha': 0.0001, 'lambda': 1 }
 
 # normally, you do not need to set eta (step_size)
-# XGBoost uses a parallel coordinate descent algorithm (shotgun), 
+# XGBoost uses a parallel coordinate descent algorithm (shotgun),
 # there could be affection on convergence with parallelization on certain cases
 # setting eta to be smaller value, e.g 0.5 can make the optimization more stable
-# param['eta'] = 1 
+# param['eta'] = 1
 
 ##
 # the rest of settings are the same
diff --git a/demo/guide-python/predict_first_ntree.py b/demo/guide-python/predict_first_ntree.py
index 03f327e7f..2ea91232e 100755
--- a/demo/guide-python/predict_first_ntree.py
+++ b/demo/guide-python/predict_first_ntree.py
@@ -1,7 +1,5 @@
 #!/usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper')
 import xgboost as xgb
 
 ### load data in do training
diff --git a/demo/guide-python/predict_leaf_indices.py b/demo/guide-python/predict_leaf_indices.py
index 291ad1ee7..6f7d68da6 100755
--- a/demo/guide-python/predict_leaf_indices.py
+++ b/demo/guide-python/predict_leaf_indices.py
@@ -1,7 +1,5 @@
 #!/usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper')
 import xgboost as xgb
 
 ### load data in do training
diff --git a/demo/guide-python/sklearn_examples.py b/demo/guide-python/sklearn_examples.py
index b30d785fa..96cd876e9 100644
--- a/demo/guide-python/sklearn_examples.py
+++ b/demo/guide-python/sklearn_examples.py
@@ -4,8 +4,6 @@ Created on 1 Apr 2015
 @author: Jamie Hall
 '''
 
-import sys
-sys.path.append('../../wrapper')
 import xgboost as xgb
 
 import numpy as np
diff --git a/demo/kaggle-higgs/higgs-cv.py b/demo/kaggle-higgs/higgs-cv.py
index 3e36fa66b..d5bbc39ef 100755
--- a/demo/kaggle-higgs/higgs-cv.py
+++ b/demo/kaggle-higgs/higgs-cv.py
@@ -1,7 +1,5 @@
 #!/usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper')
 import xgboost as xgb
 
 ### load data in do training
diff --git a/demo/kaggle-higgs/higgs-numpy.py b/demo/kaggle-higgs/higgs-numpy.py
index 1e7448a4c..02b76b362 100755
--- a/demo/kaggle-higgs/higgs-numpy.py
+++ b/demo/kaggle-higgs/higgs-numpy.py
@@ -1,14 +1,6 @@
 #!/usr/bin/python
-# this is the example script to use xgboost to train 
-import inspect
-import os
-import sys
+# this is the example script to use xgboost to train
 import numpy as np
-# add path of xgboost python module
-code_path = os.path.join(
-    os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../wrapper")
-
-sys.path.append(code_path)
 
 import xgboost as xgb
 
@@ -29,7 +21,7 @@ weight = dtrain[:,31] * float(test_size) / len(label)
 sum_wpos = sum( weight[i] for i in range(len(label)) if label[i] == 1.0  )
 sum_wneg = sum( weight[i] for i in range(len(label)) if label[i] == 0.0  )
 
-# print weight statistics 
+# print weight statistics
 print ('weight statistics: wpos=%g, wneg=%g, ratio=%g' % ( sum_wpos, sum_wneg, sum_wneg/sum_wpos ))
 
 # construct xgboost.DMatrix from numpy array, treat -999.0 as missing value
@@ -42,13 +34,13 @@ param = {}
 param['objective'] = 'binary:logitraw'
 # scale weight of positive examples
 param['scale_pos_weight'] = sum_wneg/sum_wpos
-param['eta'] = 0.1 
+param['eta'] = 0.1
 param['max_depth'] = 6
 param['eval_metric'] = 'auc'
 param['silent'] = 1
 param['nthread'] = 16
 
-# you can directly throw param in, though we want to watch multiple metrics here 
+# you can directly throw param in, though we want to watch multiple metrics here
 plst = list(param.items())+[('eval_metric', 'ams@0.15')]
 
 watchlist = [ (xgmat,'train') ]
diff --git a/demo/kaggle-higgs/higgs-pred.py b/demo/kaggle-higgs/higgs-pred.py
index e5383f89d..bc669f557 100755
--- a/demo/kaggle-higgs/higgs-pred.py
+++ b/demo/kaggle-higgs/higgs-pred.py
@@ -1,9 +1,6 @@
 #!/usr/bin/python
-# make prediction 
-import sys
+# make prediction
 import numpy as np
-# add path of xgboost python module
-sys.path.append('../../wrapper/')
 import xgboost as xgb
 
 # path to where the data lies
@@ -11,7 +8,7 @@ dpath = 'data'
 
 modelfile = 'higgs.model'
 outfile = 'higgs.pred.csv'
-# make top 15% as positive 
+# make top 15% as positive
 threshold_ratio = 0.15
 
 # load in training data, directly use numpy
@@ -24,7 +21,7 @@ xgmat = xgb.DMatrix( data, missing = -999.0 )
 bst = xgb.Booster({'nthread':16}, model_file = modelfile)
 ypred = bst.predict( xgmat )
 
-res  = [ ( int(idx[i]), ypred[i] ) for i in range(len(ypred)) ] 
+res  = [ ( int(idx[i]), ypred[i] ) for i in range(len(ypred)) ]
 
 rorder = {}
 for k, v in sorted( res, key = lambda x:-x[1] ):
@@ -36,12 +33,12 @@ fo = open(outfile, 'w')
 nhit = 0
 ntot = 0
 fo.write('EventId,RankOrder,Class\n')
-for k, v in res:        
+for k, v in res:
     if rorder[k] <= ntop:
         lb = 's'
         nhit += 1
     else:
-        lb = 'b'        
+        lb = 'b'
     # change output rank order to follow Kaggle convention
     fo.write('%s,%d,%s\n' % ( k,  len(rorder)+1-rorder[k], lb ) )
     ntot += 1
diff --git a/demo/kaggle-higgs/speedtest.py b/demo/kaggle-higgs/speedtest.py
index c5cc2fd29..472f9f44b 100755
--- a/demo/kaggle-higgs/speedtest.py
+++ b/demo/kaggle-higgs/speedtest.py
@@ -1,9 +1,6 @@
 #!/usr/bin/python
 # this is the example script to use xgboost to train
-import sys
 import numpy as np
-# add path of xgboost python module
-sys.path.append('../../wrapper/')
 import xgboost as xgb
 from sklearn.ensemble import GradientBoostingClassifier
 import time
diff --git a/demo/multiclass_classification/train.py b/demo/multiclass_classification/train.py
index f387de7c0..9e2a82ed2 100755
--- a/demo/multiclass_classification/train.py
+++ b/demo/multiclass_classification/train.py
@@ -1,7 +1,5 @@
 #! /usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper/')
 import xgboost as xgb
 
 # label need to be 0 to num_class -1
diff --git a/demo/regression/mapfeat.py b/demo/regression/mapfeat.py
index d86dca38a..c747c7b49 100755
--- a/demo/regression/mapfeat.py
+++ b/demo/regression/mapfeat.py
@@ -1,7 +1,6 @@
 #!/usr/bin/python
-import sys
 
-fo = open( 'machine.txt', 'w' ) 
+fo = open( 'machine.txt', 'w' )
 cnt = 6
 fmap = {}
 for l in open( 'machine.data' ):
@@ -9,12 +8,12 @@ for l in open( 'machine.data' ):
     fo.write(arr[8])
     for i in range( 0,6 ):
         fo.write( ' %d:%s' %(i,arr[i+2]) )
-    
+
     if arr[0] not in fmap:
         fmap[arr[0]] = cnt
         cnt += 1
-    
-    fo.write( ' %d:1' % fmap[arr[0]] )	
+
+    fo.write( ' %d:1' % fmap[arr[0]] )
     fo.write('\n')
 
 fo.close()
@@ -22,7 +21,7 @@ fo.close()
 # create feature map for machine data
 fo = open('featmap.txt', 'w')
 # list from machine.names
-names = ['vendor','MYCT', 'MMIN', 'MMAX', 'CACH', 'CHMIN', 'CHMAX', 'PRP', 'ERP' ]; 
+names = ['vendor','MYCT', 'MMIN', 'MMAX', 'CACH', 'CHMIN', 'CHMAX', 'PRP', 'ERP' ];
 
 for i in range(0,6):
     fo.write( '%d\t%s\tint\n' % (i, names[i+1]))