diff --git a/CHANGES.md b/CHANGES.md index 4484a321b..a8ddcd7ea 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -43,4 +43,5 @@ on going at master * Python module now has importance plot and tree plot functions. * Java api is ready for use * Added more test cases and continuous integration to make each build more robust -* Improvements in sklearn compatible module \ No newline at end of file +* Improvements in sklearn compatible module +* Added pip installation functionality for python module diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index ad6c01f2f..6ae79f795 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -45,4 +45,6 @@ List of Contributors - Jamie is the initial creator of xgboost sklearn modue. * [Yen-Ying Lee](https://github.com/white1033) * [Masaaki Horikoshi](https://github.com/sinhrks) - - Masaaki is the initial creator of xgboost python plotting module. \ No newline at end of file + - Masaaki is the initial creator of xgboost python plotting module. +* [Hongliang Liu](https://github.com/phunterlau) + - Hongliang is the maintainer of xgboost python PyPI package for pip installation. diff --git a/Makefile b/Makefile index c9e35e80c..c790f6b72 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,5 @@ export CC = gcc +#build on the fly export CXX = g++ export MPICXX = mpicxx export LDFLAGS= -pthread -lm @@ -167,6 +168,30 @@ Rcheck: make Rbuild R CMD check --as-cran xgboost*.tar.gz +pythonpack: + #make clean + cd subtree/rabit;make clean;cd .. 
+ rm -rf xgboost-deploy xgboost*.tar.gz + cp -r python-package xgboost-deploy + cp *.md xgboost-deploy/ + cp LICENSE xgboost-deploy/ + cp Makefile xgboost-deploy/xgboost + cp -r wrapper xgboost-deploy/xgboost + cp -r subtree xgboost-deploy/xgboost + cp -r multi-node xgboost-deploy/xgboost + cp -r windows xgboost-deploy/xgboost + cp -r src xgboost-deploy/xgboost + + #make python + +pythonbuild: + $(MAKE) pythonpack + python setup.py install + +pythoncheck: + $(MAKE) pythonbuild + python -c 'import xgboost;print(xgboost.core.find_lib_path())' + # lint requires dmlc to be in current folder lint: dmlc-core/scripts/lint.py xgboost $(LINT_LANG) src wrapper R-package python-package diff --git a/python-package/MANIFEST.in b/python-package/MANIFEST.in new file mode 100644 index 000000000..2d93429a9 --- /dev/null +++ b/python-package/MANIFEST.in @@ -0,0 +1,7 @@ +include *.sh *.md +recursive-include xgboost * +recursive-include xgboost/wrapper * +recursive-include xgboost/windows * +recursive-include xgboost/subtree * +recursive-include xgboost/src * +recursive-include xgboost/multi-node * diff --git a/python-package/README.md b/python-package/README.md index a4ac71d4d..20d609864 100644 --- a/python-package/README.md +++ b/python-package/README.md @@ -1,7 +1,26 @@ XGBoost Python Package ====================== +Installation +------------ +We are on [PyPI](https://pypi.python.org/pypi/xgboost) now. For the stable version, please install using pip: + +* ```pip install xgboost``` +* Note for Windows users: this pip installation may not work in some Windows environments. Please install from GitHub if pip doesn't work on Windows. + +For the up-to-date version, please install from GitHub. + * To make the python module, type ```./build.sh``` in the root directory of project * Make sure you have [setuptools](https://pypi.python.org/pypi/setuptools) * Install with `python setup.py install` from this directory. 
+ +Examples +------ + * Refer also to the walk through example in [demo folder](../demo/guide-python) -* **NOTE**: if you want to run XGBoost process in parallel using the fork backend for joblib/multiprocessing, you must build XGBoost without support for OpenMP by `make no_omp=1`. Otherwise, use the forkserver (in Python 3.4) or spawn backend. See the sklearn_parallel.py demo. +* See also the [example scripts](../demo/kaggle-higgs) for the Kaggle Higgs Challenge, including a [speedtest script](../demo/kaggle-higgs/speedtest.py) on this dataset. + +Note +----- + +* If you want to build xgboost on Mac OS X with multiprocessing support, which the default clang in Xcode doesn't provide, please install gcc 4.9 or higher using [homebrew](http://brew.sh/): ```brew tap homebrew/versions; brew install gcc49``` +* If you want to run XGBoost process in parallel using the fork backend for joblib/multiprocessing, you must build XGBoost without support for OpenMP by `make no_omp=1`. Otherwise, use the forkserver (in Python 3.4) or spawn backend. See the [sklearn_parallel.py](../demo/guide-python/sklearn_parallel.py) demo. 
diff --git a/python-package/setup.cfg b/python-package/setup.cfg new file mode 100644 index 000000000..b88034e41 --- /dev/null +++ b/python-package/setup.cfg @@ -0,0 +1,2 @@ +[metadata] +description-file = README.md diff --git a/python-package/setup.py b/python-package/setup.py index 42e39f3ba..a446983be 100644 --- a/python-package/setup.py +++ b/python-package/setup.py @@ -2,20 +2,43 @@ """Setup xgboost package.""" from __future__ import absolute_import import sys -from setuptools import setup +from setuptools import setup, find_packages +import subprocess sys.path.insert(0, '.') +# build the native library on the fly; the script needs bash (it uses pushd/popd) + +build_sh = subprocess.Popen(['bash', 'xgboost/build-python.sh']) +build_sh.wait() +output = build_sh.communicate() +print(output) + import xgboost LIB_PATH = xgboost.core.find_lib_path() +#print LIB_PATH +#to deploy to pip, please use +#make pythonpack +#python setup.py register sdist upload +#and be sure to test it first using "python setup.py register sdist upload -r pypitest" setup(name='xgboost', version=xgboost.__version__, + #version='0.4a12', description=xgboost.__doc__, install_requires=[ 'numpy', 'scipy', ], + maintainer='Hongliang Liu', + maintainer_email='phunter.lau@gmail.com', zip_safe=False, - packages=['xgboost'], - data_files=[('xgboost', [LIB_PATH[0]])], + packages=find_packages(), + #don't need this and don't use this, give everything to MANIFEST.in + #package_dir = {'':'xgboost'}, + #package_data = {'': ['*.txt','*.md','*.sh'], + # } + #this will use MANIFEST.in during install where we specify additional files, + #this is the golden line + include_package_data=True, + data_files=[('xgboost', LIB_PATH)], url='https://github.com/dmlc/xgboost') diff --git a/python-package/xgboost/build-python.sh b/python-package/xgboost/build-python.sh new file mode 100755 index 000000000..398b076b8 --- /dev/null +++ b/python-package/xgboost/build-python.sh @@ -0,0 +1,26 @@ +#!/bin/bash +# This is a simple script to make xgboost in MAC and Linux for python wrapper only 
+# Basically, it first tries to make with OpenMP; if that fails, it disables OpenMP and makes again. +# This will automatically make xgboost for MAC users who don't have OpenMP support. +# In most cases, typing make will give what you want. + +# See additional instructions in doc/build.md + +# note: this script is built for the python package only, and it might have some filename +# conflict with build.sh which is for everything. + + +pushd xgboost +if make python; then + echo "Successfully build multi-thread xgboost" +else + echo "-----------------------------" + echo "Building multi-thread xgboost failed" + echo "Start to build single-thread xgboost" + make clean + make python no_omp=1 || exit 1 + echo "Successfully build single-thread xgboost" + echo "If you want multi-threaded version" + echo "See additional instructions in doc/build.md" +fi +popd diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index 0849d276c..85b6a1818 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -39,7 +39,9 @@ def find_lib_path(): List of all found library path to xgboost """ curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) - dll_path = [curr_path, os.path.join(curr_path, '../../wrapper/')] + #make pythonpack hack: copy this directory one level upper for setup.py + dll_path = [curr_path, os.path.join(curr_path, '../../wrapper/') + , os.path.join(curr_path, './wrapper/')] if os.name == 'nt': if platform.architecture()[0] == '64bit': dll_path.append(os.path.join(curr_path, '../../windows/x64/Release/'))